/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2018 Google, Inc.
 */
# include < l i n u x / l i n k a g e . h >
# include < a s m / a s s e m b l e r . h >
/*
 * Design notes:
 *
 * 16 registers would be needed to hold the state matrix, but only 14 are
 * available because 'sp' and 'pc' cannot be used.  So we spill the elements
 * (x8, x9) to the stack and swap them out with (x10, x11).  This adds one
 * 'ldrd' and one 'strd' instruction per round.
 *
 * All rotates are performed using the implicit rotate operand accepted by the
 * 'add' and 'eor' instructions.  This is faster than using explicit rotate
 * instructions.  To make this work, we allow the values in the second and last
 * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
 * wrong rotation amount.  The rotation amount is then fixed up just in time
 * when the values are used.  'brot' is the number of bits the values in row 'b'
 * need to be rotated right to arrive at the correct values, and 'drot'
 * similarly for row 'd'.  (brot, drot) start out as (0, 0) but we make it such
 * that they end up as (25, 24) after every round.
 */
// ChaCha state registers
//
// Symbolic names for the ARM core registers that carry the ChaCha state
// words.  Only 14 aliases are defined for the 16 words: x8 and x10 share
// r8, and x9 and x11 share r9.  r13 (sp) is deliberately skipped, so X15
// lands in r14.
	X0	.req	r0
	X1	.req	r1
	X2	.req	r2
	X3	.req	r3
	X4	.req	r4
	X5	.req	r5
	X6	.req	r6
	X7	.req	r7
	X8_X10	.req	r8	// shared by x8 and x10
	X9_X11	.req	r9	// shared by x9 and x11
	X12	.req	r10
	X13	.req	r11
	X14	.req	r12
	X15	.req	r14
// _le32_bswap_4x a, b, c, d, tmp
//
// Applies rev_l to each of the four registers a, b, c and d, passing tmp as
// the second operand every time.  The body is only emitted when __ARMEB__ is
// defined; otherwise the macro expands to nothing.
// NOTE(review): rev_l is not defined in this file; presumably it is a
// byte-reverse helper from <asm/assembler.h> that uses tmp as scratch --
// confirm against that header.
.macro _le32_bswap_4x	a, b, c, d,  tmp
#ifdef __ARMEB__
	rev_l		\a, \tmp
	rev_l		\b, \tmp
	rev_l		\c, \tmp
	rev_l		\d, \tmp
#endif
.endm
// __ldrd a, b, src, offset
//
// Loads two consecutive 32-bit words: a from [src + offset] and b from
// [src + offset + 4].  When __LINUX_ARM_ARCH__ >= 6 a single 'ldrd' is
// emitted; otherwise two separate 'ldr' instructions are used.
.macro __ldrd		a, b, src, offset
#if __LINUX_ARM_ARCH__ >= 6
	ldrd		\a, \b, [\src, #\offset]
#else
	ldr		\a, [\src, #\offset]
	ldr		\b, [\src, #\offset + 4]
#endif
.endm
// __strd a, b, dst, offset
//
// Stores two consecutive 32-bit words: a to [dst + offset] and b to
// [dst + offset + 4].  When __LINUX_ARM_ARCH__ >= 6 a single 'strd' is
// emitted; otherwise two separate 'str' instructions are used.
.macro __strd		a, b, dst, offset
#if __LINUX_ARM_ARCH__ >= 6
	strd		\a, \b, [\dst, #\offset]
#else
	str		\a, [\dst, #\offset]
	str		\b, [\dst, #\offset + 4]
#endif
.endm
// _halfround a1, b1, c1, d1, a2, b2, c2, d2
//
// Runs two ChaCha quarter rounds, (a1, b1, c1, d1) and (a2, b2, c2, d2),
// with their instructions interleaved pairwise.
//
// No explicit rotate is issued.  Each 'rol' of the reference algorithm is
// deferred and folded into the 'ror' shifter operand of the next 'add' or
// 'eor' that consumes the value.  On entry the b registers are expected to
// need a right-rotate by the assemble-time symbol 'brot' and the d registers
// by 'drot'.  On exit the b registers need a right-rotate by 25 and the d
// registers by 24; this macro does not update 'brot'/'drot' itself.
.macro _halfround	a1, b1, c1, d1,  a2, b2, c2, d2

	// a += b; d ^= a; d = rol(d, 16);
	add		\a1, \a1, \b1, ror #brot
	add		\a2, \a2, \b2, ror #brot
	eor		\d1, \a1, \d1, ror #drot
	eor		\d2, \a2, \d2, ror #drot
	// drot == 32 - 16 == 16

	// c += d; b ^= c; b = rol(b, 12);
	add		\c1, \c1, \d1, ror #16
	add		\c2, \c2, \d2, ror #16
	eor		\b1, \c1, \b1, ror #brot
	eor		\b2, \c2, \b2, ror #brot
	// brot == 32 - 12 == 20

	// a += b; d ^= a; d = rol(d, 8);
	add		\a1, \a1, \b1, ror #20
	add		\a2, \a2, \b2, ror #20
	eor		\d1, \a1, \d1, ror #16
	eor		\d2, \a2, \d2, ror #16
	// drot == 32 - 8 == 24

	// c += d; b ^= c; b = rol(b, 7);
	add		\c1, \c1, \d1, ror #24
	add		\c2, \c2, \d2, ror #24
	eor		\b1, \c1, \b1, ror #20
	eor		\b2, \c2, \b2, ror #20
	// brot == 32 - 7 == 25
.endm
// _doubleround
//
// One ChaCha double round: a column round followed by a diagonal round, each
// built from two _halfround invocations (four quarter rounds per round).
//
// X8_X10/X9_X11 hold (x8, x9) on entry and again on exit.  In between they
// are swapped with (x10, x11) through two stack slots: (x8, x9) is parked at
// [sp, #0] and (x10, x11) at [sp, #8].
//
// After the column round 'brot' and 'drot' are set to 25 and 24, the pending
// rotations every _halfround leaves behind, so all later half rounds consume
// correctly described inputs.
.macro _doubleround

	// column round

	// quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
	_halfround	X0, X4, X8_X10, X12,  X1, X5, X9_X11, X13

	// save (x8, x9); restore (x10, x11)
	__strd		X8_X10, X9_X11, sp, 0
	__ldrd		X8_X10, X9_X11, sp, 8

	// quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
	_halfround	X2, X6, X8_X10, X14,  X3, X7, X9_X11, X15

	.set brot, 25
	.set drot, 24

	// diagonal round

	// quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
	_halfround	X0, X5, X8_X10, X15,  X1, X6, X9_X11, X12

	// save (x10, x11); restore (x8, x9)
	__strd		X8_X10, X9_X11, sp, 8
	__ldrd		X8_X10, X9_X11, sp, 0

	// quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
	_halfround	X2, X7, X8_X10, X13,  X3, X4, X9_X11, X14
.endm
// _chacha_permute nrounds
//
// Emits the unrolled ChaCha permutation: nrounds / 2 copies of _doubleround.
// 'brot' and 'drot' are reset to 0 first, i.e. the state registers are taken
// to be unrotated on entry.  After the expansion they are left at whatever
// the last _doubleround set them to.
.macro _chacha_permute	nrounds
	.set brot, 0
	.set drot, 0
	.rept \nrounds / 2
	 _doubleround
	.endr
.endm
// _chacha nrounds
//
// Main encryption loop, expanded once per round count.  Each iteration runs
// the permutation on the state held in registers, adds the original state
// kept on the stack to obtain 64 bytes of keystream, and XORs that keystream
// with the input into the output.
//
// Expected on entry (and whenever .Lnext_block is reached):
//   Stack:     unused0-unused1 x10-x11 x0-x15 OUT IN LEN
//   Registers: x0-x9 and x12-x15 in their X* registers.
// On exit (.Ldone):
//   Stack:     x0-x15 OUT IN LEN
//
// Two data paths exist:
//   - fast path: at least 64 bytes remain and both IN and OUT are 4-byte
//     aligned; the block is XORed straight from IN to OUT.
//   - slow path: the keystream block is first written to a 64-byte stack
//     buffer, then XORed a word and/or a byte at a time.
//
// All labels carry the \@ suffix so the macro can be expanded more than once
// in the same file.
.macro _chacha		nrounds

.Lnext_block\@:
	// Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
	// Registers contain x0-x9,x12-x15.

	// Do the core ChaCha permutation to update x0-x15.
	_chacha_permute	\nrounds

	add		sp, #8
	// Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
	// Registers contain x0-x9,x12-x15.
	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.

	// Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
	push		{X8_X10, X9_X11, X12, X13, X14, X15}

	// Load (OUT, IN, LEN).
	ldr		r14, [sp, #96]
	ldr		r12, [sp, #100]
	ldr		r11, [sp, #104]

	// r10 = OUT | IN, so a single 'tst' below checks both alignments.
	orr		r10, r14, r12

	// Use slow path if fewer than 64 bytes remain.
	cmp		r11, #64
	blt		.Lxor_slowpath\@

	// Use slow path if IN and/or OUT isn't 4-byte aligned.  Needed even on
	// ARMv6+, since ldmia and stmia (used below) still require alignment.
	tst		r10, #3
	bne		.Lxor_slowpath\@

	// Fast path: XOR 64 bytes of aligned data.

	// Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
	// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.

	// x0-x3
	__ldrd		r8, r9, sp, 32
	__ldrd		r10, r11, sp, 40
	add		X0, X0, r8
	add		X1, X1, r9
	add		X2, X2, r10
	add		X3, X3, r11
	_le32_bswap_4x	X0, X1, X2, X3,  r8
	ldmia		r12!, {r8-r11}
	eor		X0, X0, r8
	eor		X1, X1, r9
	eor		X2, X2, r10
	eor		X3, X3, r11
	stmia		r14!, {X0-X3}

	// x4-x7
	// The pending 'brot' rotation is folded into the feed-forward add.
	// X0-X3 were just stored, so they are reused to hold the input words.
	__ldrd		r8, r9, sp, 48
	__ldrd		r10, r11, sp, 56
	add		X4, r8, X4, ror #brot
	add		X5, r9, X5, ror #brot
	ldmia		r12!, {X0-X3}
	add		X6, r10, X6, ror #brot
	add		X7, r11, X7, ror #brot
	_le32_bswap_4x	X4, X5, X6, X7,  r8
	eor		X4, X4, X0
	eor		X5, X5, X1
	eor		X6, X6, X2
	eor		X7, X7, X3
	stmia		r14!, {X4-X7}

	// x8-x15
	pop		{r0-r7}			// (x8-x9,x12-x15,x10-x11)
	// Stack: orig_x0-orig_x15 OUT IN LEN
	__ldrd		r8, r9, sp, 32
	__ldrd		r10, r11, sp, 40
	add		r0, r0, r8		// x8
	add		r1, r1, r9		// x9
	add		r6, r6, r10		// x10
	add		r7, r7, r11		// x11
	_le32_bswap_4x	r0, r1, r6, r7,  r8
	ldmia		r12!, {r8-r11}
	eor		r0, r0, r8		// x8
	eor		r1, r1, r9		// x9
	eor		r6, r6, r10		// x10
	eor		r7, r7, r11		// x11
	stmia		r14!, {r0,r1,r6,r7}
	ldmia		r12!, {r0,r1,r6,r7}
	__ldrd		r8, r9, sp, 48
	__ldrd		r10, r11, sp, 56
	add		r2, r8, r2, ror #drot	// x12
	add		r3, r9, r3, ror #drot	// x13
	add		r4, r10, r4, ror #drot	// x14
	add		r5, r11, r5, ror #drot	// x15
	// r9 (not r8) is the scratch register here: r8 still holds the
	// original x12, which is incremented as the block counter below.
	_le32_bswap_4x	r2, r3, r4, r5,  r9
	  ldr		r9, [sp, #72]		// load LEN
	eor		r2, r2, r0		// x12
	eor		r3, r3, r1		// x13
	eor		r4, r4, r6		// x14
	eor		r5, r5, r7		// x15
	  subs		r9, #64			// decrement and check LEN
	stmia		r14!, {r2-r5}

	beq		.Ldone\@

.Lprepare_for_next_block\@:

	// Stack: x0-x15 OUT IN LEN
	// Registers: r8 is the block counter, r9 is the remaining LEN,
	// r12 is IN, r14 is OUT.

	// Increment block counter (x12)
	add		r8, #1

	// Store updated (OUT, IN, LEN)
	str		r14, [sp, #64]
	str		r12, [sp, #68]
	str		r9, [sp, #72]

	  mov		r14, sp

	// Store updated block counter (x12)
	str		r8, [sp, #48]

	// Re-create the (unused0-unused1, x10-x11) area below the state.
	  sub		sp, #16

	// Reload state and do next block
	ldmia		r14!, {r0-r11}		// load x0-x11
	__strd		r10, r11, sp, 8		// store x10-x11 before state
	ldmia		r14, {r10-r12,r14}	// load x12-x15
	b		.Lnext_block\@

.Lxor_slowpath\@:
	// Slow path: < 64 bytes remaining, or unaligned input or output buffer.
	// We handle it by storing the 64 bytes of keystream to the stack, then
	// XOR-ing the needed portion with the data.

	// Allocate keystream buffer
	sub		sp, #64
	mov		r14, sp

	// Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
	// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.

	// Save keystream for x0-x3
	__ldrd		r8, r9, sp, 96
	__ldrd		r10, r11, sp, 104
	add		X0, X0, r8
	add		X1, X1, r9
	add		X2, X2, r10
	add		X3, X3, r11
	_le32_bswap_4x	X0, X1, X2, X3,  r8
	stmia		r14!, {X0-X3}

	// Save keystream for x4-x7
	__ldrd		r8, r9, sp, 112
	__ldrd		r10, r11, sp, 120
	add		X4, r8, X4, ror #brot
	add		X5, r9, X5, ror #brot
	add		X6, r10, X6, ror #brot
	add		X7, r11, X7, ror #brot
	_le32_bswap_4x	X4, X5, X6, X7,  r8
	  add		r8, sp, #64		// r8 = address of the pushed x8-x9,...
	stmia		r14!, {X4-X7}

	// Save keystream for x8-x15
	ldm		r8, {r0-r7}		// (x8-x9,x12-x15,x10-x11)
	__ldrd		r8, r9, sp, 128
	__ldrd		r10, r11, sp, 136
	add		r0, r0, r8		// x8
	add		r1, r1, r9		// x9
	add		r6, r6, r10		// x10
	add		r7, r7, r11		// x11
	_le32_bswap_4x	r0, r1, r6, r7,  r8
	stmia		r14!, {r0,r1,r6,r7}
	__ldrd		r8, r9, sp, 144
	__ldrd		r10, r11, sp, 152
	add		r2, r8, r2, ror #drot	// x12
	add		r3, r9, r3, ror #drot	// x13
	add		r4, r10, r4, ror #drot	// x14
	add		r5, r11, r5, ror #drot	// x15
	// r9 is the scratch register so that r8 keeps the original x12.
	_le32_bswap_4x	r2, r3, r4, r5,  r9
	stmia		r14, {r2-r5}

	// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
	// Registers: r8 is block counter, r12 is IN.

	ldr		r9, [sp, #168]		// LEN
	ldr		r14, [sp, #160]		// OUT
	cmp		r9, #64
	  mov		r0, sp			// r0 = keystream read pointer
	movle		r1, r9
	movgt		r1, #64
	// r1 is number of bytes to XOR, in range [1, 64]

.if __LINUX_ARM_ARCH__ < 6
	orr		r2, r12, r14
	tst		r2, #3			// IN or OUT misaligned?
	bne		.Lxor_next_byte\@
.endif

	// XOR a word at a time
	// Unrolled 16 times (16 * 4 == 64 bytes); leaves early once fewer
	// than 4 bytes are left.
.rept 16
	subs		r1, #4
	blt		.Lxor_words_done\@
	ldr		r2, [r12], #4
	ldr		r3, [r0], #4
	eor		r2, r2, r3
	str		r2, [r14], #4
.endr
	b		.Lxor_slowpath_done\@
.Lxor_words_done\@:
	// r1 went negative above; its low two bits are the leftover byte count.
	ands		r1, r1, #3
	beq		.Lxor_slowpath_done\@

	// XOR a byte at a time
.Lxor_next_byte\@:
	ldrb		r2, [r12], #1
	ldrb		r3, [r0], #1
	eor		r2, r2, r3
	strb		r2, [r14], #1
	subs		r1, #1
	bne		.Lxor_next_byte\@

.Lxor_slowpath_done\@:
	subs		r9, #64
	// Drop the keystream buffer (64) and the unused0-unused7 area (32).
	add		sp, #96
	bgt		.Lprepare_for_next_block\@

.Ldone\@:
.endm	// _chacha
/*
 * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
 *		     const u32 *state, int nrounds);
 *
 * XORs 'bytes' bytes from 'src' with ChaCha keystream into 'dst'.
 *
 * In:  r0 = dst, r1 = src, r2 = bytes, r3 = state (16 words),
 *      [sp] = nrounds.
 * Returns immediately when bytes == 0.  nrounds == 12 selects the 12-round
 * expansion of _chacha; any other value runs the 20-round expansion.
 *
 * The state is copied to the stack and only that copy is updated (block
 * counter word x12); nothing is written back through 'state' here.
 *
 * r4-r11 are saved on entry and restored on return; the return itself is
 * done by popping the saved lr into pc.
 */
ENTRY(chacha_doarm)
	cmp		r2, #0			// len == 0?
	reteq		lr

	// Flags from this compare are consumed by 'beq 1f' below; none of the
	// instructions in between set the condition flags.
	ldr		ip, [sp]
	cmp		ip, #12

	// r0-r2 end up just above the state copy as (OUT, IN, LEN).
	push		{r0-r2,r4-r11,lr}

	// Push state x0-x15 onto stack.
	// Also store an extra copy of x10-x11 just before the state.

	add		X12, r3, #48
	ldm		X12, {X12,X13,X14,X15}
	push		{X12,X13,X14,X15}
	sub		sp, sp, #64

	__ldrd		X8_X10, X9_X11, r3, 40
	__strd		X8_X10, X9_X11, sp, 8
	__strd		X8_X10, X9_X11, sp, 56
	ldm		r3, {X0-X9_X11}
	__strd		X0, X1, sp, 16
	__strd		X2, X3, sp, 24
	__strd		X4, X5, sp, 32
	__strd		X6, X7, sp, 40
	__strd		X8_X10, X9_X11, sp, 48
	// Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN

	beq		1f
	_chacha		20

	// Skip the state copy (64 bytes) and OUT, IN, LEN (12 bytes).
0:	add		sp, #76
	pop		{r4-r11, pc}

1:	_chacha		12
	b		0b
ENDPROC(chacha_doarm)
/*
 * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
 *
 * Runs the ChaCha permutation on 'state' and stores words x0-x3 and x12-x15
 * of the permuted state to 'out'.  Unlike _chacha, the original state is
 * not added back afterwards.
 *
 * In:  r0 = state, r1 = out, r2 = nrounds.
 * nrounds == 12 selects the 12-round permutation; any other value runs the
 * 20-round permutation.
 *
 * r4-r11 are saved on entry and restored on return; the return itself is
 * done by popping the saved lr into pc.
 */
ENTRY(hchacha_block_arm)
	// r1 ('out') is pushed as well so it can be recovered after the
	// permutation has used every register.
	push		{r1,r4-r11,lr}

	// Flags are consumed by 'beq 1f' below; none of the instructions in
	// between set the condition flags.
	cmp		r2, #12			// ChaCha12 ?

	mov		r14, r0
	ldmia		r14!, {r0-r11}		// load x0-x11
	push		{r10-r11}		// store x10-x11 to stack
	ldm		r14, {r10-r12,r14}	// load x12-x15
	sub		sp, #8			// slot used to spill x8-x9

	beq		1f
	_chacha_permute	20

	// Skip over (unused0-unused1, x10-x11)
0:	add		sp, #16

	// Fix up rotations of x12-x15
	ror		X12, X12, #drot
	ror		X13, X13, #drot
	  pop		{r4}			// load 'out'
	ror		X14, X14, #drot
	ror		X15, X15, #drot

	// Store (x0-x3,x12-x15) to 'out'
	stm		r4, {X0,X1,X2,X3,X12,X13,X14,X15}

	pop		{r4-r11,pc}

1:	_chacha_permute	12
	b		0b
ENDPROC(hchacha_block_arm)