/*
 * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
 *
 * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
@ NEON register aliases. The q registers hold the hash state and inputs;
@ the d-register halves are aliased below (qN == d(2N):d(2N+1)).
SHASH		.req	q0
T1		.req	q1
XL		.req	q2
XM		.req	q3
XH		.req	q4
IN1		.req	q4

SHASH_L		.req	d0
SHASH_H		.req	d1
T1_L		.req	d2
T1_H		.req	d3
XL_L		.req	d4
XL_H		.req	d5
XM_L		.req	d6
XM_H		.req	d7
XH_L		.req	d8

@ scratch registers used by the vmull.p8 fallback multiply
t0l		.req	d10
t0h		.req	d11
t1l		.req	d12
t1h		.req	d13
t2l		.req	d14
t2h		.req	d15
t3l		.req	d16
t3h		.req	d17
t4l		.req	d18
t4h		.req	d19

t0q		.req	q5
t1q		.req	q6
t2q		.req	q7
t3q		.req	q8
t4q		.req	q9
T2		.req	q9

@ byte-rotated copies of the key halves, precomputed for the p8 path
s1l		.req	d20
s1h		.req	d21
s2l		.req	d22
s2h		.req	d23
s3l		.req	d24
s3h		.req	d25
s4l		.req	d26
s4h		.req	d27

MASK		.req	d28	@ p64 path only
SHASH2_p8	.req	d28	@ p8 path only (shares d28 with MASK)

k16		.req	d29	@ masks used by the p8 fixup steps
k32		.req	d30
k48		.req	d31
SHASH2_p64	.req	d31	@ p64 path only (shares d31 with k48)
.text
	.fpu		crypto-neon-fp-armv8

	@ 64x64 -> 128 bit carryless multiply using the single vmull.p64
	@ instruction. b1..b4 are unused here; they exist only so this macro
	@ has the same signature as __pmull_p8 below.
	.macro		__pmull_p64, rd, rn, rm, b1, b2, b3, b4
	vmull.p64	\rd, \rn, \rm
	.endm
2015-03-10 09:47:48 +01:00
/ *
2017-07-24 11:28:17 +01:00
* This i m p l e m e n t a t i o n o f 6 4 x64 - > 1 2 8 b i t p o l y n o m i a l m u l t i p l i c a t i o n
* using v m u l l . p8 i n s t r u c t i o n s ( 8 x8 - > 1 6 ) i s t a k e n f r o m t h e p a p e r
* " Fast S o f t w a r e P o l y n o m i a l M u l t i p l i c a t i o n o n A R M P r o c e s s o r s U s i n g
* the N E O N E n g i n e " b y D a n i l o C a m a r a , C o n r a d o G o u v e a , J u l i o L o p e z a n d
* Ricardo D a h a b ( h t t p s : / / h a l . i n r i a . f r / h a l - 0 1 5 0 6 5 7 2 )
*
* It h a s b e e n s l i g h t l y t w e a k e d f o r i n - o r d e r p e r f o r m a n c e , a n d t o a l l o w
* ' rq' t o o v e r l a p w i t h ' a d ' o r ' b d ' .
2015-03-10 09:47:48 +01:00
* /
2017-07-24 11:28:17 +01:00
.macro _ _ pmull_ p8 , r q , a d , b d , b1 =t4l , b2 =t3l , b3 =t4l , b4 =t3l
vext. 8 t 0 l , \ a d , \ a d , #1 @ A1
.ifc \ b1 , t 4 l
vext. 8 t 4 l , \ b d , \ b d , #1 @ B1
.endif
vmull. p8 t 0 q , t 0 l , \ b d @ F = A1*B
vext. 8 t 1 l , \ a d , \ a d , #2 @ A2
vmull. p8 t 4 q , \ a d , \ b1 @ E = A*B1
.ifc \ b2 , t 3 l
vext. 8 t 3 l , \ b d , \ b d , #2 @ B2
.endif
vmull. p8 t 1 q , t 1 l , \ b d @ H = A2*B
vext. 8 t 2 l , \ a d , \ a d , #3 @ A3
vmull. p8 t 3 q , \ a d , \ b2 @ G = A*B2
veor t 0 q , t 0 q , t 4 q @ L = E + F
.ifc \ b3 , t 4 l
vext. 8 t 4 l , \ b d , \ b d , #3 @ B3
.endif
vmull. p8 t 2 q , t 2 l , \ b d @ J = A3*B
veor t 0 l , t 0 l , t 0 h @ t0 = (L) (P0 + P1) << 8
veor t 1 q , t 1 q , t 3 q @ M = G + H
.ifc \ b4 , t 3 l
vext. 8 t 3 l , \ b d , \ b d , #4 @ B4
.endif
vmull. p8 t 4 q , \ a d , \ b3 @ I = A*B3
veor t 1 l , t 1 l , t 1 h @ t1 = (M) (P2 + P3) << 16
vmull. p8 t 3 q , \ a d , \ b4 @ K = A*B4
vand t 0 h , t 0 h , k 4 8
vand t 1 h , t 1 h , k 3 2
veor t 2 q , t 2 q , t 4 q @ N = I + J
veor t 0 l , t 0 l , t 0 h
veor t 1 l , t 1 l , t 1 h
veor t 2 l , t 2 l , t 2 h @ t2 = (N) (P4 + P5) << 24
vand t 2 h , t 2 h , k 1 6
veor t 3 l , t 3 l , t 3 h @ t3 = (K) (P6 + P7) << 32
vmov. i 6 4 t 3 h , #0
vext. 8 t 0 q , t 0 q , t 0 q , #15
veor t 2 l , t 2 l , t 2 h
vext. 8 t 1 q , t 1 q , t 1 q , #14
vmull. p8 \ r q , \ a d , \ b d @ D = A*B
vext. 8 t 2 q , t 2 q , t 2 q , #13
vext. 8 t 3 q , t 3 q , t 3 q , #12
veor t 0 q , t 0 q , t 1 q
veor t 2 q , t 2 q , t 3 q
veor \ r q , \ r q , t 0 q
veor \ r q , \ r q , t 2 q
.endm
//
// PMULL (64x64 -> 128) based reduction for CPUs that can do
// it in a single instruction.
//
	.macro		__pmull_reduce_p64
	vmull.p64	T1, XL_L, MASK

	veor		XH_L, XH_L, XM_H
	vext.8		T1, T1, T1, #8
	veor		XL_H, XL_H, XM_L
	veor		T1, T1, XL

	vmull.p64	XL, T1_H, MASK
	.endm
//
// Alternative reduction for CPUs that lack support for the
// 64x64 -> 128 PMULL instruction
//
	.macro		__pmull_reduce_p8
	veor		XL_H, XL_H, XM_L
	veor		XH_L, XH_L, XM_H

	@ multiply by (x^57 + x^62 + x^63), i.e. shift-and-xor folding
	vshl.i64	T1, XL, #57
	vshl.i64	T2, XL, #62
	veor		T1, T1, T2
	vshl.i64	T2, XL, #63
	veor		T1, T1, T2
	veor		XL_H, XL_H, T1_L
	veor		XH_L, XH_L, T1_H

	vshr.u64	T1, XL, #1
	veor		XH, XH, XL
	veor		XL, XL, T1
	vshr.u64	T1, T1, #6
	vshr.u64	XL, XL, #1
	.endm
@ Main GHASH update loop, shared by both entry points below; \pn selects
@ the multiply/reduce flavour (p64 or p8). Per the C prototype documented
@ at the entry points: r0 = block count, r1 = digest (dg), r2 = src,
@ [sp] = optional pointer to a head block (NULL if none).
	.macro		ghash_update, pn
	vld1.64		{XL}, [r1]

	/* do the head block first, if supplied */
	ldr		ip, [sp]
	teq		ip, #0
	beq		0f
	vld1.64		{T1}, [ip]
	teq		r0, #0			@ head block does not consume the count
	b		1f

0:	vld1.64		{T1}, [r2]!
	subs		r0, r0, #1

1:	/* multiply XL by SHASH in GF(2^128) */
#ifndef CONFIG_CPU_BIG_ENDIAN
	vrev64.8	T1, T1
#endif
	vext.8		IN1, T1, T1, #8
	veor		T1_L, T1_L, XL_H
	veor		XL, XL, IN1

	__pmull_\pn	XH, XL_H, SHASH_H, s1h, s2h, s3h, s4h	@ a1 * b1
	veor		T1, T1, XL
	__pmull_\pn	XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l	@ a0 * b0
	__pmull_\pn	XM, T1_L, SHASH2_\pn			@ (a1+a0)(b1+b0)

	veor		T1, XL, XH
	veor		XM, XM, T1

	__pmull_reduce_\pn

	veor		T1, T1, XH
	veor		XL, XL, T1

	bne		0b			@ loop while blocks remain

	vst1.64		{XL}, [r1]
	bx		lr
	.endm
/*
 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
 *			   struct ghash_key const *k, const char *head)
 */
ENTRY(pmull_ghash_update_p64)
	vld1.64		{SHASH}, [r3]
	veor		SHASH2_p64, SHASH_L, SHASH_H

	@ MASK = 0xe1 << 57, used by the p64 reduction
	vmov.i8		MASK, #0xe1
	vshl.u64	MASK, MASK, #57

	ghash_update	p64
ENDPROC(pmull_ghash_update_p64)
ENTRY(pmull_ghash_update_p8)
	vld1.64		{SHASH}, [r3]
	veor		SHASH2_p8, SHASH_L, SHASH_H

	@ precompute the byte-rotated versions of each key half that
	@ __pmull_p8 would otherwise have to derive per block (B1..B4)
	vext.8		s1l, SHASH_L, SHASH_L, #1
	vext.8		s2l, SHASH_L, SHASH_L, #2
	vext.8		s3l, SHASH_L, SHASH_L, #3
	vext.8		s4l, SHASH_L, SHASH_L, #4
	vext.8		s1h, SHASH_H, SHASH_H, #1
	vext.8		s2h, SHASH_H, SHASH_H, #2
	vext.8		s3h, SHASH_H, SHASH_H, #3
	vext.8		s4h, SHASH_H, SHASH_H, #4

	@ masks used by the __pmull_p8 fixup steps
	vmov.i64	k16, #0xffff
	vmov.i64	k32, #0xffffffff
	vmov.i64	k48, #0xffffffffffff

	ghash_update	p8
ENDPROC(pmull_ghash_update_p8)