/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with NEON/ARMv8 vmull.p8/64 instructions.
 *
 * Copyright (C) 2015 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.arch		armv8-a
	.fpu		crypto-neon-fp-armv8

	@ Quadword aliases for the GHASH state and operands
	SHASH		.req	q0		@ H, the hash key
	T1		.req	q1
	XL		.req	q2		@ accumulator, low/high halves
	XM		.req	q3
	XH		.req	q4
	IN1		.req	q4		@ overlaps XH (not live at the same time)

	@ Doubleword views of the quadword aliases above
	SHASH_L		.req	d0
	SHASH_H		.req	d1
	T1_L		.req	d2
	T1_H		.req	d3
	XL_L		.req	d4
	XL_H		.req	d5
	XM_L		.req	d6
	XM_H		.req	d7
	XH_L		.req	d8

	@ Scratch registers for the vmull.p8 based multiplication
	t0l		.req	d10
	t0h		.req	d11
	t1l		.req	d12
	t1h		.req	d13
	t2l		.req	d14
	t2h		.req	d15
	t3l		.req	d16
	t3h		.req	d17
	t4l		.req	d18
	t4h		.req	d19

	t0q		.req	q5
	t1q		.req	q6
	t2q		.req	q7
	t3q		.req	q8
	t4q		.req	q9
	T2		.req	q9		@ overlaps t4q

	@ Pre-rotated copies of the key halves (p8 path only)
	s1l		.req	d20
	s1h		.req	d21
	s2l		.req	d22
	s2h		.req	d23
	s3l		.req	d24
	s3h		.req	d25
	s4l		.req	d26
	s4h		.req	d27

	MASK		.req	d28		@ reduction polynomial (p64 path)
	SHASH2_p8	.req	d28		@ SHASH_L ^ SHASH_H (p8 path)

	k16		.req	d29		@ bit masks for vmull.p8 carry cleanup
	k32		.req	d30
	k48		.req	d31
	SHASH2_p64	.req	d31		@ SHASH_L ^ SHASH_H (p64 path)

	@ Powers of H for the 4-way p64 path. These overlap the s1l..s4h
	@ scratch bank above: only one of the two code paths runs per call.
	HH		.req	q10		@ H^2
	HH3		.req	q11		@ H^3
	HH4		.req	q12		@ H^4
	HH34		.req	q13		@ folded H^3/H^4 halves

	HH_L		.req	d20
	HH_H		.req	d21
	HH3_L		.req	d22
	HH3_H		.req	d23
	HH4_L		.req	d24
	HH4_H		.req	d25
	HH34_L		.req	d26
	HH34_H		.req	d27
	SHASH2_H	.req	d29		@ HH_L ^ HH_H

	@ Per-block partial products for the 4-way path
	XL2		.req	q5
	XM2		.req	q6
	XH2		.req	q7
	T3		.req	q8

	XL2_L		.req	d10
	XL2_H		.req	d11
	XM2_L		.req	d12
	XM2_H		.req	d13
	T3_L		.req	d16
	T3_H		.req	d17

	.text
2017-07-24 11:28:17 +01:00
.macro _ _ pmull_ p64 , r d , r n , r m , b1 , b2 , b3 , b4
vmull. p64 \ r d , \ r n , \ r m
.endm
2015-03-10 09:47:48 +01:00
/ *
2017-07-24 11:28:17 +01:00
* This i m p l e m e n t a t i o n o f 6 4 x64 - > 1 2 8 b i t p o l y n o m i a l m u l t i p l i c a t i o n
* using v m u l l . p8 i n s t r u c t i o n s ( 8 x8 - > 1 6 ) i s t a k e n f r o m t h e p a p e r
* " Fast S o f t w a r e P o l y n o m i a l M u l t i p l i c a t i o n o n A R M P r o c e s s o r s U s i n g
* the N E O N E n g i n e " b y D a n i l o C a m a r a , C o n r a d o G o u v e a , J u l i o L o p e z a n d
* Ricardo D a h a b ( h t t p s : / / h a l . i n r i a . f r / h a l - 0 1 5 0 6 5 7 2 )
*
* It h a s b e e n s l i g h t l y t w e a k e d f o r i n - o r d e r p e r f o r m a n c e , a n d t o a l l o w
* ' rq' t o o v e r l a p w i t h ' a d ' o r ' b d ' .
2015-03-10 09:47:48 +01:00
* /
2017-07-24 11:28:17 +01:00
.macro _ _ pmull_ p8 , r q , a d , b d , b1 =t4l , b2 =t3l , b3 =t4l , b4 =t3l
vext. 8 t 0 l , \ a d , \ a d , #1 @ A1
.ifc \ b1 , t 4 l
vext. 8 t 4 l , \ b d , \ b d , #1 @ B1
.endif
vmull. p8 t 0 q , t 0 l , \ b d @ F = A1*B
vext. 8 t 1 l , \ a d , \ a d , #2 @ A2
vmull. p8 t 4 q , \ a d , \ b1 @ E = A*B1
.ifc \ b2 , t 3 l
vext. 8 t 3 l , \ b d , \ b d , #2 @ B2
.endif
vmull. p8 t 1 q , t 1 l , \ b d @ H = A2*B
vext. 8 t 2 l , \ a d , \ a d , #3 @ A3
vmull. p8 t 3 q , \ a d , \ b2 @ G = A*B2
veor t 0 q , t 0 q , t 4 q @ L = E + F
.ifc \ b3 , t 4 l
vext. 8 t 4 l , \ b d , \ b d , #3 @ B3
.endif
vmull. p8 t 2 q , t 2 l , \ b d @ J = A3*B
veor t 0 l , t 0 l , t 0 h @ t0 = (L) (P0 + P1) << 8
veor t 1 q , t 1 q , t 3 q @ M = G + H
.ifc \ b4 , t 3 l
vext. 8 t 3 l , \ b d , \ b d , #4 @ B4
.endif
vmull. p8 t 4 q , \ a d , \ b3 @ I = A*B3
veor t 1 l , t 1 l , t 1 h @ t1 = (M) (P2 + P3) << 16
vmull. p8 t 3 q , \ a d , \ b4 @ K = A*B4
vand t 0 h , t 0 h , k 4 8
vand t 1 h , t 1 h , k 3 2
veor t 2 q , t 2 q , t 4 q @ N = I + J
veor t 0 l , t 0 l , t 0 h
veor t 1 l , t 1 l , t 1 h
veor t 2 l , t 2 l , t 2 h @ t2 = (N) (P4 + P5) << 24
vand t 2 h , t 2 h , k 1 6
veor t 3 l , t 3 l , t 3 h @ t3 = (K) (P6 + P7) << 32
vmov. i 6 4 t 3 h , #0
vext. 8 t 0 q , t 0 q , t 0 q , #15
veor t 2 l , t 2 l , t 2 h
vext. 8 t 1 q , t 1 q , t 1 q , #14
vmull. p8 \ r q , \ a d , \ b d @ D = A*B
vext. 8 t 2 q , t 2 q , t 2 q , #13
vext. 8 t 3 q , t 3 q , t 3 q , #12
veor t 0 q , t 0 q , t 1 q
veor t 2 q , t 2 q , t 3 q
veor \ r q , \ r q , t 0 q
veor \ r q , \ r q , t 2 q
.endm
/ /
/ / PMULL ( 6 4 x64 - > 1 2 8 ) b a s e d r e d u c t i o n f o r C P U s t h a t c a n d o
/ / it i n a s i n g l e i n s t r u c t i o n .
/ /
.macro __pmull_reduce_p64
vmull. p64 T 1 , X L _ L , M A S K
veor X H _ L , X H _ L , X M _ H
vext. 8 T 1 , T 1 , T 1 , #8
veor X L _ H , X L _ H , X M _ L
veor T 1 , T 1 , X L
vmull. p64 X L , T 1 _ H , M A S K
.endm
/ /
/ / Alternative r e d u c t i o n f o r C P U s t h a t l a c k s u p p o r t f o r t h e
/ / 6 4 x6 4 - > 1 2 8 P M U L L i n s t r u c t i o n
/ /
.macro __pmull_reduce_p8
veor X L _ H , X L _ H , X M _ L
veor X H _ L , X H _ L , X M _ H
vshl. i 6 4 T 1 , X L , #57
vshl. i 6 4 T 2 , X L , #62
veor T 1 , T 1 , T 2
vshl. i 6 4 T 2 , X L , #63
veor T 1 , T 1 , T 2
veor X L _ H , X L _ H , T 1 _ L
veor X H _ L , X H _ L , T 1 _ H
vshr. u 6 4 T 1 , X L , #1
veor X H , X H , X L
veor X L , X L , T 1
vshr. u 6 4 T 1 , T 1 , #6
vshr. u 6 4 X L , X L , #1
.endm
.macro ghash_ u p d a t e , p n
2015-03-10 09:47:48 +01:00
vld1 . 6 4 { X L } , [ r1 ]
/* do the head block first, if supplied */
ldr i p , [ s p ]
teq i p , #0
beq 0 f
vld1 . 6 4 { T 1 } , [ i p ]
teq r0 , #0
2018-08-23 15:48:51 +01:00
b 3 f
0 : .ifc \ p n , p 64
tst r0 , #3 / / s k i p u n t i l #b l o c k s i s a
bne 2 f / / r o u n d m u l t i p l e o f 4
vld1 . 8 { X L 2 - X M 2 } , [ r2 ] !
1 : vld1 . 8 { T 3 - T 2 } , [ r2 ] !
vrev6 4 . 8 X L 2 , X L 2
vrev6 4 . 8 X M 2 , X M 2
subs r0 , r0 , #4
vext. 8 T 1 , X L 2 , X L 2 , #8
veor X L 2 _ H , X L 2 _ H , X L _ L
veor X L , X L , T 1
vrev6 4 . 8 T 3 , T 3
vrev6 4 . 8 T 1 , T 2
vmull. p64 X H , H H 4 _ H , X L _ H / / a1 * b1
veor X L 2 _ H , X L 2 _ H , X L _ H
vmull. p64 X L , H H 4 _ L , X L _ L / / a0 * b0
vmull. p64 X M , H H 3 4 _ H , X L 2 _ H / / ( a1 + a0 ) ( b1 + b0 )
vmull. p64 X H 2 , H H 3 _ H , X M 2 _ L / / a1 * b1
veor X M 2 _ L , X M 2 _ L , X M 2 _ H
vmull. p64 X L 2 , H H 3 _ L , X M 2 _ H / / a0 * b0
vmull. p64 X M 2 , H H 3 4 _ L , X M 2 _ L / / ( a1 + a0 ) ( b1 + b0 )
veor X H , X H , X H 2
veor X L , X L , X L 2
veor X M , X M , X M 2
vmull. p64 X H 2 , H H _ H , T 3 _ L / / a1 * b1
veor T 3 _ L , T 3 _ L , T 3 _ H
vmull. p64 X L 2 , H H _ L , T 3 _ H / / a0 * b0
vmull. p64 X M 2 , S H A S H 2 _ H , T 3 _ L / / ( a1 + a0 ) ( b1 + b0 )
veor X H , X H , X H 2
veor X L , X L , X L 2
veor X M , X M , X M 2
vmull. p64 X H 2 , S H A S H _ H , T 1 _ L / / a1 * b1
veor T 1 _ L , T 1 _ L , T 1 _ H
vmull. p64 X L 2 , S H A S H _ L , T 1 _ H / / a0 * b0
vmull. p64 X M 2 , S H A S H 2 _ p64 , T 1 _ L / / ( a1 + a0 ) ( b1 + b0 )
veor X H , X H , X H 2
veor X L , X L , X L 2
veor X M , X M , X M 2
2015-03-10 09:47:48 +01:00
2018-08-23 15:48:51 +01:00
beq 4 f
vld1 . 8 { X L 2 - X M 2 } , [ r2 ] !
veor T 1 , X L , X H
veor X M , X M , T 1
_ _ pmull_ r e d u c e _ p64
veor T 1 , T 1 , X H
veor X L , X L , T 1
b 1 b
.endif
2 : vld1 . 6 4 { T 1 } , [ r2 ] !
2015-03-10 09:47:48 +01:00
subs r0 , r0 , #1
2018-08-23 15:48:51 +01:00
3 : /* multiply XL by SHASH in GF(2^128) */
2015-03-10 09:47:48 +01:00
# ifndef C O N F I G _ C P U _ B I G _ E N D I A N
vrev6 4 . 8 T 1 , T 1
# endif
vext. 8 I N 1 , T 1 , T 1 , #8
2017-07-24 11:28:17 +01:00
veor T 1 _ L , T 1 _ L , X L _ H
2015-03-10 09:47:48 +01:00
veor X L , X L , I N 1
2017-07-24 11:28:17 +01:00
_ _ pmull_ \ p n X H , X L _ H , S H A S H _ H , s1 h , s2 h , s3 h , s4 h @ a1 * b1
2015-03-10 09:47:48 +01:00
veor T 1 , T 1 , X L
2017-07-24 11:28:17 +01:00
_ _ pmull_ \ p n X L , X L _ L , S H A S H _ L , s1 l , s2 l , s3 l , s4 l @ a0 * b0
_ _ pmull_ \ p n X M , T 1 _ L , S H A S H 2 _ \ p n @ (a1+a0)(b1+b0)
2015-03-10 09:47:48 +01:00
2018-08-23 15:48:51 +01:00
4 : veor T 1 , X L , X H
2015-03-10 09:47:48 +01:00
veor X M , X M , T 1
2017-07-24 11:28:17 +01:00
_ _ pmull_ r e d u c e _ \ p n
2015-03-10 09:47:48 +01:00
2017-07-24 11:28:17 +01:00
veor T 1 , T 1 , X H
veor X L , X L , T 1
2015-03-10 09:47:48 +01:00
bne 0 b
vst1 . 6 4 { X L } , [ r1 ]
bx l r
2017-07-24 11:28:17 +01:00
.endm
/ *
* void p m u l l _ g h a s h _ u p d a t e ( i n t b l o c k s , u 6 4 d g [ ] , c o n s t c h a r * s r c ,
* struct g h a s h _ k e y c o n s t * k , c o n s t c h a r * h e a d )
* /
ENTRY( p m u l l _ g h a s h _ u p d a t e _ p64 )
2018-08-23 15:48:51 +01:00
vld1 . 6 4 { S H A S H } , [ r3 ] !
vld1 . 6 4 { H H } , [ r3 ] !
vld1 . 6 4 { H H 3 - H H 4 } , [ r3 ]
2017-07-24 11:28:17 +01:00
veor S H A S H 2 _ p64 , S H A S H _ L , S H A S H _ H
2018-08-23 15:48:51 +01:00
veor S H A S H 2 _ H , H H _ L , H H _ H
veor H H 3 4 _ L , H H 3 _ L , H H 3 _ H
veor H H 3 4 _ H , H H 4 _ L , H H 4 _ H
2017-07-24 11:28:17 +01:00
vmov. i 8 M A S K , #0xe1
vshl. u 6 4 M A S K , M A S K , #57
ghash_ u p d a t e p64
ENDPROC( p m u l l _ g h a s h _ u p d a t e _ p64 )
ENTRY( p m u l l _ g h a s h _ u p d a t e _ p8 )
vld1 . 6 4 { S H A S H } , [ r3 ]
veor S H A S H 2 _ p8 , S H A S H _ L , S H A S H _ H
vext. 8 s1 l , S H A S H _ L , S H A S H _ L , #1
vext. 8 s2 l , S H A S H _ L , S H A S H _ L , #2
vext. 8 s3 l , S H A S H _ L , S H A S H _ L , #3
vext. 8 s4 l , S H A S H _ L , S H A S H _ L , #4
vext. 8 s1 h , S H A S H _ H , S H A S H _ H , #1
vext. 8 s2 h , S H A S H _ H , S H A S H _ H , #2
vext. 8 s3 h , S H A S H _ H , S H A S H _ H , #3
vext. 8 s4 h , S H A S H _ H , S H A S H _ H , #4
vmov. i 6 4 k 1 6 , #0xffff
vmov. i 6 4 k 3 2 , #0xffffffff
vmov. i 6 4 k 4 8 , #0xffffffff f f f f
ghash_ u p d a t e p8
ENDPROC( p m u l l _ g h a s h _ u p d a t e _ p8 )