/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
 * NEON register aliases used throughout this file.
 * Note that XH and IN1 both name v7.
 */
	SHASH		.req	v0
	SHASH2		.req	v1
	T1		.req	v2
	T2		.req	v3
	MASK		.req	v4
	XL		.req	v5
	XM		.req	v6
	XH		.req	v7
	IN1		.req	v7

	/* masks, temporaries and lookup vectors used by the p8 macros */
	k00_16		.req	v8
	k32_48		.req	v9

	t3		.req	v10
	t4		.req	v11
	t5		.req	v12
	t6		.req	v13
	t7		.req	v14
	t8		.req	v15
	t9		.req	v16

	perm1		.req	v17
	perm2		.req	v18
	perm3		.req	v19

	sh1		.req	v20
	sh2		.req	v21
	sh3		.req	v22
	sh4		.req	v23

	ss1		.req	v24
	ss2		.req	v25
	ss3		.req	v26
	ss4		.req	v27

	/*
	 * Names used by the p64 4-way GHASH loop and by the GCM code.
	 * They overlay v8-v19, i.e. the same registers as k00_16 ... perm3
	 * above, so the two groups must not be live at the same time.
	 */
	XL2		.req	v8
	XM2		.req	v9
	XH2		.req	v10
	XL3		.req	v11
	XM3		.req	v12
	XH3		.req	v13
	TT3		.req	v14
	TT4		.req	v15
	HH		.req	v16
	HH3		.req	v17
	HH4		.req	v18
	HH34		.req	v19

	.text
	.arch		armv8-a+crypto
/*
 * rd.1q := polynomial (carry-less) product of the low doublewords of
 * rn and rm.  All three arguments are bare vector register names.
 */
	.macro		__pmull_p64, rd, rn, rm
	pmull		\rd\().1q, \rn\().1d, \rm\().1d
	.endm
/*
 * rd.1q := polynomial (carry-less) product of the high doublewords of
 * rn and rm.  All three arguments are bare vector register names.
 */
	.macro		__pmull2_p64, rd, rn, rm
	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
	.endm
/*
 * 64x64 polynomial multiply of the low halves of \ad and \bd built from
 * 8-bit PMULLs.  The low 8 bytes of \ad are rotated by 1, 2 and 3 bytes
 * into t3, t5 and t7, then the work is handed to __pmull_p8_\bd, so \bd
 * must be a name for which such a macro exists (SHASH or SHASH2).
 */
	.macro		__pmull_p8, rq, ad, bd
	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3

	__pmull_p8_\bd	\rq, \ad
	.endm
/*
 * High-half counterpart of __pmull_p8.  The rotated copies of \ad are
 * produced for all 16 bytes with TBL lookups through perm1-perm3, then
 * the work is handed to __pmull2_p8_\bd (only defined for SHASH).
 */
	.macro		__pmull2_p8, rq, ad, bd
	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3

	__pmull2_p8_\bd	\rq, \ad
	.endm
/*
 * Low-half multiply by SHASH: pmull (empty suffix argument) on .8b
 * operands, using the rotated copies sh1-sh4 of SHASH.
 */
	.macro		__pmull_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
	.endm
/*
 * Low-half multiply by SHASH2: pmull (empty suffix argument) on .8b
 * operands, using the rotated copies ss1-ss4 of SHASH2.
 */
	.macro		__pmull_p8_SHASH2, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
	.endm
/*
 * High-half multiply by SHASH: pmull2 (suffix argument "2") on .16b
 * operands, using the rotated copies sh1-sh4 of SHASH.
 */
	.macro		__pmull2_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
	.endm
/*
 * Common body of the 8-bit-PMULL based 64x64 polynomial multiply.
 *
 *   rq     destination, bare register name
 *   ad     operand A including its arrangement (.8b or .16b)
 *   bd     operand B including its arrangement
 *   nb     arrangement (8b or 16b) applied to t3/t5/t7 and to b1-b4
 *   t      instruction suffix: empty for pmull, 2 for pmull2
 *   b1-b4  bare names of the copies of B rotated by 1..4 bytes
 *
 * On entry t3, t5 and t7 hold A rotated by 1, 2 and 3 bytes (set up by
 * the __pmull_p8 / __pmull2_p8 wrappers).  Clobbers t3-t9 and reads the
 * k00_16 and k32_48 masks.
 */
	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
	pmull\t		\rq\().8h, \ad, \bd			// D = A*B

	eor		t3.16b, t3.16b, t4.16b			// L = E + F
	eor		t5.16b, t5.16b, t6.16b			// M = G + H
	eor		t7.16b, t7.16b, t8.16b			// N = I + J

	uzp1		t4.2d, t3.2d, t5.2d
	uzp2		t3.2d, t3.2d, t5.2d
	uzp1		t6.2d, t7.2d, t9.2d
	uzp2		t7.2d, t7.2d, t9.2d

	// t3 = (L) (P0 + P1) << 8
	// t5 = (M) (P2 + P3) << 16
	eor		t4.16b, t4.16b, t3.16b
	and		t3.16b, t3.16b, k32_48.16b

	// t7 = (N) (P4 + P5) << 24
	// t9 = (K) (P6 + P7) << 32
	eor		t6.16b, t6.16b, t7.16b
	and		t7.16b, t7.16b, k00_16.16b

	eor		t4.16b, t4.16b, t3.16b
	eor		t6.16b, t6.16b, t7.16b

	zip2		t5.2d, t4.2d, t3.2d
	zip1		t3.2d, t4.2d, t3.2d
	zip2		t9.2d, t6.2d, t7.2d
	zip1		t7.2d, t6.2d, t7.2d

	// byte-wise left shifts by 1, 2, 3 and 4 bytes (rotate via ext)
	ext		t3.16b, t3.16b, t3.16b, #15
	ext		t5.16b, t5.16b, t5.16b, #14
	ext		t7.16b, t7.16b, t7.16b, #13
	ext		t9.16b, t9.16b, t9.16b, #12

	// fold the four partial sums into D
	eor		t3.16b, t3.16b, t5.16b
	eor		t7.16b, t7.16b, t9.16b
	eor		\rq\().16b, \rq\().16b, t3.16b
	eor		\rq\().16b, \rq\().16b, t7.16b
	.endm
/*
 * One-time setup for the p64 GHASH path.  Expects x3 to point at the key
 * and SHASH to be loaded already.
 *
 * Loads three further 16-byte vectors from [x3 + 16] into HH, HH3, HH4
 * (presumably higher powers of the hash key - confirm against the code
 * that fills in the key structure) and derives:
 *   SHASH2 = { SHASH.d[0] ^ SHASH.d[1], HH.d[0]  ^ HH.d[1]  }
 *   HH34   = { HH3.d[0]   ^ HH3.d[1],   HH4.d[0] ^ HH4.d[1] }
 *   MASK   = 0xc200000000000000 in both lanes (0xe1 bytes shifted by 57)
 * Clobbers x8 and T1.
 */
	.macro		__pmull_pre_p64
	add		x8, x3, #16
	ld1		{HH.2d-HH4.2d}, [x8]

	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	trn1		HH34.2d, HH3.2d, HH4.2d
	trn2		T1.2d, HH3.2d, HH4.2d
	eor		HH34.16b, HH34.16b, T1.16b

	movi		MASK.16b, #0xe1
	shl		MASK.2d, MASK.2d, #57
	.endm
/*
 * One-time setup for the p8 GHASH path.  Expects SHASH to be loaded.
 *
 * Produces SHASH2 (both halves hold SHASH.d[0] ^ SHASH.d[1]), the
 * k00_16/k32_48 masks, the perm1-perm3 TBL index vectors and the rotated
 * key copies sh1-sh4 / ss1-ss4 consumed by __pmull_p8_tail.
 * Clobbers x5 and T1.
 */
	.macro		__pmull_pre_p8
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	// k00_16 := 0x0000000000000000_000000000000ffff
	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
	movi		k32_48.2d, #0xffffffff
	mov		k32_48.h[2], k32_48.h[0]
	ushr		k00_16.2d, k32_48.2d, #32

	// prepare the permutation vectors
	// perm1 rotates each 8-byte half by one byte; perm2, perm3 and T1
	// are perm1 with each doubleword rotated right by 8, 16 and 24 bits
	mov_q		x5, 0x080f0e0d0c0b0a09
	movi		T1.8b, #8
	dup		perm1.2d, x5
	eor		perm1.16b, perm1.16b, T1.16b
	ushr		perm2.2d, perm1.2d, #8
	ushr		perm3.2d, perm1.2d, #16
	ushr		T1.2d, perm1.2d, #24
	sli		perm2.2d, perm1.2d, #56
	sli		perm3.2d, perm1.2d, #48
	sli		T1.2d, perm1.2d, #40

	// precompute loop invariants
	tbl		sh1.16b, {SHASH.16b}, perm1.16b
	tbl		sh2.16b, {SHASH.16b}, perm2.16b
	tbl		sh3.16b, {SHASH.16b}, perm3.16b
	tbl		sh4.16b, {SHASH.16b}, T1.16b
	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
	.endm
//
// PMULL (64x64->128) based reduction for CPUs that can do
// it in a single instruction.
//
// Reads XL, XM, T1 and MASK; rewrites XL, XM and XH.d[0], and leaves a
// value in T2 that every user of this macro folds into XL together
// with XH immediately afterwards.
//
	.macro		__pmull_reduce_p64
	pmull		T2.1q, XL.1d, MASK.1d
	eor		XM.16b, XM.16b, T1.16b

	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	eor		XL.16b, XM.16b, T2.16b
	ext		T2.16b, XL.16b, XL.16b, #8
	pmull		XL.1q, XL.1d, MASK.1d
	.endm
//
// Alternative reduction for CPUs that lack support for the
// 64x64->128 PMULL instruction
//
// Uses shifts and XORs only.  Reads XL, XM, XH and T1; rewrites XL, XH
// and T1, and leaves a value in T2 that the user of this macro folds
// into XL together with XH immediately afterwards.
//
	.macro		__pmull_reduce_p8
	eor		XM.16b, XM.16b, T1.16b

	mov		XL.d[1], XM.d[0]
	mov		XH.d[0], XM.d[1]

	shl		T1.2d, XL.2d, #57
	shl		T2.2d, XL.2d, #62
	eor		T2.16b, T2.16b, T1.16b
	shl		T1.2d, XL.2d, #63
	eor		T2.16b, T2.16b, T1.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		T2.16b, T2.16b, T1.16b

	mov		XL.d[1], T2.d[0]
	mov		XH.d[0], T2.d[1]

	ushr		T2.2d, XL.2d, #1
	eor		XH.16b, XH.16b, XL.16b
	eor		XL.16b, XL.16b, T2.16b
	ushr		T2.2d, T2.2d, #6
	ushr		XL.2d, XL.2d, #1
	.endm
/*
 * Body shared by pmull_ghash_update_p64 and pmull_ghash_update_p8.
 * \pn (p64 or p8) selects the setup, multiply and reduction macros.
 *
 * Register use:
 *   w0  number of 16-byte blocks still to be read from [x2]
 *   x1  digest; loaded into XL on entry and stored back on exit
 *   x2  source pointer, post-incremented as blocks are consumed
 *   x3  key; SHASH is loaded from offset 0
 *   x4  optional head block, processed first when non-NULL
 *
 * NOTE(review): with w0 == 0 and x4 == NULL the code still consumes input
 * and decrements w0 below zero; callers presumably never do that -
 * confirm.
 */
	.macro		__pmull_ghash, pn
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]

	__pmull_pre_\pn

	/* do the head block first, if supplied */
	cbz		x4, 0f
	ld1		{T1.2d}, [x4]
	mov		x4, xzr
	b		3f

0:	.ifc		\pn, p64
	tbnz		w0, #0, 2f		// skip until #blocks is a
	tbnz		w0, #1, 2f		// round multiple of 4

	/*
	 * 4-way aggregated path (p64 only): four consecutive blocks are
	 * loaded into v12-v15 (XM3, XH3, TT3, TT4) and multiplied by HH4,
	 * HH3, HH and SHASH respectively, with the running digest XL folded
	 * into the first block; a single reduction follows.
	 */
1:	ld1		{XM3.16b-TT4.16b}, [x2], #64

	sub		w0, w0, #4

	rev64		T1.16b, XM3.16b
	rev64		T2.16b, XH3.16b
	rev64		TT4.16b, TT4.16b
	rev64		TT3.16b, TT3.16b

	ext		IN1.16b, TT4.16b, TT4.16b, #8
	ext		XL3.16b, TT3.16b, TT3.16b, #8

	/* fourth block times SHASH */
	eor		TT4.16b, TT4.16b, IN1.16b
	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1
	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0
	pmull		XM2.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)

	/* third block times HH */
	eor		TT3.16b, TT3.16b, XL3.16b
	pmull2		XH3.1q, HH.2d, XL3.2d		// a1 * b1
	pmull		XL3.1q, HH.1d, XL3.1d		// a0 * b0
	pmull2		XM3.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)

	ext		IN1.16b, T2.16b, T2.16b, #8
	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	/* second block times HH3 */
	eor		T2.16b, T2.16b, IN1.16b
	pmull2		XH3.1q, HH3.2d, IN1.2d		// a1 * b1
	pmull		XL3.1q, HH3.1d, IN1.1d		// a0 * b0
	pmull		XM3.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)

	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	/* (first block + digest) times HH4 */
	ext		IN1.16b, T1.16b, T1.16b, #8
	ext		TT3.16b, XL.16b, XL.16b, #8
	eor		XL.16b, XL.16b, IN1.16b
	eor		T1.16b, T1.16b, TT3.16b

	pmull2		XH.1q, HH4.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	pmull		XL.1q, HH4.1d, XL.1d		// a0 * b0
	pmull2		XM.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)

	/* accumulate the partial products of all four blocks */
	eor		XL.16b, XL.16b, XL2.16b
	eor		XH.16b, XH.16b, XH2.16b
	eor		XM.16b, XM.16b, XM2.16b

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p64

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbz		w0, 5f
	b		1b
	.endif

	/* single-block path */
2:	ld1		{T1.2d}, [x2], #16
	sub		w0, w0, #1

3:	/* multiply XL by SHASH in GF(2^128) */
CPU_LE(	rev64		T1.16b, T1.16b	)

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	__pmull_\pn	XL, XL, SHASH			// a0 * b0
	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)

4:	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_\pn

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		w0, 0b

5:	st1		{XL.2d}, [x1]
	ret
	.endm
/*
 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
 *			   struct ghash_key const *k, const char *head)
 *
 * Variant built on the 64x64->128 PMULL instruction.
 */
ENTRY(pmull_ghash_update_p64)
	__pmull_ghash	p64
ENDPROC(pmull_ghash_update_p64)
/*
 * Same arguments as pmull_ghash_update_p64, but built on 8-bit PMULLs
 * for CPUs without the 64x64->128 form.
 */
ENTRY(pmull_ghash_update_p8)
	__pmull_ghash	p8
ENDPROC(pmull_ghash_update_p8)
/*
 * Additional aliases for the GCM code: two key stream / counter blocks
 * and two input blocks.  v12-v15 are also named XM3, XH3, TT3 and TT4
 * (and t5-t8) by the GHASH code earlier in this file.
 */
	KS0		.req	v12
	KS1		.req	v13
	INP0		.req	v14
	INP1		.req	v15
/*
 * Load the AES round keys from [\rk] so that the last key always ends up
 * in v31:
 *   \rounds <  12  ->  v21-v31  (11 keys)
 *   \rounds == 12  ->  v19-v31  (13 keys)
 *   \rounds >  12  ->  v17-v31  (15 keys)
 * \rk is post-incremented by every load except the final one.
 * Clobbers the condition flags.
 */
	.macro		load_round_keys, rounds, rk
	cmp		\rounds, #12
	b.lo		2222f		/* 128 bits */
	b.eq		1111f		/* 192 bits */
	ld1		{v17.4s-v18.4s}, [\rk], #32
1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
	ld1		{v25.4s-v28.4s}, [\rk], #64
	ld1		{v29.4s-v31.4s}, [\rk]
	.endm
/*
 * One full AES encryption round (AESE followed by AESMC) applied to
 * \state using round key \key.  Both are bare vector register names.
 */
	.macro		enc_round, state, key
	aese		\state\().16b, \key\().16b
	aesmc		\state\().16b, \state\().16b
	.endm
/*
 * Encrypt the single block in \state in place, using the round keys laid
 * out by load_round_keys: v17/v18 are only used when \rounds > 12 and
 * v19/v20 only when \rounds >= 12.  The final round is AESE with v30
 * (no AESMC) followed by an XOR with v31.  Clobbers the condition flags.
 */
	.macro		enc_block, state, rounds
	cmp		\rounds, #12
	b.lo		2222f		/* 128 bits */
	b.eq		1111f		/* 192 bits */
	enc_round	\state, v17
	enc_round	\state, v18
1111:	enc_round	\state, v19
	enc_round	\state, v20
2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	enc_round	\state, \key
	.endr
	aese		\state\().16b, v30.16b
	eor		\state\().16b, \state\().16b, v31.16b
	.endm
/*
 * Combined AES-CTR and GHASH loop that handles two blocks per iteration,
 * with the AES rounds interleaved with the GHASH multiplies.
 * \enc == 1 builds the encrypt flavour, \enc == 0 the decrypt flavour.
 *
 * Register use:
 *   w0    block count; reduced by 2 per iteration and tested for zero,
 *         so it must be a non-zero even number
 *   x1    digest; loaded into XL on entry and stored back on exit
 *   x2    destination, post-incremented by 32 per iteration
 *   x3    source, post-incremented by 32 per iteration
 *   x4    key: SHASH at offset 0, HH at offset 16 (x4 is advanced by 16)
 *   x5    counter block: bytes 0-7 are copied unchanged into every
 *         counter, bytes 8-15 are loaded on entry and written on exit
 *   x6    round keys; when non-NULL they are loaded into v17-v31 first
 *         (x6 is advanced by load_round_keys)
 *   w7    number of AES rounds
 *   [sp]  (encrypt only) pointer to two blocks of key stream: read on
 *         entry, XORed into the first two input blocks, and overwritten
 *         on exit with the two key stream blocks computed last
 *
 * In both flavours GHASH is computed over the ciphertext.
 * Clobbers x8-x11 and the condition flags.
 */
	.macro		pmull_gcm_do_crypt, enc
	ld1		{SHASH.2d}, [x4], #16
	ld1		{HH.2d}, [x4]
	ld1		{XL.2d}, [x1]
	ldr		x8, [x5, #8]			// load lower counter

	movi		MASK.16b, #0xe1
	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
CPU_LE(	rev		x8, x8		)
	shl		MASK.2d, MASK.2d, #57
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	.if		\enc == 1
	ldr		x10, [sp]			// ks pointer (stack argument)
	ld1		{KS0.16b-KS1.16b}, [x10]
	.endif

	cbnz		x6, 4f				// round keys supplied?

0:	ld1		{INP0.16b-INP1.16b}, [x3], #32

	rev		x9, x8
	add		x11, x8, #1
	add		x8, x8, #2

	.if		\enc == 1
	eor		INP0.16b, INP0.16b, KS0.16b	// encrypt input
	eor		INP1.16b, INP1.16b, KS1.16b
	.endif

	ld1		{KS0.8b}, [x5]			// load upper counter
	rev		x11, x11
	sub		w0, w0, #2
	mov		KS1.8b, KS0.8b
	ins		KS0.d[1], x9			// set lower counter
	ins		KS1.d[1], x11

	rev64		T1.16b, INP1.16b

	cmp		w7, #12
	b.ge		2f				// AES-192/256?

1:	enc_round	KS0, v21
	ext		IN1.16b, T1.16b, T1.16b, #8

	enc_round	KS1, v21
	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1

	enc_round	KS0, v22
	eor		T1.16b, T1.16b, IN1.16b

	enc_round	KS1, v22
	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0

	enc_round	KS0, v23
	pmull		XM2.1q, SHASH2.1d, T1.1d	// (a1 + a0)(b1 + b0)

	enc_round	KS1, v23
	rev64		T1.16b, INP0.16b
	ext		T2.16b, XL.16b, XL.16b, #8

	enc_round	KS0, v24
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b

	enc_round	KS1, v24
	eor		XL.16b, XL.16b, IN1.16b

	enc_round	KS0, v25
	eor		T1.16b, T1.16b, XL.16b

	enc_round	KS1, v25
	pmull2		XH.1q, HH.2d, XL.2d		// a1 * b1

	enc_round	KS0, v26
	pmull		XL.1q, HH.1d, XL.1d		// a0 * b0

	enc_round	KS1, v26
	pmull2		XM.1q, SHASH2.2d, T1.2d		// (a1 + a0)(b1 + b0)

	enc_round	KS0, v27
	eor		XL.16b, XL.16b, XL2.16b
	eor		XH.16b, XH.16b, XH2.16b

	enc_round	KS1, v27
	eor		XM.16b, XM.16b, XM2.16b
	ext		T1.16b, XL.16b, XH.16b, #8

	enc_round	KS0, v28
	eor		T2.16b, XL.16b, XH.16b
	eor		XM.16b, XM.16b, T1.16b

	enc_round	KS1, v28
	eor		XM.16b, XM.16b, T2.16b

	enc_round	KS0, v29
	pmull		T2.1q, XL.1d, MASK.1d

	enc_round	KS1, v29
	mov		XH.d[0], XM.d[1]
	mov		XM.d[1], XL.d[0]

	aese		KS0.16b, v30.16b
	eor		XL.16b, XM.16b, T2.16b

	aese		KS1.16b, v30.16b
	ext		T2.16b, XL.16b, XL.16b, #8

	eor		KS0.16b, KS0.16b, v31.16b
	pmull		XL.1q, XL.1d, MASK.1d
	eor		T2.16b, T2.16b, XH.16b

	eor		KS1.16b, KS1.16b, v31.16b
	eor		XL.16b, XL.16b, T2.16b

	.if		\enc == 0
	eor		INP0.16b, INP0.16b, KS0.16b	// decrypt input
	eor		INP1.16b, INP1.16b, KS1.16b
	.endif

	st1		{INP0.16b-INP1.16b}, [x2], #32

	cbnz		w0, 0b

CPU_LE(	rev		x8, x8		)
	st1		{XL.2d}, [x1]
	str		x8, [x5, #8]			// store lower counter

	.if		\enc == 1
	st1		{KS0.16b-KS1.16b}, [x10]
	.endif

	ret

	/* out of line: the extra leading rounds for the longer key sizes */
2:	b.eq		3f				// AES-192?
	enc_round	KS0, v17
	enc_round	KS1, v17
	enc_round	KS0, v18
	enc_round	KS1, v18
3:	enc_round	KS0, v19
	enc_round	KS1, v19
	enc_round	KS0, v20
	enc_round	KS1, v20
	b		1b

	/* out of line: (re)load the round keys, then enter the loop */
4:	load_round_keys	w7, x6
	b		0b
	.endm
/*
 * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
 *			  struct ghash_key const *k, u8 ctr[],
 *			  u8 const rk[], int rounds, u8 ks[])
 *
 * The body takes the round-key pointer in x6 (skipping the key load when
 * it is NULL), the round count in w7, and ks as the ninth argument, read
 * from the stack.
 * NOTE(review): the element type of rk[] is not visible in this file -
 * confirm against the C declaration.
 */
ENTRY(pmull_gcm_encrypt)
	pmull_gcm_do_crypt	1
ENDPROC(pmull_gcm_encrypt)
/*
 * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
 *			  struct ghash_key const *k, u8 ctr[],
 *			  u8 const rk[], int rounds)
 *
 * The body takes the round-key pointer in x6 (skipping the key load when
 * it is NULL) and the round count in w7.
 * NOTE(review): the element type of rk[] is not visible in this file -
 * confirm against the C declaration.
 */
ENTRY(pmull_gcm_decrypt)
	pmull_gcm_do_crypt	0
ENDPROC(pmull_gcm_decrypt)
/*
 * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
 *
 * Encrypts the 16-byte block at src into dst.  When rk is NULL the round
 * keys are not reloaded, so v17-v31 must already hold them (presumably
 * from an earlier call - confirm against the caller).
 */
ENTRY(pmull_gcm_encrypt_block)
	cbz		x2, 0f
	load_round_keys	w3, x2
0:	ld1		{v0.16b}, [x1]
	enc_block	v0, w3
	st1		{v0.16b}, [x0]
	ret
ENDPROC(pmull_gcm_encrypt_block)