2019-06-04 11:11:33 +03:00
/* SPDX-License-Identifier: GPL-2.0-only */
2014-03-26 23:53:05 +04:00
/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */
# include < l i n u x / l i n k a g e . h >
# include < a s m / a s s e m b l e r . h >
2017-07-24 13:28:18 +03:00
/*
 * Symbolic NEON register names used by the GHASH code in this file.
 *
 * SHASH .. IN1 name v0-v7; note that XH and IN1 both name v7.
 *
 * k00_16, k32_48, t3-t9, perm1-perm3, sh1-sh4 and ss1-ss4 (v8-v27) are
 * referenced by the __pmull*_p8 macros.  XL2 .. HH34 give a second set of
 * names to v8-v19 and are referenced by the 4-way PMULL (p64) code.  The
 * two groups alias the same registers, so values held under one set of
 * names are destroyed by writes through the other.
 */
	SHASH		.req	v0
	SHASH2		.req	v1
	T1		.req	v2
	T2		.req	v3
	MASK		.req	v4
	XM		.req	v5
	XL		.req	v6
	XH		.req	v7
	IN1		.req	v7

	k00_16		.req	v8
	k32_48		.req	v9

	t3		.req	v10
	t4		.req	v11
	t5		.req	v12
	t6		.req	v13
	t7		.req	v14
	t8		.req	v15
	t9		.req	v16

	perm1		.req	v17
	perm2		.req	v18
	perm3		.req	v19

	sh1		.req	v20
	sh2		.req	v21
	sh3		.req	v22
	sh4		.req	v23

	ss1		.req	v24
	ss2		.req	v25
	ss3		.req	v26
	ss4		.req	v27

	XL2		.req	v8
	XM2		.req	v9
	XH2		.req	v10
	XL3		.req	v11
	XM3		.req	v12
	XH3		.req	v13
	TT3		.req	v14
	TT4		.req	v15
	HH		.req	v16
	HH3		.req	v17
	HH4		.req	v18
	HH34		.req	v19

	.text
	.arch		armv8-a+crypto
2017-07-24 13:28:18 +03:00
/*
 * __pmull_p64 rd, rn, rm
 *
 * \rd.1q = polynomial product of the low 64-bit lanes of \rn and \rm,
 * using a single 64x64->128 PMULL instruction.
 */
	.macro		__pmull_p64, rd, rn, rm
	pmull		\rd\().1q, \rn\().1d, \rm\().1d
	.endm
/*
 * __pmull2_p64 rd, rn, rm
 *
 * \rd.1q = polynomial product of the high 64-bit lanes of \rn and \rm,
 * using a single PMULL2 instruction.
 */
	.macro		__pmull2_p64, rd, rn, rm
	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
	.endm
/*
 * __pmull_p8 rq, ad, bd
 *
 * Low-half multiply for CPUs that only have the 8-bit PMULL.  Builds the
 * byte-rotated copies A1, A2, A3 of the low 8 bytes of \ad in t3, t5, t7
 * and then expands __pmull_p8_\bd, so \bd must be a name for which such a
 * macro exists (SHASH and SHASH2 in this file).
 *
 * Clobbers t3, t5, t7 here, plus whatever the expanded macro clobbers.
 */
	.macro		__pmull_p8, rq, ad, bd
	ext		t3.8b, \ad\().8b, \ad\().8b, #1		// A1
	ext		t5.8b, \ad\().8b, \ad\().8b, #2		// A2
	ext		t7.8b, \ad\().8b, \ad\().8b, #3		// A3

	__pmull_p8_\bd	\rq, \ad
	.endm
/*
 * __pmull2_p8 rq, ad, bd
 *
 * High-half counterpart of __pmull_p8.  The rotated copies A1, A2, A3 of
 * \ad are produced with TBL and the perm1-perm3 vectors, which must have
 * been set up beforehand (see __pmull_pre_p8).  Then __pmull2_p8_\bd is
 * expanded; only __pmull2_p8_SHASH is defined in this file.
 *
 * Clobbers t3, t5, t7 here, plus whatever the expanded macro clobbers.
 */
	.macro		__pmull2_p8, rq, ad, bd
	tbl		t3.16b, {\ad\().16b}, perm1.16b		// A1
	tbl		t5.16b, {\ad\().16b}, perm2.16b		// A2
	tbl		t7.16b, {\ad\().16b}, perm3.16b		// A3

	__pmull2_p8_\bd	\rq, \ad
	.endm
/*
 * __pmull_p8_SHASH rq, ad
 *
 * Low-half multiply of \ad by SHASH: expands __pmull_p8_tail on the .8b
 * views, with an empty PMULL suffix and sh1-sh4 as the precomputed
 * rotations of the second operand.
 */
	.macro		__pmull_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH.8b, 8b,, sh1, sh2, sh3, sh4
	.endm
/*
 * __pmull_p8_SHASH2 rq, ad
 *
 * Low-half multiply of \ad by SHASH2: expands __pmull_p8_tail on the .8b
 * views, with an empty PMULL suffix and ss1-ss4 as the precomputed
 * rotations of the second operand.
 */
	.macro		__pmull_p8_SHASH2, rq, ad
	__pmull_p8_tail	\rq, \ad\().8b, SHASH2.8b, 8b,, ss1, ss2, ss3, ss4
	.endm
/*
 * __pmull2_p8_SHASH rq, ad
 *
 * High-half multiply of \ad by SHASH: expands __pmull_p8_tail on the .16b
 * views with suffix "2" (so PMULL2 is emitted) and sh1-sh4 as the
 * precomputed rotations of the second operand.
 */
	.macro		__pmull2_p8_SHASH, rq, ad
	__pmull_p8_tail	\rq, \ad\().16b, SHASH.16b, 16b, 2, sh1, sh2, sh3, sh4
	.endm
/*
 * __pmull_p8_tail rq, ad, bd, nb, t, b1, b2, b3, b4
 *
 * Common tail of the 8-bit PMULL based multiply.
 *
 *   \rq        destination register name (no arrangement suffix)
 *   \ad, \bd   operands A and B, already carrying the .8b/.16b suffix
 *   \nb        arrangement (8b or 16b) applied to t3/t5/t7 and \b1-\b4
 *   \t         empty for PMULL, "2" for PMULL2
 *   \b1-\b4    registers holding the rotated copies B1..B4 of B
 *
 * On entry t3, t5, t7 must hold the rotated copies A1, A2, A3 of A, and
 * k00_16/k32_48 the masks built by __pmull_pre_p8.  Eight partial products
 * are formed, combined pairwise, masked, moved into position with EXT and
 * XORed into D = A*B, which is left in \rq.
 *
 * Clobbers t3-t9.
 */
	.macro		__pmull_p8_tail, rq, ad, bd, nb, t, b1, b2, b3, b4
	pmull\t		t3.8h, t3.\nb, \bd			// F = A1*B
	pmull\t		t4.8h, \ad, \b1\().\nb			// E = A*B1
	pmull\t		t5.8h, t5.\nb, \bd			// H = A2*B
	pmull\t		t6.8h, \ad, \b2\().\nb			// G = A*B2
	pmull\t		t7.8h, t7.\nb, \bd			// J = A3*B
	pmull\t		t8.8h, \ad, \b3\().\nb			// I = A*B3
	pmull\t		t9.8h, \ad, \b4\().\nb			// K = A*B4
	pmull\t		\rq\().8h, \ad, \bd			// D = A*B

	eor		t3.16b, t3.16b, t4.16b			// L = E + F
	eor		t5.16b, t5.16b, t6.16b			// M = G + H
	eor		t7.16b, t7.16b, t8.16b			// N = I + J

	uzp1		t4.2d, t3.2d, t5.2d
	uzp2		t3.2d, t3.2d, t5.2d
	uzp1		t6.2d, t7.2d, t9.2d
	uzp2		t7.2d, t7.2d, t9.2d

	// t3 = (L) (P0 + P1) << 8
	// t5 = (M) (P2 + P3) << 16
	eor		t4.16b, t4.16b, t3.16b
	and		t3.16b, t3.16b, k32_48.16b

	// t7 = (N) (P4 + P5) << 24
	// t9 = (K) (P6 + P7) << 32
	eor		t6.16b, t6.16b, t7.16b
	and		t7.16b, t7.16b, k00_16.16b

	eor		t4.16b, t4.16b, t3.16b
	eor		t6.16b, t6.16b, t7.16b

	zip2		t5.2d, t4.2d, t3.2d
	zip1		t3.2d, t4.2d, t3.2d
	zip2		t9.2d, t6.2d, t7.2d
	zip1		t7.2d, t6.2d, t7.2d

	// rotate each term left by 1, 2, 3 and 4 bytes respectively
	ext		t3.16b, t3.16b, t3.16b, #15
	ext		t5.16b, t5.16b, t5.16b, #14
	ext		t7.16b, t7.16b, t7.16b, #13
	ext		t9.16b, t9.16b, t9.16b, #12

	// fold all shifted terms into D
	eor		t3.16b, t3.16b, t5.16b
	eor		t7.16b, t7.16b, t9.16b
	eor		\rq\().16b, \rq\().16b, t3.16b
	eor		\rq\().16b, \rq\().16b, t7.16b
	.endm
/*
 * __pmull_pre_p64
 *
 * One-time setup for the 64-bit PMULL GHASH path.  Expects x3 to point at
 * the key material and SHASH to be loaded already.
 *
 *   - loads HH, HH3 and HH4 from the 48 bytes at x3 + 16
 *     (presumably higher powers of the hash key - confirm against the
 *     code that fills in the key structure)
 *   - SHASH2 = { SHASH.lo ^ SHASH.hi, HH.lo ^ HH.hi }
 *   - HH34   = { HH3.lo ^ HH3.hi,     HH4.lo ^ HH4.hi }
 *     i.e. the folded halves used for the (a1 + a0)(b1 + b0) products
 *   - MASK   = 0xc200000000000000 in both 64-bit lanes
 *
 * Clobbers x8 and T1.
 */
	.macro		__pmull_pre_p64
	add		x8, x3, #16
	ld1		{HH.2d-HH4.2d}, [x8]

	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	trn1		HH34.2d, HH3.2d, HH4.2d
	trn2		T1.2d, HH3.2d, HH4.2d
	eor		HH34.16b, HH34.16b, T1.16b

	movi		MASK.16b, #0xe1
	shl		MASK.2d, MASK.2d, #57
	.endm
/*
 * __pmull_pre_p8
 *
 * One-time setup for the 8-bit PMULL GHASH path.  Expects SHASH to be
 * loaded already.
 *
 *   - SHASH2 = SHASH ^ (SHASH with its 64-bit halves swapped)
 *   - builds the masks k00_16 and k32_48
 *   - builds perm1/perm2/perm3 (and a fourth vector in T1): TBL index
 *     vectors that rotate the bytes inside each 64-bit half by 1, 2, 3
 *     (and 4) positions
 *   - precomputes the rotated copies of SHASH (sh1-sh4) and of the low
 *     half of SHASH2 (ss1-ss4)
 *
 * Clobbers x5 and T1.
 */
	.macro		__pmull_pre_p8
	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
	eor		SHASH2.16b, SHASH2.16b, SHASH.16b

	// k00_16 := 0x0000000000000000_000000000000ffff
	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
	movi		k32_48.2d, #0xffffffff
	mov		k32_48.h[2], k32_48.h[0]
	ushr		k00_16.2d, k32_48.2d, #32

	// prepare the permutation vectors
	mov_q		x5, 0x080f0e0d0c0b0a09
	movi		T1.8b, #8
	dup		perm1.2d, x5
	eor		perm1.16b, perm1.16b, T1.16b	// low half: indices 1..7,0
	ushr		perm2.2d, perm1.2d, #8
	ushr		perm3.2d, perm1.2d, #16
	ushr		T1.2d, perm1.2d, #24
	sli		perm2.2d, perm1.2d, #56
	sli		perm3.2d, perm1.2d, #48
	sli		T1.2d, perm1.2d, #40

	// precompute loop invariants
	tbl		sh1.16b, {SHASH.16b}, perm1.16b
	tbl		sh2.16b, {SHASH.16b}, perm2.16b
	tbl		sh3.16b, {SHASH.16b}, perm3.16b
	tbl		sh4.16b, {SHASH.16b}, T1.16b
	ext		ss1.8b, SHASH2.8b, SHASH2.8b, #1
	ext		ss2.8b, SHASH2.8b, SHASH2.8b, #2
	ext		ss3.8b, SHASH2.8b, SHASH2.8b, #3
	ext		ss4.8b, SHASH2.8b, SHASH2.8b, #4
	.endm
/ /
/ / PMULL ( 6 4 x64 - > 1 2 8 ) b a s e d r e d u c t i o n f o r C P U s t h a t c a n d o
/ / it i n a s i n g l e i n s t r u c t i o n .
/ /
.macro __pmull_reduce_p64
pmull T 2 . 1 q , X L . 1 d , M A S K . 1 d
eor X M . 1 6 b , X M . 1 6 b , T 1 . 1 6 b
mov X H . d [ 0 ] , X M . d [ 1 ]
mov X M . d [ 1 ] , X L . d [ 0 ]
eor X L . 1 6 b , X M . 1 6 b , T 2 . 1 6 b
ext T 2 . 1 6 b , X L . 1 6 b , X L . 1 6 b , #8
pmull X L . 1 q , X L . 1 d , M A S K . 1 d
.endm
/ /
/ / Alternative r e d u c t i o n f o r C P U s t h a t l a c k s u p p o r t f o r t h e
/ / 6 4 x6 4 - > 1 2 8 P M U L L i n s t r u c t i o n
/ /
.macro __pmull_reduce_p8
eor X M . 1 6 b , X M . 1 6 b , T 1 . 1 6 b
mov X L . d [ 1 ] , X M . d [ 0 ]
mov X H . d [ 0 ] , X M . d [ 1 ]
shl T 1 . 2 d , X L . 2 d , #57
shl T 2 . 2 d , X L . 2 d , #62
eor T 2 . 1 6 b , T 2 . 1 6 b , T 1 . 1 6 b
shl T 1 . 2 d , X L . 2 d , #63
eor T 2 . 1 6 b , T 2 . 1 6 b , T 1 . 1 6 b
ext T 1 . 1 6 b , X L . 1 6 b , X H . 1 6 b , #8
eor T 2 . 1 6 b , T 2 . 1 6 b , T 1 . 1 6 b
mov X L . d [ 1 ] , T 2 . d [ 0 ]
mov X H . d [ 0 ] , T 2 . d [ 1 ]
ushr T 2 . 2 d , X L . 2 d , #1
eor X H . 1 6 b , X H . 1 6 b , X L . 1 6 b
eor X L . 1 6 b , X L . 1 6 b , T 2 . 1 6 b
ushr T 2 . 2 d , T 2 . 2 d , #6
ushr X L . 2 d , X L . 2 d , #1
.endm
/*
 * __pmull_ghash pn
 *
 * Complete body of a GHASH update function; \pn selects the multiply
 * flavour (p64 or p8).  Register usage as seen in the code:
 *
 *   w0   number of 16-byte blocks at x2
 *   x1   digest, loaded into XL on entry and stored back on exit
 *   x2   input data, advanced as blocks are consumed
 *   x3   key material (SHASH at offset 0; the p64 setup reads more)
 *   x4   optional head block, processed first when non-NULL
 *
 * For p64, single blocks are processed until the remaining count is a
 * multiple of four, after which four blocks are handled per iteration.
 * The p8 variant always handles one block per iteration.
 *
 * NOTE(review): when x4 is NULL and w0 is 0 the code still loads input and
 * decrements w0 below zero; callers presumably guarantee at least one
 * block or a head block - confirm.
 */
	.macro		__pmull_ghash, pn
	ld1		{SHASH.2d}, [x3]
	ld1		{XL.2d}, [x1]

	__pmull_pre_\pn

	/* do the head block first, if supplied */
	cbz		x4, 0f
	ld1		{T1.2d}, [x4]
	mov		x4, xzr
	b		3f

0:	.ifc		\pn, p64
	tbnz		w0, #0, 2f		// skip until #blocks is a
	tbnz		w0, #1, 2f		// round multiple of 4

1:	ld1		{XM3.16b-TT4.16b}, [x2], #64

	sub		w0, w0, #4

	rev64		T1.16b, XM3.16b
	rev64		T2.16b, XH3.16b
	rev64		TT4.16b, TT4.16b
	rev64		TT3.16b, TT3.16b

	/* fourth block times SHASH */
	ext		IN1.16b, TT4.16b, TT4.16b, #8
	ext		XL3.16b, TT3.16b, TT3.16b, #8

	eor		TT4.16b, TT4.16b, IN1.16b
	pmull2		XH2.1q, SHASH.2d, IN1.2d	// a1 * b1
	pmull		XL2.1q, SHASH.1d, IN1.1d	// a0 * b0
	pmull		XM2.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)

	/* third block times HH */
	eor		TT3.16b, TT3.16b, XL3.16b
	pmull2		XH3.1q, HH.2d, XL3.2d		// a1 * b1
	pmull		XL3.1q, HH.1d, XL3.1d		// a0 * b0
	pmull2		XM3.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)

	ext		IN1.16b, T2.16b, T2.16b, #8
	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	/* second block times HH3 */
	eor		T2.16b, T2.16b, IN1.16b
	pmull2		XH3.1q, HH3.2d, IN1.2d		// a1 * b1
	pmull		XL3.1q, HH3.1d, IN1.1d		// a0 * b0
	pmull		XM3.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)

	eor		XL2.16b, XL2.16b, XL3.16b
	eor		XH2.16b, XH2.16b, XH3.16b
	eor		XM2.16b, XM2.16b, XM3.16b

	/* (digest ^ first block) times HH4 */
	ext		IN1.16b, T1.16b, T1.16b, #8
	ext		TT3.16b, XL.16b, XL.16b, #8
	eor		XL.16b, XL.16b, IN1.16b
	eor		T1.16b, T1.16b, TT3.16b

	pmull2		XH.1q, HH4.2d, XL.2d		// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	pmull		XL.1q, HH4.1d, XL.1d		// a0 * b0
	pmull2		XM.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)

	/* sum the four products, then reduce once */
	eor		XL.16b, XL.16b, XL2.16b
	eor		XH.16b, XH.16b, XH2.16b
	eor		XM.16b, XM.16b, XM2.16b

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p64

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbz		w0, 5f
	b		1b
	.endif

2:	ld1		{T1.2d}, [x2], #16
	sub		w0, w0, #1

3:	/* multiply XL by SHASH in GF(2^128) */
CPU_LE(	rev64		T1.16b, T1.16b	)

	ext		T2.16b, XL.16b, XL.16b, #8
	ext		IN1.16b, T1.16b, T1.16b, #8
	eor		T1.16b, T1.16b, T2.16b
	eor		XL.16b, XL.16b, IN1.16b

	__pmull2_\pn	XH, XL, SHASH			// a1 * b1
	eor		T1.16b, T1.16b, XL.16b
	__pmull_\pn	XL, XL, SHASH			// a0 * b0
	__pmull_\pn	XM, T1, SHASH2			// (a1 + a0)(b1 + b0)

4:	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_\pn

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	cbnz		w0, 0b

5:	st1		{XL.2d}, [x1]
	ret
	.endm
/*
 * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
 *			   struct ghash_key const *k, const char *head)
 *
 * Variant built on the 64x64->128 PMULL instruction.  The whole body,
 * including the final ret, comes from the __pmull_ghash macro.
 */
SYM_FUNC_START(pmull_ghash_update_p64)
	__pmull_ghash	p64
SYM_FUNC_END(pmull_ghash_update_p64)
2017-07-24 13:28:18 +03:00
2019-12-13 18:49:10 +03:00
/*
 * GHASH update for CPUs that only implement the 8-bit PMULL: the
 * __pmull_ghash macro instantiated with the p8 multiply and reduction
 * macros.  It reads the same registers as the p64 variant (w0 blocks,
 * x1 digest, x2 source, x3 key, x4 optional head block).
 */
SYM_FUNC_START(pmull_ghash_update_p8)
	__pmull_ghash	p8
SYM_FUNC_END(pmull_ghash_update_p8)
2017-07-24 13:28:16 +03:00
2019-09-11 02:19:00 +03:00
/*
 * Additional register names used by the GCM code below.
 *
 *   KS0-KS3    four AES states / key stream blocks (v8-v11)
 *   INP0-INP3  four blocks of input data           (v21-v24)
 *   K0-K9      AES round keys; K6 and K7 share v4 and v5 with MASK and XM
 *   KK, KL, KM the three round keys loaded last by load_round_keys
 *
 * KS0-KS3 and K4, K5, K8, K9 use the same registers (v8-v15) as
 * XL2 .. TT4.
 */
	KS0		.req	v8
	KS1		.req	v9
	KS2		.req	v10
	KS3		.req	v11

	INP0		.req	v21
	INP1		.req	v22
	INP2		.req	v23
	INP3		.req	v24

	K0		.req	v25
	K1		.req	v26
	K2		.req	v27
	K3		.req	v28
	K4		.req	v12
	K5		.req	v13
	K6		.req	v4
	K7		.req	v5
	K8		.req	v14
	K9		.req	v15
	KK		.req	v29
	KL		.req	v30
	KM		.req	v31
/*
 * load_round_keys rounds, rk, tmp
 *
 * Loads the round keys that stay resident for the whole operation:
 *   K0-K3      from \rk
 *   K4-K5      from \rk + 64
 *   KK, KL, KM from \rk + 16 * \rounds - 32
 *              (the last three keys, assuming the schedule holds
 *              \rounds + 1 keys of 16 bytes each - confirm)
 *
 * \tmp is a scratch general purpose register; \rk is left unchanged.
 */
	.macro		load_round_keys, rounds, rk, tmp
	add		\tmp, \rk, #64
	ld1		{K0.4s-K3.4s}, [\rk]
	ld1		{K4.4s-K5.4s}, [\tmp]
	add		\tmp, \rk, \rounds, lsl #4
	sub		\tmp, \tmp, #32
	ld1		{KK.4s-KM.4s}, [\tmp]
	.endm
/*
 * enc_round state, key
 *
 * One AES round on \state with round key \key: AESE followed by AESMC.
 */
	.macro		enc_round, state, key
	aese		\state\().16b, \key\().16b
	aesmc		\state\().16b, \state\().16b
	.endm
2019-09-11 02:19:00 +03:00
/*
 * enc_qround s0, s1, s2, s3, key
 *
 * Applies enc_round with the same round key to four independent states.
 */
	.macro		enc_qround, s0, s1, s2, s3, key
	enc_round	\s0, \key
	enc_round	\s1, \key
	enc_round	\s2, \key
	enc_round	\s3, \key
	.endm
/*
 * enc_block state, rounds, rk, tmp
 *
 * Encrypts the single block in \state in place.  K0-K5 and KK/KL/KM must
 * already be loaded (see load_round_keys); the remaining keys are fetched
 * here starting at \rk + 96.  \tmp is a scratch general purpose register.
 *
 * The round count is decoded from bits 2 and 1 of \rounds:
 *   bit 2 clear         fall through .Lout256 with K6/K7 as loaded from
 *                       \rk + 96 (the 10 round case)
 *   bit 2 set, bit 1 0  four extra rounds, rejoin at .Lout192 (12 rounds)
 *   bit 2 set, bit 1 1  four extra rounds, rejoin at .Lout256 (14 rounds)
 *
 * The slow path is placed in subsection 1 so the common path stays
 * straight-line.  Clobbers K6-K9 and \tmp.
 */
	.macro		enc_block, state, rounds, rk, tmp
	add		\tmp, \rk, #96
	ld1		{K6.4s-K7.4s}, [\tmp], #32
	.irp		key, K0, K1, K2, K3, K4, K5
	enc_round	\state, \key
	.endr

	tbnz		\rounds, #2, .Lnot128_\@
.Lout256_\@:
	enc_round	\state, K6
	enc_round	\state, K7

.Lout192_\@:
	enc_round	\state, KK
	aese		\state\().16b, KL.16b
	eor		\state\().16b, \state\().16b, KM.16b

	.subsection	1
.Lnot128_\@:
	ld1		{K8.4s-K9.4s}, [\tmp], #32
	enc_round	\state, K6
	enc_round	\state, K7
	ld1		{K6.4s-K7.4s}, [\tmp]
	enc_round	\state, K8
	enc_round	\state, K9
	tbz		\rounds, #1, .Lout192_\@
	b		.Lout256_\@
	.previous
	.endm
2019-09-11 02:19:00 +03:00
/*
 * pmull_gcm_do_crypt enc
 *
 * Function body shared by pmull_gcm_encrypt (\enc == 1) and
 * pmull_gcm_decrypt (\enc == 0).  Register usage as seen in the code:
 *
 *   x0         remaining input size in bytes; 0 means tag processing only
 *   x1         output pointer, advanced as data is stored
 *   x2         input pointer, advanced as data is loaded
 *   x3         key material: SHASH at [x3], followed by HH, HH3, HH4
 *   x4         GHASH digest; loaded on entry, written back on the path
 *              that does not finalize the tag
 *   x5         16-byte counter block, 32-bit counter at offset 12
 *   x6         AES round keys
 *   x7         number of AES rounds
 *   1st stack argument   pointer to the lengths block, or NULL to skip
 *                        tag finalization; for \enc == 1 the tag is
 *                        stored through this pointer
 *   2nd stack argument   \enc == 0 only: tag to compare against
 *   3rd stack argument   \enc == 0 only: number of tag bytes to compare
 *
 * For \enc == 0 with tag finalization, w0 returns 0 when the tags match
 * and -1 otherwise.
 *
 * The input is GHASHed before decryption (\enc == 0); the output is
 * GHASHed after encryption (\enc == 1).  Up to four blocks are handled
 * per iteration; a final partial group is loaded and stored with
 * overlapping accesses driven by .Lpermute_table.
 */
	.align		6
	.macro		pmull_gcm_do_crypt, enc
	stp		x29, x30, [sp, #-32]!
	mov		x29, sp
	str		x19, [sp, #24]

	load_round_keys	x7, x6, x8

	ld1		{SHASH.2d}, [x3], #16
	ld1		{HH.2d-HH4.2d}, [x3]

	trn1		SHASH2.2d, SHASH.2d, HH.2d
	trn2		T1.2d, SHASH.2d, HH.2d
	eor		SHASH2.16b, SHASH2.16b, T1.16b

	trn1		HH34.2d, HH3.2d, HH4.2d
	trn2		T1.2d, HH3.2d, HH4.2d
	eor		HH34.16b, HH34.16b, T1.16b

	ld1		{XL.2d}, [x4]

	cbz		x0, 3f				// tag only?

	ldr		w8, [x5, #12]			// load lower counter
CPU_LE(	rev		w8, w8		)

0:	mov		w9, #4				// max blocks per round
	add		x10, x0, #0xf
	lsr		x10, x10, #4			// remaining blocks

	subs		x0, x0, #64
	csel		w9, w10, w9, mi
	add		w8, w8, w9

	bmi		1f
	ld1		{INP0.16b-INP3.16b}, [x2], #64
	.subsection	1
	/*
	 * Populate the four input registers right to left with up to 63 bytes
	 * of data, using overlapping loads to avoid branches.
	 *
	 *                INP0     INP1     INP2     INP3
	 *  1 byte     |        |        |        |x       |
	 * 16 bytes    |        |        |        |xxxxxxxx|
	 * 17 bytes    |        |        |xxxxxxxx|x       |
	 * 47 bytes    |        |xxxxxxxx|xxxxxxxx|xxxxxxx |
	 * etc etc
	 *
	 * Note that this code may read up to 15 bytes before the start of
	 * the input. It is up to the calling code to ensure this is safe if
	 * this happens in the first iteration of the loop (i.e., when the
	 * input size is < 16 bytes)
	 */
1:	mov		x15, #16
	ands		x19, x0, #0xf
	csel		x19, x19, x15, ne		// x19 = bytes in last block (1..16)
	adr_l		x17, .Lpermute_table + 16

	sub		x11, x15, x19			// x11 = 16 - x19
	add		x12, x17, x11
	sub		x17, x17, x11
	ld1		{T1.16b}, [x12]
	sub		x10, x1, x11
	sub		x11, x2, x11

	// x14/x15/x16: post-increments for the loads here and the stores at 6:
	cmp		x0, #-16
	csel		x14, x15, xzr, gt
	cmp		x0, #-32
	csel		x15, x15, xzr, gt
	cmp		x0, #-48
	csel		x16, x19, xzr, gt
	csel		x1, x1, x10, gt
	csel		x2, x2, x11, gt

	ld1		{INP0.16b}, [x2], x14
	ld1		{INP1.16b}, [x2], x15
	ld1		{INP2.16b}, [x2], x16
	ld1		{INP3.16b}, [x2]
	tbl		INP3.16b, {INP3.16b}, T1.16b
	b		2f
	.previous

2:	.if		\enc == 0
	bl		pmull_gcm_ghash_4x
	.endif

	bl		pmull_gcm_enc_4x

	tbnz		x0, #63, 6f			// partial final group?
	st1		{INP0.16b-INP3.16b}, [x1], #64
	.if		\enc == 1
	bl		pmull_gcm_ghash_4x
	.endif
	bne		0b

3:	ldp		x19, x10, [sp, #24]		// restore x19, fetch 1st stack arg
	cbz		x10, 5f				// output tag?

	ld1		{INP3.16b}, [x10]		// load lengths[]
	mov		w9, #1
	bl		pmull_gcm_ghash_4x

	mov		w11, #(0x1 << 24)		// BE '1U'
	ld1		{KS0.16b}, [x5]
	mov		KS0.s[3], w11

	enc_block	KS0, x7, x6, x12

	ext		XL.16b, XL.16b, XL.16b, #8
	rev64		XL.16b, XL.16b
	eor		XL.16b, XL.16b, KS0.16b

	.if		\enc == 1
	st1		{XL.16b}, [x10]			// store tag
	.else
	ldp		x11, x12, [sp, #40]		// load tag pointer and authsize
	adr_l		x17, .Lpermute_table
	ld1		{KS0.16b}, [x11]		// load supplied tag
	add		x17, x17, x12
	ld1		{KS1.16b}, [x17]		// load permute vector

	cmeq		XL.16b, XL.16b, KS0.16b		// compare tags
	mvn		XL.16b, XL.16b			// -1 for fail, 0 for pass
	tbl		XL.16b, {XL.16b}, KS1.16b	// keep authsize bytes only
	sminv		b0, XL.16b			// signed minimum across XL
	smov		w0, v0.b[0]			// return b0
	.endif

4:	ldp		x29, x30, [sp], #32
	ret

5:
CPU_LE(	rev		w8, w8		)
	str		w8, [x5, #12]			// store lower counter
	st1		{XL.2d}, [x4]
	b		4b

6:	ld1		{T1.16b-T2.16b}, [x17], #32	// permute vectors
	sub		x17, x17, x19, lsl #1

	cmp		w9, #1
	beq		7f
	.subsection	1
	// single block: merge the new bytes into what is already at [x1]
7:	ld1		{INP2.16b}, [x1]
	tbx		INP2.16b, {INP3.16b}, T1.16b
	mov		INP3.16b, INP2.16b
	b		8f
	.previous

	st1		{INP0.16b}, [x1], x14
	st1		{INP1.16b}, [x1], x15
	st1		{INP2.16b}, [x1], x16
	tbl		INP3.16b, {INP3.16b}, T1.16b
	tbx		INP3.16b, {INP2.16b}, T2.16b
8:	st1		{INP3.16b}, [x1]

	.if		\enc == 1
	ld1		{T1.16b}, [x17]
	tbl		INP3.16b, {INP3.16b}, T1.16b	// clear non-data bits
	bl		pmull_gcm_ghash_4x
	.endif
	b		3b
	.endm
2017-07-24 13:28:16 +03:00
2019-09-11 02:19:00 +03:00
/*
 * pmull_gcm_encrypt - AES-GCM encryption, instantiated from
 * pmull_gcm_do_crypt with enc == 1.
 *
 * Arguments, as consumed by the macro body:
 *   x0   int bytes         number of input bytes (not blocks)
 *   x1   u8 dst[]
 *   x2   const u8 src[]
 *   x3   hash key material
 *   x4   u64 dg[]          running GHASH digest
 *   x5   u8 ctr[]          counter block
 *   x6   AES round keys
 *   x7   int rounds
 *   1st stack argument: when non-NULL, points at the lengths block to be
 *   hashed last and receives the 16-byte tag.
 *
 * NOTE(review): the exact C prototype lives in the glue code - confirm
 * the parameter types there.
 */
SYM_FUNC_START(pmull_gcm_encrypt)
	pmull_gcm_do_crypt	1
SYM_FUNC_END(pmull_gcm_encrypt)
2017-07-24 13:28:16 +03:00
2019-09-11 02:19:00 +03:00
/*
 * pmull_gcm_decrypt - AES-GCM decryption, instantiated from
 * pmull_gcm_do_crypt with enc == 0.
 *
 * Arguments, as consumed by the macro body:
 *   x0   int bytes         number of input bytes (not blocks)
 *   x1   u8 dst[]
 *   x2   const u8 src[]
 *   x3   hash key material
 *   x4   u64 dg[]          running GHASH digest
 *   x5   u8 ctr[]          counter block
 *   x6   AES round keys
 *   x7   int rounds
 *   1st stack argument: when non-NULL, points at the lengths block to be
 *   hashed last and requests the tag check.
 *   2nd stack argument: the tag to compare against.
 *   3rd stack argument: authsize, the number of tag bytes to compare.
 *
 * When the tag is checked, w0 returns 0 on a match and -1 on a mismatch.
 *
 * NOTE(review): the exact C prototype lives in the glue code - confirm
 * the parameter types there.
 */
SYM_FUNC_START(pmull_gcm_decrypt)
	pmull_gcm_do_crypt	0
SYM_FUNC_END(pmull_gcm_decrypt)
2017-07-24 13:28:16 +03:00
2020-02-18 22:58:25 +03:00
/*
 * pmull_gcm_ghash_4x - fold up to four blocks into the GHASH digest XL.
 *
 * In:   w9         number of valid blocks; the low three bits are decoded
 *                  (4: INP0-INP3, 3: INP1-INP3, 2: INP2-INP3, 1: INP3)
 *       INP0-INP3  input blocks
 *       XL         current digest
 *       SHASH, SHASH2, HH, HH3, HH4, HH34  key material set up by the caller
 * Out:  XL         updated digest
 *
 * The earliest block of a full group is multiplied by HH4, the next by
 * HH3, then HH, and the last one by SHASH; the products are summed and
 * reduced once.  Groups of fewer than four blocks enter this chain part
 * way through, via the stub in subsection 1.
 *
 * Clobbers MASK, T1, T2, TT3, TT4, XM, XH/IN1, XL2, XM2 and XH2.
 */
SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
	movi		MASK.16b, #0xe1
	shl		MASK.2d, MASK.2d, #57

	rev64		T1.16b, INP0.16b
	rev64		T2.16b, INP1.16b
	rev64		TT3.16b, INP2.16b
	rev64		TT4.16b, INP3.16b

	ext		XL.16b, XL.16b, XL.16b, #8

	tbz		w9, #2, 0f			// <4 blocks?
	.subsection	1
0:	movi		XH2.16b, #0
	movi		XM2.16b, #0
	movi		XL2.16b, #0

	tbz		w9, #0, 1f			// 2 blocks?
	tbz		w9, #1, 2f			// 1 block?

	/* 3 blocks: the digest is folded into the block from INP1 */
	eor		T2.16b, T2.16b, XL.16b
	ext		T1.16b, T2.16b, T2.16b, #8
	b		.Lgh3

1:	eor		TT3.16b, TT3.16b, XL.16b
	ext		T2.16b, TT3.16b, TT3.16b, #8
	b		.Lgh2

2:	eor		TT4.16b, TT4.16b, XL.16b
	ext		IN1.16b, TT4.16b, TT4.16b, #8
	b		.Lgh1
	.previous

	eor		T1.16b, T1.16b, XL.16b
	ext		IN1.16b, T1.16b, T1.16b, #8

	pmull2		XH2.1q, HH4.2d, IN1.2d		// a1 * b1
	eor		T1.16b, T1.16b, IN1.16b
	pmull		XL2.1q, HH4.1d, IN1.1d		// a0 * b0
	pmull2		XM2.1q, HH34.2d, T1.2d		// (a1 + a0)(b1 + b0)

	ext		T1.16b, T2.16b, T2.16b, #8
.Lgh3:	eor		T2.16b, T2.16b, T1.16b
	pmull2		XH.1q, HH3.2d, T1.2d		// a1 * b1
	pmull		XL.1q, HH3.1d, T1.1d		// a0 * b0
	pmull		XM.1q, HH34.1d, T2.1d		// (a1 + a0)(b1 + b0)

	eor		XH2.16b, XH2.16b, XH.16b
	eor		XL2.16b, XL2.16b, XL.16b
	eor		XM2.16b, XM2.16b, XM.16b

	ext		T2.16b, TT3.16b, TT3.16b, #8
.Lgh2:	eor		TT3.16b, TT3.16b, T2.16b
	pmull2		XH.1q, HH.2d, T2.2d		// a1 * b1
	pmull		XL.1q, HH.1d, T2.1d		// a0 * b0
	pmull2		XM.1q, SHASH2.2d, TT3.2d	// (a1 + a0)(b1 + b0)

	eor		XH2.16b, XH2.16b, XH.16b
	eor		XL2.16b, XL2.16b, XL.16b
	eor		XM2.16b, XM2.16b, XM.16b

	ext		IN1.16b, TT4.16b, TT4.16b, #8
.Lgh1:	eor		TT4.16b, TT4.16b, IN1.16b
	pmull		XL.1q, SHASH.1d, IN1.1d		// a0 * b0
	pmull2		XH.1q, SHASH.2d, IN1.2d		// a1 * b1
	pmull		XM.1q, SHASH2.1d, TT4.1d	// (a1 + a0)(b1 + b0)

	eor		XH.16b, XH.16b, XH2.16b
	eor		XL.16b, XL.16b, XL2.16b
	eor		XM.16b, XM.16b, XM2.16b

	eor		T2.16b, XL.16b, XH.16b
	ext		T1.16b, XL.16b, XH.16b, #8
	eor		XM.16b, XM.16b, T2.16b

	__pmull_reduce_p64

	eor		T2.16b, T2.16b, XH.16b
	eor		XL.16b, XL.16b, T2.16b

	ret
SYM_FUNC_END(pmull_gcm_ghash_4x)
2019-09-11 02:19:00 +03:00
2020-02-18 22:58:25 +03:00
/*
 * pmull_gcm_enc_4x - generate four key stream blocks and XOR them into
 * INP0-INP3.
 *
 * In:   x5         counter block; only its first 12 bytes are used
 *       w8         counter value already advanced past this group, so
 *                  KS0..KS3 are built from w8 - 4 .. w8 - 1
 *       x6         AES round keys
 *       x7         number of rounds (bits 2 and 1 are decoded exactly as
 *                  in enc_block)
 *       K0-K5, KK, KL, KM  preloaded by load_round_keys
 *       INP0-INP3  data to be XORed with the key stream
 * Out:  INP0-INP3  data ^ key stream
 *
 * Clobbers x10-x13, KS0-KS3 and K6-K9.
 */
SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
	ld1		{KS0.16b}, [x5]			// load upper counter
	sub		w10, w8, #4
	sub		w11, w8, #3
	sub		w12, w8, #2
	sub		w13, w8, #1
	rev		w10, w10
	rev		w11, w11
	rev		w12, w12
	rev		w13, w13
	mov		KS1.16b, KS0.16b
	mov		KS2.16b, KS0.16b
	mov		KS3.16b, KS0.16b
	ins		KS0.s[3], w10			// set lower counter
	ins		KS1.s[3], w11
	ins		KS2.s[3], w12
	ins		KS3.s[3], w13

	add		x10, x6, #96			// round key pointer
	ld1		{K6.4s-K7.4s}, [x10], #32

	.irp		key, K0, K1, K2, K3, K4, K5
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr

	tbnz		x7, #2, .Lnot128
	.subsection	1
.Lnot128:
	ld1		{K8.4s-K9.4s}, [x10], #32
	.irp		key, K6, K7
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr
	ld1		{K6.4s-K7.4s}, [x10]
	.irp		key, K8, K9
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr
	tbz		x7, #1, .Lout192
	b		.Lout256
	.previous

.Lout256:
	.irp		key, K6, K7
	enc_qround	KS0, KS1, KS2, KS3, \key
	.endr

.Lout192:
	enc_qround	KS0, KS1, KS2, KS3, KK

	// final round: no MixColumns, then add the last round key
	aese		KS0.16b, KL.16b
	aese		KS1.16b, KL.16b
	aese		KS2.16b, KL.16b
	aese		KS3.16b, KL.16b

	eor		KS0.16b, KS0.16b, KM.16b
	eor		KS1.16b, KS1.16b, KM.16b
	eor		KS2.16b, KS2.16b, KM.16b
	eor		KS3.16b, KS3.16b, KM.16b

	eor		INP0.16b, INP0.16b, KS0.16b
	eor		INP1.16b, INP1.16b, KS1.16b
	eor		INP2.16b, INP2.16b, KS2.16b
	eor		INP3.16b, INP3.16b, KS3.16b

	ret
SYM_FUNC_END(pmull_gcm_enc_4x)
2019-09-11 02:19:00 +03:00
/*
 * Index table for TBL/TBX: two repetitions of sixteen 0xff bytes followed
 * by the identity indices 0x0-0xf.  A 16-byte vector loaded at a byte
 * offset into this table is a permute vector that shifts a register by
 * that many bytes; with TBL the 0xff entries yield zero, with TBX they
 * leave the destination byte unchanged.
 */
	.section	".rodata", "a"
	.align		6
.Lpermute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.previous