2019-06-04 11:11:33 +03:00
/* SPDX-License-Identifier: GPL-2.0-only */
2014-03-21 13:19:17 +04:00
/*
 * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
 *                                    Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */
# include < l i n u x / l i n k a g e . h >
2016-10-11 21:15:19 +03:00
# include < a s m / a s s e m b l e r . h >
2014-03-21 13:19:17 +04:00
2020-02-18 22:58:26 +03:00
/*
 * Function entry/exit wrappers: the function name is token-pasted onto a
 * ce_ prefix before being handed to SYM_FUNC_START / SYM_FUNC_END.
 */
#define AES_FUNC_START(func)		SYM_FUNC_START(ce_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(ce_ ## func)

	/* Enable the Crypto Extensions so the AES instructions assemble. */
	.arch		armv8-a+crypto

	/* Symbolic name for v16 (shared with the other aliases of v16). */
	xtsmask		.req	v16
crypto: arm64/aes-ce - implement 5 way interleave for ECB, CBC and CTR
This implements 5-way interleaving for ECB, CBC decryption and CTR,
resulting in a speedup of ~11% on Marvell ThunderX2, which has a
very deep pipeline and therefore a high issue latency for NEON
instructions operating on the same registers.
Note that XTS is left alone: implementing 5-way interleave there
would either involve spilling of the calculated tweaks to the
stack, or recalculating them after the encryption operation, and
doing either of those would most likely penalize low end cores.
For ECB, this is not a concern at all, given that we have plenty
of spare registers. For CTR and CBC decryption, we take advantage
of the fact that v16 is not used by the CE version of the code
(which is the only one targeted by the optimization), and so we
can reshuffle the code a bit and avoid having to spill to memory
(with the exception of one extra reload in the CBC routine).
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2019-06-24 20:38:31 +03:00
cbciv . r e q v16
vctr . r e q v16
2018-09-10 17:41:15 +03:00
/*
 * xts_reload_mask - expands to nothing in this file.
 *
 *   tmp: accepted and ignored
 *
 * NOTE(review): presumably a hook invoked by the included aes-modes.S that
 * only some implementations need to fill in - confirm against that file.
 */
.macro xts_reload_mask, tmp
.endm
2019-09-03 19:43:34 +03:00
/*
 * xts_cts_skip_tw - expands to nothing in this file.
 *
 *   reg, lbl: accepted and ignored
 *
 * NOTE(review): presumably a hook invoked by the included aes-modes.S that
 * only some implementations need to fill in - confirm against that file.
 */
.macro xts_cts_skip_tw, reg, lbl
.endm
2014-03-21 13:19:17 +04:00
/*
 * load_round_keys - preload all round keys into the top vector registers
 *
 *   rounds: round count, compared against 12 to select the key size
 *           (below 12: 128 bits, equal to 12: 192 bits, otherwise the
 *           full set of loads is executed)
 *   rk:     pointer to the first round key; it is advanced by the
 *           post-indexed loads, so its value is not preserved
 *
 * The shorter key sizes branch past the leading loads. The keys therefore
 * always end in v31 and start in v21 (128 bits), v19 (192 bits) or v17
 * (all loads executed). Clobbers the condition flags.
 */
.macro load_round_keys, rounds, rk
	cmp	\rounds, #12
	b.lo	2222f				/* 128 bits */
	b.eq	1111f				/* 192 bits */
	ld1	{v17.4s-v18.4s}, [\rk], #32
1111:	ld1	{v19.4s-v20.4s}, [\rk], #32
2222:	ld1	{v21.4s-v24.4s}, [\rk], #64
	ld1	{v25.4s-v28.4s}, [\rk], #64
	ld1	{v29.4s-v31.4s}, [\rk]
.endm
/*
 * enc_prepare - prepare for encryption with the key in rk[]
 *
 *   rounds: round count, passed through to load_round_keys
 *   rk:     pointer to the round keys
 *   temp:   scratch register; receives a copy of rk and is the pointer
 *           that load_round_keys advances, so rk keeps its value as long
 *           as temp names a different register
 */
.macro enc_prepare, rounds, rk, temp
	mov	\temp, \rk
	load_round_keys \rounds, \temp
.endm
/*
 * enc_switch_key - prepare for encryption again, with a new key in rk[]
 *
 * Same expansion as enc_prepare: copy rk into temp, then reload every
 * round key register through temp.
 *
 *   rounds: round count, passed through to load_round_keys
 *   rk:     pointer to the new round keys
 *   temp:   scratch register that is advanced in place of rk
 */
.macro enc_switch_key, rounds, rk, temp
	mov	\temp, \rk
	load_round_keys \rounds, \temp
.endm
/*
 * dec_prepare - prepare for decryption with the key in rk[]
 *
 * Only loads the round keys; no transformation of the key material is
 * done here.
 * NOTE(review): rk presumably already points at a decryption key
 * schedule - confirm with the callers.
 *
 *   rounds: round count, passed through to load_round_keys
 *   rk:     pointer to the round keys
 *   temp:   scratch register that is advanced in place of rk
 */
.macro dec_prepare, rounds, rk, temp
	mov	\temp, \rk
	load_round_keys \rounds, \temp
.endm
2019-06-24 20:38:30 +03:00
/*
 * do_enc_Nx - one full AES round on 1, 2, 4 or 5 blocks with one round key
 *
 *   de:     suffix appended to "aes" for the first instruction of each
 *           pair (round_Nx passes e or d)
 *   mc:     suffix appended to "aes" for the second instruction of each
 *           pair (round_Nx passes mc or imc)
 *   k:      round key register name, without the arrangement suffix
 *   i0-i4:  state register names; i1 and everything after it is optional
 *
 * The blank checks are nested: i2 and i3 are only processed when both i1
 * and i3 are given, and i4 only when i3 is given as well. A call with
 * exactly three blocks therefore ignores i2.
 */
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
	aes\de	\i0\().16b, \k\().16b
	aes\mc	\i0\().16b, \i0\().16b
	.ifnb	\i1
	aes\de	\i1\().16b, \k\().16b
	aes\mc	\i1\().16b, \i1\().16b
	.ifnb	\i3
	aes\de	\i2\().16b, \k\().16b
	aes\mc	\i2\().16b, \i2\().16b
	aes\de	\i3\().16b, \k\().16b
	aes\mc	\i3\().16b, \i3\().16b
	.ifnb	\i4
	aes\de	\i4\().16b, \k\().16b
	aes\mc	\i4\().16b, \i4\().16b
	.endif
	.endif
	.endif
.endm
2019-06-24 20:38:30 +03:00
/*
 * round_Nx - up to 5 interleaved rounds with the same round key
 *
 *   enc:    e selects the aese/aesmc pair; any other value selects the
 *           aesd/aesimc pair
 *   k:      round key register name
 *   i0-i4:  state register names, forwarded unchanged to do_enc_Nx
 */
.macro round_Nx, enc, k, i0, i1, i2, i3, i4
	.ifnc	\enc, e
	do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4
	.else
	do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4
	.endif
.endm
2019-06-24 20:38:30 +03:00
/*
 * fin_round_Nx - up to 5 interleaved final rounds
 *
 *   de:     suffix appended to "aes" (do_block_Nx passes its enc argument)
 *   k:      register name of the key fed to the aes instruction
 *   k2:     register name of the key that is XORed in afterwards
 *   i0-i4:  state register names; i1 and everything after it is optional
 *
 * All aes instructions are emitted first, then all eor instructions. The
 * blank checks are nested as in do_enc_Nx: i2 and i3 need both i1 and i3
 * to be given, and i4 additionally needs i3.
 */
.macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4
	aes\de	\i0\().16b, \k\().16b
	.ifnb	\i1
	aes\de	\i1\().16b, \k\().16b
	.ifnb	\i3
	aes\de	\i2\().16b, \k\().16b
	aes\de	\i3\().16b, \k\().16b
	.ifnb	\i4
	aes\de	\i4\().16b, \k\().16b
	.endif
	.endif
	.endif
	eor	\i0\().16b, \i0\().16b, \k2\().16b
	.ifnb	\i1
	eor	\i1\().16b, \i1\().16b, \k2\().16b
	.ifnb	\i3
	eor	\i2\().16b, \i2\().16b, \k2\().16b
	eor	\i3\().16b, \i3\().16b, \k2\().16b
	.ifnb	\i4
	eor	\i4\().16b, \i4\().16b, \k2\().16b
	.endif
	.endif
	.endif
.endm
2019-06-24 20:38:30 +03:00
/*
 * do_block_Nx - run the whole cipher on up to 5 interleaved blocks
 *
 *   enc:    e for encryption, d for decryption (forwarded to round_Nx
 *           and fin_round_Nx)
 *   rounds: round count, compared against 12 to select the entry point
 *   i0-i4:  state register names; i1 and everything after it is optional
 *
 * Uses the round keys in v17-v31. The 128 bit case enters at the v21
 * round and the 192 bit case at the v19 round, so every key size ends
 * with the v30/v31 final round. Clobbers the condition flags.
 */
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
	cmp	\rounds, #12
	b.lo	2222f				/* 128 bits */
	b.eq	1111f				/* 192 bits */
	.irp	key, v17, v18
	round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
	.endr
1111:	.irp	key, v19, v20
	round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
	.endr
2222:	.irp	key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
	.endr
	fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4
.endm
/*
 * encrypt_block - encrypt the single block held in register in
 *
 *   in:        state register name
 *   rounds:    round count
 *   t0-t2:     accepted but not used by this implementation
 */
.macro encrypt_block, in, rounds, t0, t1, t2
	do_block_Nx e, \rounds, \in
.endm
/*
 * encrypt_block4x - encrypt four interleaved blocks
 *
 *   i0-i3:     state register names
 *   rounds:    round count
 *   t0-t2:     accepted but not used by this implementation
 */
.macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
	do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
.endm
2019-06-24 20:38:30 +03:00
/*
 * encrypt_block5x - encrypt five interleaved blocks
 *
 *   i0-i4:     state register names
 *   rounds:    round count
 *   t0-t2:     accepted but not used by this implementation
 */
.macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
	do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4
.endm
2019-06-24 20:38:30 +03:00
/*
 * decrypt_block - decrypt the single block held in register in
 *
 *   in:        state register name
 *   rounds:    round count
 *   t0-t2:     accepted but not used by this implementation
 */
.macro decrypt_block, in, rounds, t0, t1, t2
	do_block_Nx d, \rounds, \in
.endm
/*
 * decrypt_block4x - decrypt four interleaved blocks
 *
 *   i0-i3:     state register names
 *   rounds:    round count
 *   t0-t2:     accepted but not used by this implementation
 */
.macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
	do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
.endm
2019-06-24 20:38:30 +03:00
/*
 * decrypt_block5x - decrypt five interleaved blocks
 *
 *   i0-i4:     state register names
 *   rounds:    round count
 *   t0-t2:     accepted but not used by this implementation
 */
.macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
	do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4
.endm
/*
 * Widest interleave offered by the block macros in this file (the 5x
 * variants).
 * NOTE(review): presumably read by the included aes-modes.S to choose its
 * loop stride - confirm against that file.
 */
#define MAX_STRIDE		5

/* Textually include the mode implementations after the macros they use. */
#include "aes-modes.S"