crypto: arm64/aes-ce - add 5 way interleave routines
In preparation of tweaking the accelerated AES chaining mode routines to be able to use a 5-way stride, implement the core routines to support processing 5 blocks of input at a time. While at it, drop the 2 way versions, which have been unused for a while now. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
d45b1714e2
commit
e217413964
@ -52,7 +52,7 @@
|
||||
load_round_keys \rounds, \temp
|
||||
.endm
|
||||
|
||||
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
|
||||
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
|
||||
aes\de \i0\().16b, \k\().16b
|
||||
aes\mc \i0\().16b, \i0\().16b
|
||||
.ifnb \i1
|
||||
@ -63,27 +63,34 @@
|
||||
aes\mc \i2\().16b, \i2\().16b
|
||||
aes\de \i3\().16b, \k\().16b
|
||||
aes\mc \i3\().16b, \i3\().16b
|
||||
.ifnb \i4
|
||||
aes\de \i4\().16b, \k\().16b
|
||||
aes\mc \i4\().16b, \i4\().16b
|
||||
.endif
|
||||
.endif
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/* up to 4 interleaved encryption rounds with the same round key */
|
||||
.macro round_Nx, enc, k, i0, i1, i2, i3
|
||||
/* up to 5 interleaved encryption rounds with the same round key */
|
||||
.macro round_Nx, enc, k, i0, i1, i2, i3, i4
|
||||
.ifc \enc, e
|
||||
do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3
|
||||
do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4
|
||||
.else
|
||||
do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3
|
||||
do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/* up to 4 interleaved final rounds */
|
||||
.macro fin_round_Nx, de, k, k2, i0, i1, i2, i3
|
||||
/* up to 5 interleaved final rounds */
|
||||
.macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4
|
||||
aes\de \i0\().16b, \k\().16b
|
||||
.ifnb \i1
|
||||
aes\de \i1\().16b, \k\().16b
|
||||
.ifnb \i3
|
||||
aes\de \i2\().16b, \k\().16b
|
||||
aes\de \i3\().16b, \k\().16b
|
||||
.ifnb \i4
|
||||
aes\de \i4\().16b, \k\().16b
|
||||
.endif
|
||||
.endif
|
||||
.endif
|
||||
eor \i0\().16b, \i0\().16b, \k2\().16b
|
||||
@ -92,47 +99,52 @@
|
||||
.ifnb \i3
|
||||
eor \i2\().16b, \i2\().16b, \k2\().16b
|
||||
eor \i3\().16b, \i3\().16b, \k2\().16b
|
||||
.ifnb \i4
|
||||
eor \i4\().16b, \i4\().16b, \k2\().16b
|
||||
.endif
|
||||
.endif
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/* up to 4 interleaved blocks */
|
||||
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3
|
||||
/* up to 5 interleaved blocks */
|
||||
.macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
|
||||
cmp \rounds, #12
|
||||
blo 2222f /* 128 bits */
|
||||
beq 1111f /* 192 bits */
|
||||
round_Nx \enc, v17, \i0, \i1, \i2, \i3
|
||||
round_Nx \enc, v18, \i0, \i1, \i2, \i3
|
||||
1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3
|
||||
round_Nx \enc, v20, \i0, \i1, \i2, \i3
|
||||
round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4
|
||||
round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4
|
||||
1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4
|
||||
round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4
|
||||
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
|
||||
round_Nx \enc, \key, \i0, \i1, \i2, \i3
|
||||
round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4
|
||||
.endr
|
||||
fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3
|
||||
fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4
|
||||
.endm
|
||||
|
||||
.macro encrypt_block, in, rounds, t0, t1, t2
|
||||
do_block_Nx e, \rounds, \in
|
||||
.endm
|
||||
|
||||
.macro encrypt_block2x, i0, i1, rounds, t0, t1, t2
|
||||
do_block_Nx e, \rounds, \i0, \i1
|
||||
.endm
|
||||
|
||||
.macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
|
||||
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3
|
||||
.endm
|
||||
|
||||
.macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
|
||||
do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4
|
||||
.endm
|
||||
|
||||
.macro decrypt_block, in, rounds, t0, t1, t2
|
||||
do_block_Nx d, \rounds, \in
|
||||
.endm
|
||||
|
||||
.macro decrypt_block2x, i0, i1, rounds, t0, t1, t2
|
||||
do_block_Nx d, \rounds, \i0, \i1
|
||||
.endm
|
||||
|
||||
.macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
|
||||
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3
|
||||
.endm
|
||||
|
||||
.macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
|
||||
do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4
|
||||
.endm
|
||||
|
||||
#define MAX_STRIDE 5
|
||||
|
||||
#include "aes-modes.S"
|
||||
|
@ -13,6 +13,10 @@
|
||||
.text
|
||||
.align 4
|
||||
|
||||
#ifndef MAX_STRIDE
|
||||
#define MAX_STRIDE 4
|
||||
#endif
|
||||
|
||||
aes_encrypt_block4x:
|
||||
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
|
||||
ret
|
||||
@ -23,6 +27,18 @@ aes_decrypt_block4x:
|
||||
ret
|
||||
ENDPROC(aes_decrypt_block4x)
|
||||
|
||||
#if MAX_STRIDE == 5
|
||||
aes_encrypt_block5x:
|
||||
encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
|
||||
ret
|
||||
ENDPROC(aes_encrypt_block5x)
|
||||
|
||||
aes_decrypt_block5x:
|
||||
decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
|
||||
ret
|
||||
ENDPROC(aes_decrypt_block5x)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks)
|
||||
|
@ -117,26 +117,9 @@
|
||||
|
||||
/*
|
||||
* Interleaved versions: functionally equivalent to the
|
||||
* ones above, but applied to 2 or 4 AES states in parallel.
|
||||
* ones above, but applied to AES states in parallel.
|
||||
*/
|
||||
|
||||
.macro sub_bytes_2x, in0, in1
|
||||
sub v8.16b, \in0\().16b, v15.16b
|
||||
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
|
||||
sub v9.16b, \in1\().16b, v15.16b
|
||||
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
|
||||
sub v10.16b, v8.16b, v15.16b
|
||||
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
|
||||
sub v11.16b, v9.16b, v15.16b
|
||||
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
|
||||
sub v8.16b, v10.16b, v15.16b
|
||||
tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
|
||||
sub v9.16b, v11.16b, v15.16b
|
||||
tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
|
||||
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
|
||||
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
|
||||
.endm
|
||||
|
||||
.macro sub_bytes_4x, in0, in1, in2, in3
|
||||
sub v8.16b, \in0\().16b, v15.16b
|
||||
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
|
||||
@ -215,25 +198,6 @@
|
||||
eor \in1\().16b, \in1\().16b, v11.16b
|
||||
.endm
|
||||
|
||||
.macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
|
||||
ld1 {v15.4s}, [\rk]
|
||||
add \rkp, \rk, #16
|
||||
mov \i, \rounds
|
||||
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
movi v15.16b, #0x40
|
||||
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
|
||||
sub_bytes_2x \in0, \in1
|
||||
subs \i, \i, #1
|
||||
ld1 {v15.4s}, [\rkp], #16
|
||||
beq 2222f
|
||||
mix_columns_2x \in0, \in1, \enc
|
||||
b 1111b
|
||||
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
.endm
|
||||
|
||||
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
|
||||
ld1 {v15.4s}, [\rk]
|
||||
add \rkp, \rk, #16
|
||||
@ -260,14 +224,6 @@
|
||||
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
|
||||
.endm
|
||||
|
||||
.macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
|
||||
do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
|
||||
.endm
|
||||
|
||||
.macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
|
||||
do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
|
||||
.endm
|
||||
|
||||
.macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
|
||||
do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
|
||||
.endm
|
||||
|
Loading…
Reference in New Issue
Block a user