crypto: x86/aria - Use RIP-relative addressing

Prefer RIP-relative addressing where possible, which removes the need
for boot-time relocation fixups.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
commit 52fc482a12 (parent c75962f1c4)
Author:    Ard Biesheuvel <ardb@kernel.org> 2023-04-12 13:00:25 +02:00
Committer: Herbert Xu
3 changed files with 40 additions and 40 deletions
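The pattern applied throughout is the same: an absolute memory operand embeds the symbol's link-time address in the instruction, leaving a relocation (typically R_X86_64_32S in the kernel code model) that must be patched when the relocatable kernel image is moved at boot, whereas a RIP-relative operand encodes only a displacement from the instruction pointer and needs no fixup. A minimal before/after sketch (the %xmm0 operand is illustrative; the label is taken from the diff below):

	/* before: absolute address, requires a boot-time relocation fixup */
	vmovdqu .Lshufb_16x16b, %xmm0

	/* after: RIP-relative, position-independent, no fixup required */
	vmovdqu .Lshufb_16x16b(%rip), %xmm0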

--- a/arch/x86/crypto/aria-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S

@@ -80,7 +80,7 @@
 transpose_4x4(c0, c1, c2, c3, a0, a1); \
 transpose_4x4(d0, d1, d2, d3, a0, a1); \
 \
-vmovdqu .Lshufb_16x16b, a0; \
+vmovdqu .Lshufb_16x16b(%rip), a0; \
 vmovdqu st1, a1; \
 vpshufb a0, a2, a2; \
 vpshufb a0, a3, a3; \
@@ -132,7 +132,7 @@
 transpose_4x4(c0, c1, c2, c3, a0, a1); \
 transpose_4x4(d0, d1, d2, d3, a0, a1); \
 \
-vmovdqu .Lshufb_16x16b, a0; \
+vmovdqu .Lshufb_16x16b(%rip), a0; \
 vmovdqu st1, a1; \
 vpshufb a0, a2, a2; \
 vpshufb a0, a3, a3; \
@@ -300,11 +300,11 @@
 x4, x5, x6, x7, \
 t0, t1, t2, t3, \
 t4, t5, t6, t7) \
-vmovdqa .Ltf_s2_bitmatrix, t0; \
-vmovdqa .Ltf_inv_bitmatrix, t1; \
-vmovdqa .Ltf_id_bitmatrix, t2; \
-vmovdqa .Ltf_aff_bitmatrix, t3; \
-vmovdqa .Ltf_x2_bitmatrix, t4; \
+vmovdqa .Ltf_s2_bitmatrix(%rip), t0; \
+vmovdqa .Ltf_inv_bitmatrix(%rip), t1; \
+vmovdqa .Ltf_id_bitmatrix(%rip), t2; \
+vmovdqa .Ltf_aff_bitmatrix(%rip), t3; \
+vmovdqa .Ltf_x2_bitmatrix(%rip), t4; \
 vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
 vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
 vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
@@ -324,13 +324,13 @@
 x4, x5, x6, x7, \
 t0, t1, t2, t3, \
 t4, t5, t6, t7) \
-vmovdqa .Linv_shift_row, t0; \
-vmovdqa .Lshift_row, t1; \
-vbroadcastss .L0f0f0f0f, t6; \
-vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \
-vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \
-vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \
-vmovdqa .Ltf_hi__x2__and__fwd_aff, t5; \
+vmovdqa .Linv_shift_row(%rip), t0; \
+vmovdqa .Lshift_row(%rip), t1; \
+vbroadcastss .L0f0f0f0f(%rip), t6; \
+vmovdqa .Ltf_lo__inv_aff__and__s2(%rip), t2; \
+vmovdqa .Ltf_hi__inv_aff__and__s2(%rip), t3; \
+vmovdqa .Ltf_lo__x2__and__fwd_aff(%rip), t4; \
+vmovdqa .Ltf_hi__x2__and__fwd_aff(%rip), t5; \
 \
 vaesenclast t7, x0, x0; \
 vaesenclast t7, x4, x4; \

--- a/arch/x86/crypto/aria-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/aria-aesni-avx2-asm_64.S

@@ -96,7 +96,7 @@
 transpose_4x4(c0, c1, c2, c3, a0, a1); \
 transpose_4x4(d0, d1, d2, d3, a0, a1); \
 \
-vbroadcasti128 .Lshufb_16x16b, a0; \
+vbroadcasti128 .Lshufb_16x16b(%rip), a0; \
 vmovdqu st1, a1; \
 vpshufb a0, a2, a2; \
 vpshufb a0, a3, a3; \
@@ -148,7 +148,7 @@
 transpose_4x4(c0, c1, c2, c3, a0, a1); \
 transpose_4x4(d0, d1, d2, d3, a0, a1); \
 \
-vbroadcasti128 .Lshufb_16x16b, a0; \
+vbroadcasti128 .Lshufb_16x16b(%rip), a0; \
 vmovdqu st1, a1; \
 vpshufb a0, a2, a2; \
 vpshufb a0, a3, a3; \
@@ -307,11 +307,11 @@
 x4, x5, x6, x7, \
 t0, t1, t2, t3, \
 t4, t5, t6, t7) \
-vpbroadcastq .Ltf_s2_bitmatrix, t0; \
-vpbroadcastq .Ltf_inv_bitmatrix, t1; \
-vpbroadcastq .Ltf_id_bitmatrix, t2; \
-vpbroadcastq .Ltf_aff_bitmatrix, t3; \
-vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \
+vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \
+vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \
+vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \
+vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \
 vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
 vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
 vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
@@ -332,12 +332,12 @@
 t4, t5, t6, t7) \
 vpxor t7, t7, t7; \
 vpxor t6, t6, t6; \
-vbroadcasti128 .Linv_shift_row, t0; \
-vbroadcasti128 .Lshift_row, t1; \
-vbroadcasti128 .Ltf_lo__inv_aff__and__s2, t2; \
-vbroadcasti128 .Ltf_hi__inv_aff__and__s2, t3; \
-vbroadcasti128 .Ltf_lo__x2__and__fwd_aff, t4; \
-vbroadcasti128 .Ltf_hi__x2__and__fwd_aff, t5; \
+vbroadcasti128 .Linv_shift_row(%rip), t0; \
+vbroadcasti128 .Lshift_row(%rip), t1; \
+vbroadcasti128 .Ltf_lo__inv_aff__and__s2(%rip), t2; \
+vbroadcasti128 .Ltf_hi__inv_aff__and__s2(%rip), t3; \
+vbroadcasti128 .Ltf_lo__x2__and__fwd_aff(%rip), t4; \
+vbroadcasti128 .Ltf_hi__x2__and__fwd_aff(%rip), t5; \
 \
 vextracti128 $1, x0, t6##_x; \
 vaesenclast t7##_x, x0##_x, x0##_x; \
@@ -369,7 +369,7 @@
 vaesdeclast t7##_x, t6##_x, t6##_x; \
 vinserti128 $1, t6##_x, x6, x6; \
 \
-vpbroadcastd .L0f0f0f0f, t6; \
+vpbroadcastd .L0f0f0f0f(%rip), t6; \
 \
 /* AES inverse shift rows */ \
 vpshufb t0, x0, x0; \

--- a/arch/x86/crypto/aria-gfni-avx512-asm_64.S
+++ b/arch/x86/crypto/aria-gfni-avx512-asm_64.S

@@ -80,7 +80,7 @@
 transpose_4x4(c0, c1, c2, c3, a0, a1); \
 transpose_4x4(d0, d1, d2, d3, a0, a1); \
 \
-vbroadcasti64x2 .Lshufb_16x16b, a0; \
+vbroadcasti64x2 .Lshufb_16x16b(%rip), a0; \
 vmovdqu64 st1, a1; \
 vpshufb a0, a2, a2; \
 vpshufb a0, a3, a3; \
@@ -132,7 +132,7 @@
 transpose_4x4(c0, c1, c2, c3, a0, a1); \
 transpose_4x4(d0, d1, d2, d3, a0, a1); \
 \
-vbroadcasti64x2 .Lshufb_16x16b, a0; \
+vbroadcasti64x2 .Lshufb_16x16b(%rip), a0; \
 vmovdqu64 st1, a1; \
 vpshufb a0, a2, a2; \
 vpshufb a0, a3, a3; \
@@ -308,11 +308,11 @@
 x4, x5, x6, x7, \
 t0, t1, t2, t3, \
 t4, t5, t6, t7) \
-vpbroadcastq .Ltf_s2_bitmatrix, t0; \
-vpbroadcastq .Ltf_inv_bitmatrix, t1; \
-vpbroadcastq .Ltf_id_bitmatrix, t2; \
-vpbroadcastq .Ltf_aff_bitmatrix, t3; \
-vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \
+vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \
+vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \
+vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \
+vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \
 vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
 vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
 vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
@@ -332,11 +332,11 @@
 y4, y5, y6, y7, \
 t0, t1, t2, t3, \
 t4, t5, t6, t7) \
-vpbroadcastq .Ltf_s2_bitmatrix, t0; \
-vpbroadcastq .Ltf_inv_bitmatrix, t1; \
-vpbroadcastq .Ltf_id_bitmatrix, t2; \
-vpbroadcastq .Ltf_aff_bitmatrix, t3; \
-vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \
+vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \
+vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \
+vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \
+vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \
 vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
 vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
 vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \