crypto: x86/aria - Use RIP-relative addressing
Prefer RIP-relative addressing where possible, which removes the need for boot-time relocation fixups.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
commit 52fc482a12
parent c75962f1c4
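For illustration only (not part of the patch, and using a hypothetical label .Lexample_const): an absolute memory reference in x86-64 kernel code is assembled with a 32-bit absolute displacement that the boot-time relocation code must patch whenever the kernel does not run at its link-time address, while the RIP-relative form encodes a signed offset from the current instruction pointer and therefore needs no fixup:

	vmovdqa .Lexample_const, %xmm0		/* absolute displacement: emits a relocation patched at boot */
	vmovdqa .Lexample_const(%rip), %xmm0	/* RIP-relative: position independent, no runtime fixup */

The hunks below apply this same transformation to every constant load in the affected macros.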
@@ -80,7 +80,7 @@
 	transpose_4x4(c0, c1, c2, c3, a0, a1); \
 	transpose_4x4(d0, d1, d2, d3, a0, a1); \
 	\
-	vmovdqu .Lshufb_16x16b, a0; \
+	vmovdqu .Lshufb_16x16b(%rip), a0; \
 	vmovdqu st1, a1; \
 	vpshufb a0, a2, a2; \
 	vpshufb a0, a3, a3; \
@@ -132,7 +132,7 @@
 	transpose_4x4(c0, c1, c2, c3, a0, a1); \
 	transpose_4x4(d0, d1, d2, d3, a0, a1); \
 	\
-	vmovdqu .Lshufb_16x16b, a0; \
+	vmovdqu .Lshufb_16x16b(%rip), a0; \
 	vmovdqu st1, a1; \
 	vpshufb a0, a2, a2; \
 	vpshufb a0, a3, a3; \
@@ -300,11 +300,11 @@
 	x4, x5, x6, x7, \
 	t0, t1, t2, t3, \
 	t4, t5, t6, t7) \
-	vmovdqa .Ltf_s2_bitmatrix, t0; \
-	vmovdqa .Ltf_inv_bitmatrix, t1; \
-	vmovdqa .Ltf_id_bitmatrix, t2; \
-	vmovdqa .Ltf_aff_bitmatrix, t3; \
-	vmovdqa .Ltf_x2_bitmatrix, t4; \
+	vmovdqa .Ltf_s2_bitmatrix(%rip), t0; \
+	vmovdqa .Ltf_inv_bitmatrix(%rip), t1; \
+	vmovdqa .Ltf_id_bitmatrix(%rip), t2; \
+	vmovdqa .Ltf_aff_bitmatrix(%rip), t3; \
+	vmovdqa .Ltf_x2_bitmatrix(%rip), t4; \
 	vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
 	vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
 	vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
@@ -324,13 +324,13 @@
 	x4, x5, x6, x7, \
 	t0, t1, t2, t3, \
 	t4, t5, t6, t7) \
-	vmovdqa .Linv_shift_row, t0; \
-	vmovdqa .Lshift_row, t1; \
-	vbroadcastss .L0f0f0f0f, t6; \
-	vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \
-	vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \
-	vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \
-	vmovdqa .Ltf_hi__x2__and__fwd_aff, t5; \
+	vmovdqa .Linv_shift_row(%rip), t0; \
+	vmovdqa .Lshift_row(%rip), t1; \
+	vbroadcastss .L0f0f0f0f(%rip), t6; \
+	vmovdqa .Ltf_lo__inv_aff__and__s2(%rip), t2; \
+	vmovdqa .Ltf_hi__inv_aff__and__s2(%rip), t3; \
+	vmovdqa .Ltf_lo__x2__and__fwd_aff(%rip), t4; \
+	vmovdqa .Ltf_hi__x2__and__fwd_aff(%rip), t5; \
 	\
 	vaesenclast t7, x0, x0; \
 	vaesenclast t7, x4, x4; \

@@ -96,7 +96,7 @@
 	transpose_4x4(c0, c1, c2, c3, a0, a1); \
 	transpose_4x4(d0, d1, d2, d3, a0, a1); \
 	\
-	vbroadcasti128 .Lshufb_16x16b, a0; \
+	vbroadcasti128 .Lshufb_16x16b(%rip), a0; \
 	vmovdqu st1, a1; \
 	vpshufb a0, a2, a2; \
 	vpshufb a0, a3, a3; \
@@ -148,7 +148,7 @@
 	transpose_4x4(c0, c1, c2, c3, a0, a1); \
 	transpose_4x4(d0, d1, d2, d3, a0, a1); \
 	\
-	vbroadcasti128 .Lshufb_16x16b, a0; \
+	vbroadcasti128 .Lshufb_16x16b(%rip), a0; \
 	vmovdqu st1, a1; \
 	vpshufb a0, a2, a2; \
 	vpshufb a0, a3, a3; \
@@ -307,11 +307,11 @@
 	x4, x5, x6, x7, \
 	t0, t1, t2, t3, \
 	t4, t5, t6, t7) \
-	vpbroadcastq .Ltf_s2_bitmatrix, t0; \
-	vpbroadcastq .Ltf_inv_bitmatrix, t1; \
-	vpbroadcastq .Ltf_id_bitmatrix, t2; \
-	vpbroadcastq .Ltf_aff_bitmatrix, t3; \
-	vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+	vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \
+	vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \
+	vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \
+	vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \
+	vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \
 	vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
 	vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
 	vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
@@ -332,12 +332,12 @@
 	t4, t5, t6, t7) \
 	vpxor t7, t7, t7; \
 	vpxor t6, t6, t6; \
-	vbroadcasti128 .Linv_shift_row, t0; \
-	vbroadcasti128 .Lshift_row, t1; \
-	vbroadcasti128 .Ltf_lo__inv_aff__and__s2, t2; \
-	vbroadcasti128 .Ltf_hi__inv_aff__and__s2, t3; \
-	vbroadcasti128 .Ltf_lo__x2__and__fwd_aff, t4; \
-	vbroadcasti128 .Ltf_hi__x2__and__fwd_aff, t5; \
+	vbroadcasti128 .Linv_shift_row(%rip), t0; \
+	vbroadcasti128 .Lshift_row(%rip), t1; \
+	vbroadcasti128 .Ltf_lo__inv_aff__and__s2(%rip), t2; \
+	vbroadcasti128 .Ltf_hi__inv_aff__and__s2(%rip), t3; \
+	vbroadcasti128 .Ltf_lo__x2__and__fwd_aff(%rip), t4; \
+	vbroadcasti128 .Ltf_hi__x2__and__fwd_aff(%rip), t5; \
 	\
 	vextracti128 $1, x0, t6##_x; \
 	vaesenclast t7##_x, x0##_x, x0##_x; \
@@ -369,7 +369,7 @@
 	vaesdeclast t7##_x, t6##_x, t6##_x; \
 	vinserti128 $1, t6##_x, x6, x6; \
 	\
-	vpbroadcastd .L0f0f0f0f, t6; \
+	vpbroadcastd .L0f0f0f0f(%rip), t6; \
 	\
 	/* AES inverse shift rows */ \
 	vpshufb t0, x0, x0; \

@@ -80,7 +80,7 @@
 	transpose_4x4(c0, c1, c2, c3, a0, a1); \
 	transpose_4x4(d0, d1, d2, d3, a0, a1); \
 	\
-	vbroadcasti64x2 .Lshufb_16x16b, a0; \
+	vbroadcasti64x2 .Lshufb_16x16b(%rip), a0; \
 	vmovdqu64 st1, a1; \
 	vpshufb a0, a2, a2; \
 	vpshufb a0, a3, a3; \
@@ -132,7 +132,7 @@
 	transpose_4x4(c0, c1, c2, c3, a0, a1); \
 	transpose_4x4(d0, d1, d2, d3, a0, a1); \
 	\
-	vbroadcasti64x2 .Lshufb_16x16b, a0; \
+	vbroadcasti64x2 .Lshufb_16x16b(%rip), a0; \
 	vmovdqu64 st1, a1; \
 	vpshufb a0, a2, a2; \
 	vpshufb a0, a3, a3; \
@@ -308,11 +308,11 @@
 	x4, x5, x6, x7, \
 	t0, t1, t2, t3, \
 	t4, t5, t6, t7) \
-	vpbroadcastq .Ltf_s2_bitmatrix, t0; \
-	vpbroadcastq .Ltf_inv_bitmatrix, t1; \
-	vpbroadcastq .Ltf_id_bitmatrix, t2; \
-	vpbroadcastq .Ltf_aff_bitmatrix, t3; \
-	vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+	vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \
+	vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \
+	vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \
+	vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \
+	vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \
 	vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
 	vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
 	vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
@@ -332,11 +332,11 @@
 	y4, y5, y6, y7, \
 	t0, t1, t2, t3, \
 	t4, t5, t6, t7) \
-	vpbroadcastq .Ltf_s2_bitmatrix, t0; \
-	vpbroadcastq .Ltf_inv_bitmatrix, t1; \
-	vpbroadcastq .Ltf_id_bitmatrix, t2; \
-	vpbroadcastq .Ltf_aff_bitmatrix, t3; \
-	vpbroadcastq .Ltf_x2_bitmatrix, t4; \
+	vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \
+	vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \
+	vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \
+	vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \
+	vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \
 	vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
 	vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
 	vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
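As a closing sketch (assumed layout, not copied from the patched sources): the labels referenced above, such as .Lshufb_16x16b and .Ltf_s2_bitmatrix, name constants defined in the same object file, so the RIP-relative displacement is fully resolved at link time. A hypothetical constant and its load could look like:

	.section .rodata.cst16, "aM", @progbits, 16
	.align 16
.Lexample_const:	/* hypothetical 16-byte constant */
	.octa 0x000102030405060708090a0b0c0d0e0f

	.text
	vmovdqu .Lexample_const(%rip), %xmm0	/* displacement fixed at link time, no boot-time fixup */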