crypto: x86 - Remove include/asm/inst.h

Current minimum required version of binutils is 2.23,
which supports PSHUFB, PCLMULQDQ, PEXTRD, AESKEYGENASSIST,
AESIMC, AESENC, AESENCLAST, AESDEC, AESDECLAST and MOVQ
instruction mnemonics.

Substitute macros from include/asm/inst.h with a proper
instruction mnemonics in various assmbly files from
x86/crypto directory, and remove now unneeded file.

The patch was tested by calculating and comparing sha256sum
hashes of stripped object files before and after the patch,
to be sure that executable code didn't change.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: "David S. Miller" <davem@davemloft.net>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Uros Bizjak
2020-07-09 17:08:57 +02:00
committed by Herbert Xu
parent 2c2e18369f
commit d7866e503b
7 changed files with 399 additions and 716 deletions

View File

@ -63,7 +63,6 @@
*/
#include <linux/linkage.h>
#include <asm/inst.h>
#define VMOVDQ vmovdqu

File diff suppressed because it is too large Load Diff

View File

@ -120,7 +120,6 @@
##
#include <linux/linkage.h>
#include <asm/inst.h>
# constants in mergeable sections, linker can reorder and merge
.section .rodata.cst16.POLY, "aM", @progbits, 16

View File

@ -38,7 +38,6 @@
*/
#include <linux/linkage.h>
#include <asm/inst.h>
.section .rodata
@ -129,17 +128,17 @@ loop_64:/* 64 bytes Full cache line folding */
#ifdef __x86_64__
movdqa %xmm4, %xmm8
#endif
PCLMULQDQ 00, CONSTANT, %xmm1
PCLMULQDQ 00, CONSTANT, %xmm2
PCLMULQDQ 00, CONSTANT, %xmm3
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x00, CONSTANT, %xmm2
pclmulqdq $0x00, CONSTANT, %xmm3
#ifdef __x86_64__
PCLMULQDQ 00, CONSTANT, %xmm4
pclmulqdq $0x00, CONSTANT, %xmm4
#endif
PCLMULQDQ 0x11, CONSTANT, %xmm5
PCLMULQDQ 0x11, CONSTANT, %xmm6
PCLMULQDQ 0x11, CONSTANT, %xmm7
pclmulqdq $0x11, CONSTANT, %xmm5
pclmulqdq $0x11, CONSTANT, %xmm6
pclmulqdq $0x11, CONSTANT, %xmm7
#ifdef __x86_64__
PCLMULQDQ 0x11, CONSTANT, %xmm8
pclmulqdq $0x11, CONSTANT, %xmm8
#endif
pxor %xmm5, %xmm1
pxor %xmm6, %xmm2
@ -149,8 +148,8 @@ loop_64:/* 64 bytes Full cache line folding */
#else
/* xmm8 unsupported for x32 */
movdqa %xmm4, %xmm5
PCLMULQDQ 00, CONSTANT, %xmm4
PCLMULQDQ 0x11, CONSTANT, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm4
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm4
#endif
@ -172,20 +171,20 @@ less_64:/* Folding cache line into 128bit */
prefetchnta (BUF)
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm2, %xmm1
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm3, %xmm1
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm4, %xmm1
@ -193,8 +192,8 @@ less_64:/* Folding cache line into 128bit */
jb fold_64
loop_16:/* Folding rest buffer into 128bit */
movdqa %xmm1, %xmm5
PCLMULQDQ 0x00, CONSTANT, %xmm1
PCLMULQDQ 0x11, CONSTANT, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor (BUF), %xmm1
sub $0x10, LEN
@ -205,7 +204,7 @@ loop_16:/* Folding rest buffer into 128bit */
fold_64:
/* perform the last 64 bit fold, also adds 32 zeroes
* to the input stream */
PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
psrldq $0x08, %xmm1
pxor CONSTANT, %xmm1
@ -220,7 +219,7 @@ fold_64:
#endif
psrldq $0x04, %xmm2
pand %xmm3, %xmm1
PCLMULQDQ 0x00, CONSTANT, %xmm1
pclmulqdq $0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
@ -231,11 +230,11 @@ fold_64:
#endif
movdqa %xmm1, %xmm2
pand %xmm3, %xmm1
PCLMULQDQ 0x10, CONSTANT, %xmm1
pclmulqdq $0x10, CONSTANT, %xmm1
pand %xmm3, %xmm1
PCLMULQDQ 0x00, CONSTANT, %xmm1
pclmulqdq $0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
PEXTRD 0x01, %xmm1, %eax
pextrd $0x01, %xmm1, %eax
ret
SYM_FUNC_END(crc32_pclmul_le_16)

View File

@ -43,7 +43,6 @@
* SOFTWARE.
*/
#include <asm/inst.h>
#include <linux/linkage.h>
#include <asm/nospec-branch.h>
@ -225,10 +224,10 @@ LABEL crc_ %i
subq %rax, tmp # tmp -= rax*24
movq crc_init, %xmm1 # CRC for block 1
PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2
PCLMULQDQ 0x10, %xmm0, %xmm2 # Multiply by K1
pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1
pxor %xmm2,%xmm1
movq %xmm1, %rax

View File

@ -14,7 +14,6 @@
*/
#include <linux/linkage.h>
#include <asm/inst.h>
#include <asm/frame.h>
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
@ -51,9 +50,9 @@ SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
pxor DATA, T2
pxor SHASH, T3
PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0
PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1
PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0)
pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0
pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1
pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0)
pxor DATA, T2
pxor T1, T2 # T2 = a0 * b1 + a1 * b0
@ -95,9 +94,9 @@ SYM_FUNC_START(clmul_ghash_mul)
movups (%rdi), DATA
movups (%rsi), SHASH
movaps .Lbswap_mask, BSWAP
PSHUFB_XMM BSWAP DATA
pshufb BSWAP, DATA
call __clmul_gf128mul_ble
PSHUFB_XMM BSWAP DATA
pshufb BSWAP, DATA
movups DATA, (%rdi)
FRAME_END
ret
@ -114,18 +113,18 @@ SYM_FUNC_START(clmul_ghash_update)
movaps .Lbswap_mask, BSWAP
movups (%rdi), DATA
movups (%rcx), SHASH
PSHUFB_XMM BSWAP DATA
pshufb BSWAP, DATA
.align 4
.Lupdate_loop:
movups (%rsi), IN1
PSHUFB_XMM BSWAP IN1
pshufb BSWAP, IN1
pxor IN1, DATA
call __clmul_gf128mul_ble
sub $16, %rdx
add $16, %rsi
cmp $16, %rdx
jge .Lupdate_loop
PSHUFB_XMM BSWAP DATA
pshufb BSWAP, DATA
movups DATA, (%rdi)
.Lupdate_just_ret:
FRAME_END