c970d42001
The aria-avx512 implementation uses AVX512 and GFNI. It supports 64-way parallel processing, so the byteslicing code is changed to support 64-way parallelism. It also exports some aria-avx2 functions such as encrypt() and decrypt().

AVX and AVX2 have 16 registers, so they must use memory to store/load state because of the lack of registers. AVX512, however, supports 32 registers, so it does not require store/load in the s-box layer. This means it can reduce the store/load overhead in the s-box layer. The code also becomes much simpler.

Benchmark with modprobe tcrypt mode=610 num_mb=8192, i3-12100:

ARIA-AVX512 (128-bit and 256-bit)
testing speed of multibuffer ecb(aria) (ecb-aria-avx512) encryption
tcrypt: 1 operation in 1504 cycles (1024 bytes)
tcrypt: 1 operation in 4595 cycles (4096 bytes)
tcrypt: 1 operation in 1763 cycles (1024 bytes)
tcrypt: 1 operation in 5540 cycles (4096 bytes)
testing speed of multibuffer ecb(aria) (ecb-aria-avx512) decryption
tcrypt: 1 operation in 1502 cycles (1024 bytes)
tcrypt: 1 operation in 4615 cycles (4096 bytes)
tcrypt: 1 operation in 1759 cycles (1024 bytes)
tcrypt: 1 operation in 5554 cycles (4096 bytes)

ARIA-AVX2 with GFNI (128-bit and 256-bit)
testing speed of multibuffer ecb(aria) (ecb-aria-avx2) encryption
tcrypt: 1 operation in 2003 cycles (1024 bytes)
tcrypt: 1 operation in 5867 cycles (4096 bytes)
tcrypt: 1 operation in 2358 cycles (1024 bytes)
tcrypt: 1 operation in 7295 cycles (4096 bytes)
testing speed of multibuffer ecb(aria) (ecb-aria-avx2) decryption
tcrypt: 1 operation in 2004 cycles (1024 bytes)
tcrypt: 1 operation in 5956 cycles (4096 bytes)
tcrypt: 1 operation in 2409 cycles (1024 bytes)
tcrypt: 1 operation in 7564 cycles (4096 bytes)

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
63 lines
2.6 KiB
C
63 lines
2.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
#ifndef ASM_X86_ARIA_AVX_H
|
|
#define ASM_X86_ARIA_AVX_H
|
|
|
|
#include <linux/types.h>
|
|
|
|
/*
 * Batch geometry for the 16-way routines: the block count per call and the
 * resulting byte length of one batch.
 * ARIA_BLOCK_SIZE is not defined in this header; it must already be visible
 * at the point where ARIA_AESNI_PARALLEL_BLOCK_SIZE is expanded.
 */
#define ARIA_AESNI_PARALLEL_BLOCKS	16
#define ARIA_AESNI_PARALLEL_BLOCK_SIZE \
	(ARIA_BLOCK_SIZE * ARIA_AESNI_PARALLEL_BLOCKS)
|
|
|
|
/*
 * Batch geometry for the 32-way (AVX2) routines: the block count per call and
 * the resulting byte length of one batch.
 * ARIA_BLOCK_SIZE is not defined in this header; it must already be visible
 * at the point where ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE is expanded.
 */
#define ARIA_AESNI_AVX2_PARALLEL_BLOCKS	32
#define ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE \
	(ARIA_BLOCK_SIZE * ARIA_AESNI_AVX2_PARALLEL_BLOCKS)
|
|
|
|
/*
 * Batch geometry for the 64-way (GFNI/AVX512) routines: the block count per
 * call and the resulting byte length of one batch.
 * ARIA_BLOCK_SIZE is not defined in this header; it must already be visible
 * at the point where ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE is expanded.
 */
#define ARIA_GFNI_AVX512_PARALLEL_BLOCKS	64
#define ARIA_GFNI_AVX512_PARALLEL_BLOCK_SIZE \
	(ARIA_BLOCK_SIZE * ARIA_GFNI_AVX512_PARALLEL_BLOCKS)
|
|
|
|
/*
 * 16-way entry points: a plain AES-NI/AVX set followed by a GFNI set (going
 * by the function names). Only the declarations live here; the bodies are
 * defined elsewhere.
 *
 * NOTE(review): ctx is presumably the ARIA key context and dst/src presumably
 * span ARIA_AESNI_PARALLEL_BLOCK_SIZE bytes; the CTR variants additionally
 * take a keystream buffer and the IV/counter block - confirm all of this
 * against the implementation before relying on it.
 */
asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst,
		const u8 *src);
asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst,
		const u8 *src);
asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst,
		const u8 *src, u8 *keystream, u8 *iv);

asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst,
		const u8 *src);
asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst,
		const u8 *src);
asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst,
		const u8 *src, u8 *keystream, u8 *iv);
|
|
|
|
/*
 * 32-way entry points: a plain AES-NI/AVX2 set followed by a GFNI set (going
 * by the function names). Only the declarations live here; the bodies are
 * defined elsewhere.
 *
 * NOTE(review): ctx is presumably the ARIA key context and dst/src presumably
 * span ARIA_AESNI_AVX2_PARALLEL_BLOCK_SIZE bytes; the CTR variants
 * additionally take a keystream buffer and the IV/counter block - confirm all
 * of this against the implementation before relying on it.
 */
asmlinkage void aria_aesni_avx2_encrypt_32way(const void *ctx, u8 *dst,
		const u8 *src);
asmlinkage void aria_aesni_avx2_decrypt_32way(const void *ctx, u8 *dst,
		const u8 *src);
asmlinkage void aria_aesni_avx2_ctr_crypt_32way(const void *ctx, u8 *dst,
		const u8 *src, u8 *keystream, u8 *iv);

asmlinkage void aria_aesni_avx2_gfni_encrypt_32way(const void *ctx, u8 *dst,
		const u8 *src);
asmlinkage void aria_aesni_avx2_gfni_decrypt_32way(const void *ctx, u8 *dst,
		const u8 *src);
asmlinkage void aria_aesni_avx2_gfni_ctr_crypt_32way(const void *ctx, u8 *dst,
		const u8 *src, u8 *keystream, u8 *iv);
|
|
|
|
struct aria_avx_ops {
|
|
void (*aria_encrypt_16way)(const void *ctx, u8 *dst, const u8 *src);
|
|
void (*aria_decrypt_16way)(const void *ctx, u8 *dst, const u8 *src);
|
|
void (*aria_ctr_crypt_16way)(const void *ctx, u8 *dst, const u8 *src,
|
|
u8 *keystream, u8 *iv);
|
|
void (*aria_encrypt_32way)(const void *ctx, u8 *dst, const u8 *src);
|
|
void (*aria_decrypt_32way)(const void *ctx, u8 *dst, const u8 *src);
|
|
void (*aria_ctr_crypt_32way)(const void *ctx, u8 *dst, const u8 *src,
|
|
u8 *keystream, u8 *iv);
|
|
void (*aria_encrypt_64way)(const void *ctx, u8 *dst, const u8 *src);
|
|
void (*aria_decrypt_64way)(const void *ctx, u8 *dst, const u8 *src);
|
|
void (*aria_ctr_crypt_64way)(const void *ctx, u8 *dst, const u8 *src,
|
|
u8 *keystream, u8 *iv);
|
|
|
|
|
|
};
|
|
#endif
|