2019-05-30 02:57:49 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2008-08-07 05:57:03 +04:00
/*
* Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
* CRC32C polynomial: 0x1EDC6F41 (BE) / 0x82F63B78 (LE)
* CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
* http://www.intel.com/products/processor/manuals/
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
* Volume 2A: Instruction Set Reference, A-M
*
2008-10-31 11:52:58 +03:00
* Copyright (C) 2008 Intel Corporation
* Authors: Austin Zhang <austin_zhang@linux.intel.com>
*          Kent Liu <kent.liu@intel.com>
2008-08-07 05:57:03 +04:00
*/
# include <linux/init.h>
# include <linux/module.h>
# include <linux/string.h>
# include <linux/kernel.h>
# include <crypto/internal/hash.h>
2019-03-13 08:12:48 +03:00
# include <crypto/internal/simd.h>
2008-08-07 05:57:03 +04:00
2016-01-27 00:12:04 +03:00
# include <asm/cpufeatures.h>
2012-01-26 03:09:06 +04:00
# include <asm/cpu_device_id.h>
2019-03-13 08:12:48 +03:00
# include <asm/simd.h>
2008-08-07 05:57:03 +04:00
#define CHKSUM_BLOCK_SIZE	1
#define CHKSUM_DIGEST_SIZE	4

/* Number of bytes consumed by one word-sized CRC32 step. */
#define SCALE_F	sizeof(unsigned long)

/*
 * Word-sized CRC32 step, spelled with the instruction mnemonic.
 *
 * The minimum required binutils version (2.23) supports the CRC32 mnemonic,
 * so the instruction is no longer specified byte-wise. With the mnemonic the
 * compiler can pass a memory operand directly to the instruction, so no
 * temporary register is needed for the data word.
 *
 * Operand %0 is the running CRC, operand %1 is the data word. The 64-bit
 * variant uses the %q operand modifier on the CRC operand for crc32q.
 */
#ifdef CONFIG_X86_64
#define CRC32_INST "crc32q %1, %q0"
#else
#define CRC32_INST "crc32l %1, %0"
#endif
2012-09-28 02:44:22 +04:00
#ifdef CONFIG_X86_64
/*
 * Use the carry-less multiply version of crc32c when the buffer size is
 * >= 512, to account for the FPU state save/restore overhead.
 */
#define CRC32C_PCL_BREAKEVEN	512

/* PCLMULQDQ-based CRC32C routine; defined outside this file. */
asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
				unsigned int crc_init);
#endif /* CONFIG_X86_64 */
2008-08-07 05:57:03 +04:00
/*
 * Fold @length bytes starting at @data into @crc, one byte per crc32b
 * instruction, and return the updated CRC value.
 *
 * The "rm" input constraint allows the compiler to hand the byte to the
 * instruction as a memory operand, so no temporary register is required.
 * The CRC32 instruction has no hidden state, hence the asm statement does
 * not need to be volatile.
 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
	while (length--) {
		asm("crc32b %1, %0"
		    : "+r" (crc) : "rm" (*data));
		data++;
	}

	return crc;
}
/*
 * Fold @len bytes starting at @p into @crc and return the updated CRC value.
 *
 * The buffer is consumed in SCALE_F-sized words with CRC32_INST; the
 * remaining (len % SCALE_F) bytes are handled by crc32c_intel_le_hw_byte().
 *
 * The word and tail counters are size_t so that a large @len is not
 * truncated, and the buffer is only ever accessed through const pointers.
 * As in the byte-wise helper, the "rm" constraint lets the compiler use a
 * memory operand and the asm needs no volatile qualifier.
 */
static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
{
	size_t iquotient = len / SCALE_F;
	size_t iremainder = len % SCALE_F;
	const unsigned long *ptmp = (const unsigned long *)p;

	while (iquotient--) {
		asm(CRC32_INST
		    : "+r" (crc) : "rm" (*ptmp));
		ptmp++;
	}

	if (iremainder)
		crc = crc32c_intel_le_hw_byte(crc, (const unsigned char *)ptmp,
					      iremainder);

	return crc;
}
/*
* Setting the seed allows arbitrary accumulators and flexible XOR policy
* If your algorithm starts with ~ 0 , then XOR with ~ 0 before you set
* the seed .
*/
2008-11-06 11:56:41 +03:00
static int crc32c_intel_setkey ( struct crypto_shash * hash , const u8 * key ,
2008-08-07 05:57:03 +04:00
unsigned int keylen )
{
2008-11-06 11:56:41 +03:00
u32 * mctx = crypto_shash_ctx ( hash ) ;
2008-08-07 05:57:03 +04:00
crypto: remove CRYPTO_TFM_RES_BAD_KEY_LEN
The CRYPTO_TFM_RES_BAD_KEY_LEN flag was apparently meant as a way to
make the ->setkey() functions provide more information about errors.
However, no one actually checks for this flag, which makes it pointless.
Also, many algorithms fail to set this flag when given a bad length key.
Reviewing just the generic implementations, this is the case for
aes-fixed-time, cbcmac, echainiv, nhpoly1305, pcrypt, rfc3686, rfc4309,
rfc7539, rfc7539esp, salsa20, seqiv, and xcbc. But there are probably
many more in arch/*/crypto/ and drivers/crypto/.
Some algorithms can even set this flag when the key is the correct
length. For example, authenc and authencesn set it when the key payload
is malformed in any way (not just a bad length), the atmel-sha and ccree
drivers can set it if a memory allocation fails, and the chelsio driver
sets it for bad auth tag lengths, not just bad key lengths.
So even if someone actually wanted to start checking this flag (which
seems unlikely, since it's been unused for a long time), there would be
a lot of work needed to get it working correctly. But it would probably
be much better to go back to the drawing board and just define different
return values, like -EINVAL if the key is invalid for the algorithm vs.
-EKEYREJECTED if the key was rejected by a policy like "no weak keys".
That would be much simpler, less error-prone, and easier to test.
So just remove this flag.
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2019-12-31 06:19:36 +03:00
if ( keylen ! = sizeof ( u32 ) )
2008-08-07 05:57:03 +04:00
return - EINVAL ;
* mctx = le32_to_cpup ( ( __le32 * ) key ) ;
return 0 ;
}
2008-11-06 11:56:41 +03:00
static int crc32c_intel_init ( struct shash_desc * desc )
2008-08-07 05:57:03 +04:00
{
2008-11-06 11:56:41 +03:00
u32 * mctx = crypto_shash_ctx ( desc - > tfm ) ;
u32 * crcp = shash_desc_ctx ( desc ) ;
2008-08-07 05:57:03 +04:00
* crcp = * mctx ;
return 0 ;
}
2008-11-06 11:56:41 +03:00
static int crc32c_intel_update ( struct shash_desc * desc , const u8 * data ,
unsigned int len )
2008-08-07 05:57:03 +04:00
{
2008-11-06 11:56:41 +03:00
u32 * crcp = shash_desc_ctx ( desc ) ;
2008-08-07 05:57:03 +04:00
2008-11-06 11:56:41 +03:00
* crcp = crc32c_intel_le_hw ( * crcp , data , len ) ;
2008-08-07 05:57:03 +04:00
return 0 ;
}
2008-11-06 11:56:41 +03:00
/*
 * Fold the last @len bytes at @data into the CRC read from @crcp and write
 * the bit-inverted result to @out as a little-endian 32-bit value.
 * The value behind @crcp is not modified. Always returns 0.
 */
static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				u8 *out)
{
	*(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
	return 0;
}
2008-11-06 11:56:41 +03:00
/*
 * Finish a hash operation with trailing data: continue from the CRC state
 * kept in @desc, fold in @len bytes at @data and write the digest to @out.
 */
static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
			      unsigned int len, u8 *out)
{
	return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
}
2008-08-07 05:57:03 +04:00
2008-11-06 11:56:41 +03:00
/*
 * Finish a hash operation: write the bit-inverted CRC state kept in @desc
 * to @out as a little-endian 32-bit value. Always returns 0.
 */
static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
{
	u32 *crcp = shash_desc_ctx(desc);

	*(__le32 *)out = ~cpu_to_le32p(crcp);
	return 0;
}
2008-11-06 11:56:41 +03:00
/*
 * One-shot digest: start from the seed held in the transform context (not
 * from the state in @desc), fold in @len bytes at @data and write the
 * digest to @out.
 */
static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
			       unsigned int len, u8 *out)
{
	return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
				    out);
}
2008-08-07 05:57:03 +04:00
/*
 * Transform constructor: preset the seed stored in the transform context
 * to all ones. Always returns 0.
 */
static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
{
	u32 *seed = crypto_tfm_ctx(tfm);

	seed[0] = ~0;

	return 0;
}
2012-09-28 02:44:22 +04:00
# ifdef CONFIG_X86_64
static int crc32c_pcl_intel_update ( struct shash_desc * desc , const u8 * data ,
unsigned int len )
{
u32 * crcp = shash_desc_ctx ( desc ) ;
/*
* use faster PCL version if datasize is large enough to
* overcome kernel fpu state save / restore overhead
*/
2019-03-13 08:12:48 +03:00
if ( len > = CRC32C_PCL_BREAKEVEN & & crypto_simd_usable ( ) ) {
2012-09-28 02:44:22 +04:00
kernel_fpu_begin ( ) ;
* crcp = crc_pcl ( data , len , * crcp ) ;
kernel_fpu_end ( ) ;
} else
* crcp = crc32c_intel_le_hw ( * crcp , data , len ) ;
return 0 ;
}
/*
 * Fold the last @len bytes at @data into the CRC read from @crcp and write
 * the bit-inverted result to @out as a little-endian 32-bit value.
 *
 * crc_pcl() is used, bracketed by kernel_fpu_begin()/kernel_fpu_end(), when
 * @len reaches CRC32C_PCL_BREAKEVEN and crypto_simd_usable() allows it;
 * otherwise the CRC32 instruction path is taken. Always returns 0.
 */
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
				    u8 *out)
{
	if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
		kernel_fpu_begin();
		*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
		kernel_fpu_end();
	} else {
		*(__le32 *)out =
			~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
	}

	return 0;
}
/*
 * Finish a hash operation with trailing data, continuing from the CRC state
 * kept in @desc; the work is delegated to __crc32c_pcl_intel_finup().
 */
static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
				  unsigned int len, u8 *out)
{
	u32 *state = shash_desc_ctx(desc);

	return __crc32c_pcl_intel_finup(state, data, len, out);
}
/*
 * One-shot digest: start from the seed held in the transform context (not
 * from the state in @desc); the work is delegated to
 * __crc32c_pcl_intel_finup().
 */
static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
				   unsigned int len, u8 *out)
{
	return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
					out);
}
# endif /* CONFIG_X86_64 */
2008-11-06 11:56:41 +03:00
static struct shash_alg alg = {
. setkey = crc32c_intel_setkey ,
. init = crc32c_intel_init ,
. update = crc32c_intel_update ,
. final = crc32c_intel_final ,
. finup = crc32c_intel_finup ,
. digest = crc32c_intel_digest ,
. descsize = sizeof ( u32 ) ,
. digestsize = CHKSUM_DIGEST_SIZE ,
. base = {
. cra_name = " crc32c " ,
. cra_driver_name = " crc32c-intel " ,
. cra_priority = 200 ,
2018-01-03 22:16:26 +03:00
. cra_flags = CRYPTO_ALG_OPTIONAL_KEY ,
2008-11-06 11:56:41 +03:00
. cra_blocksize = CHKSUM_BLOCK_SIZE ,
. cra_ctxsize = sizeof ( u32 ) ,
. cra_module = THIS_MODULE ,
. cra_init = crc32c_intel_cra_init ,
2008-08-07 05:57:03 +04:00
}
} ;
2012-01-26 03:09:06 +04:00
static const struct x86_cpu_id crc32c_cpu_id [ ] = {
2020-03-20 16:14:05 +03:00
X86_MATCH_FEATURE ( X86_FEATURE_XMM4_2 , NULL ) ,
2012-01-26 03:09:06 +04:00
{ }
} ;
MODULE_DEVICE_TABLE ( x86cpu , crc32c_cpu_id ) ;
2008-08-07 05:57:03 +04:00
/*
 * Module init: bail out with -ENODEV unless the CPU matches crc32c_cpu_id.
 * On 64-bit builds with X86_FEATURE_PCLMULQDQ present, switch the update,
 * finup and digest handlers to the PCL variants before registering the
 * algorithm. Returns the result of crypto_register_shash().
 */
static int __init crc32c_intel_mod_init(void)
{
	if (!x86_match_cpu(crc32c_cpu_id))
		return -ENODEV;
#ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
		alg.update = crc32c_pcl_intel_update;
		alg.finup = crc32c_pcl_intel_finup;
		alg.digest = crc32c_pcl_intel_digest;
	}
#endif
	return crypto_register_shash(&alg);
}
/* Module exit: unregister the algorithm registered by the init function. */
static void __exit crc32c_intel_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
module_init(crc32c_intel_mod_init);
module_exit(crc32c_intel_mod_fini);

/* Metadata strings carry no leading or trailing padding. */
MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
MODULE_LICENSE("GPL");

MODULE_ALIAS_CRYPTO("crc32c");
MODULE_ALIAS_CRYPTO("crc32c-intel");