// SPDX-License-Identifier: GPL-2.0
/*
 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */
#include <crypto/algapi.h>
#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
				      int nrounds);
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
				       int nrounds);
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const u32 *state, int nrounds);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}
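
/*
 * NEON bulk path: process four ChaCha blocks at a time, then any remaining
 * whole blocks one at a time, and finally XOR a partial tail through a
 * stack bounce buffer.  state[12] holds the block counter and is advanced
 * for every full block consumed.
 */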
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
		chacha_4block_xor_neon(state, dst, src, nrounds);
		bytes -= CHACHA_BLOCK_SIZE * 4;
		src += CHACHA_BLOCK_SIZE * 4;
		dst += CHACHA_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	while (bytes >= CHACHA_BLOCK_SIZE) {
		chacha_block_xor_neon(state, dst, src, nrounds);
		bytes -= CHACHA_BLOCK_SIZE;
		src += CHACHA_BLOCK_SIZE;
		dst += CHACHA_BLOCK_SIZE;
		state[12]++;
	}
	if (bytes) {
		memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, buf, buf, nrounds);
		memcpy(dst, buf, bytes);
	}
}
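
/*
 * HChaCha is the building block used to derive the XChaCha subkey.  Use the
 * NEON routine when it is available and usable in the current context,
 * otherwise fall back to the scalar ARM implementation.
 */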
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, stream, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);

void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
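
/*
 * Library interface: requests of at most one block, and systems without
 * usable NEON, take the scalar chacha_doarm() path; everything else is fed
 * to the NEON code in bounded chunks (see below).
 */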
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	/*
	 * The NEON code runs with preemption disabled between
	 * kernel_neon_begin() and kernel_neon_end(), so limit each pass to
	 * 4 KiB.  At an overestimated 5 cycles/byte on a 1 GHz CPU, this
	 * keeps the non-preemptible window to roughly 20 us.
	 */
	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
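
/*
 * skcipher back end shared by the chacha20/xchacha20/xchacha12 algorithms:
 * walk the request and process each contiguous chunk, rounding all but the
 * final chunk down to the walk stride.  The NEON path is only taken when
 * the caller passed neon == true (the -neon algorithms with usable SIMD).
 */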
static int chacha_stream_xor(struct skcipher_request *req,
			     const struct chacha_ctx *ctx, const u8 *iv,
			     bool neon)
{
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	chacha_init_generic(state, ctx->key, iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
				     nbytes, state, ctx->nrounds);
			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
		} else {
			kernel_neon_begin();
			chacha_doneon(state, walk.dst.virt.addr,
				      walk.src.virt.addr, nbytes, ctx->nrounds);
			kernel_neon_end();
		}
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static int do_chacha(struct skcipher_request *req, bool neon)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);

	return chacha_stream_xor(req, ctx, req->iv, neon);
}

static int chacha_arm(struct skcipher_request *req)
{
	return do_chacha(req, false);
}

static int chacha_neon(struct skcipher_request *req)
{
	return do_chacha(req, neon_usable());
}
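
/*
 * XChaCha: derive a subkey by running HChaCha over the key and the first
 * 16 bytes of the 32-byte IV, then run plain ChaCha with that subkey.
 * IV bytes 16-23 provide the remaining 64 nonce bits and bytes 24-31 the
 * initial stream position.
 */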
static int do_xchacha(struct skcipher_request *req, bool neon)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct chacha_ctx subctx;
	u32 state[16];
	u8 real_iv[16];

	chacha_init_generic(state, ctx->key, req->iv);

	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
		hchacha_block_arm(state, subctx.key, ctx->nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, subctx.key, ctx->nrounds);
		kernel_neon_end();
	}
	subctx.nrounds = ctx->nrounds;

	memcpy(&real_iv[0], req->iv + 24, 8);
	memcpy(&real_iv[8], req->iv + 16, 8);

	return chacha_stream_xor(req, &subctx, real_iv, neon);
}

static int xchacha_arm(struct skcipher_request *req)
{
	return do_xchacha(req, false);
}

static int xchacha_neon(struct skcipher_request *req)
{
	return do_xchacha(req, neon_usable());
}

static struct skcipher_alg arm_algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_arm,
		.decrypt		= chacha_arm,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_arm,
		.decrypt		= xchacha_arm,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-arm",
		.base.cra_priority	= 200,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_arm,
		.decrypt		= xchacha_arm,
	},
};
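
/*
 * The same algorithms wired to the NEON entry points at a higher priority,
 * with a walksize of four blocks so that full steps of the skcipher walk
 * map cleanly onto the 4-block NEON routine.
 */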
static struct skcipher_alg neon_algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_neon,
		.decrypt		= chacha_neon,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 4 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}
};
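
/*
 * Register the scalar algorithms first, then (when NEON is present) the
 * NEON variants at a higher priority.  Cortex-A5/A7 keep the NEON entries
 * at priority 0 and leave the static key unset, since the scalar code is
 * both faster and more power efficient on those cores.
 */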
static int __init chacha_simd_mod_init(void)
{
	int err = 0;

	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
		err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
		if (err)
			return err;
	}

	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		int i;

		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 do not perform well
			 * with the NEON implementation but do incredibly
			 * well with the scalar one and use less power.
			 */
			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
				neon_algs[i].base.cra_priority = 0;
			break;
		default:
			static_branch_enable(&use_neon);
		}

		if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
			err = crypto_register_skciphers(neon_algs,
							ARRAY_SIZE(neon_algs));
			if (err)
				crypto_unregister_skciphers(arm_algs,
							    ARRAY_SIZE(arm_algs));
		}
	}
	return err;
}

static void __exit chacha_simd_mod_fini(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
		crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
			crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
	}
}

module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-arm");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-arm");
#ifdef CONFIG_KERNEL_MODE_NEON
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha12-neon");
#endif