2017-01-11 19:41:49 +03:00
/*
 * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
# include <crypto/algapi.h>
2019-11-08 15:22:08 +03:00
# include <crypto/internal/chacha.h>
2019-03-13 08:12:50 +03:00
# include <crypto/internal/simd.h>
2017-01-11 19:41:49 +03:00
# include <crypto/internal/skcipher.h>
2019-11-08 15:22:12 +03:00
# include <linux/jump_label.h>
2017-01-11 19:41:49 +03:00
# include <linux/kernel.h>
# include <linux/module.h>
# include <asm/hwcap.h>
# include <asm/neon.h>
2017-07-24 13:28:14 +03:00
# include <asm/simd.h>
2017-01-11 19:41:49 +03:00
2018-12-04 06:52:51 +03:00
asmlinkage void chacha_block_xor_neon ( u32 * state , u8 * dst , const u8 * src ,
int nrounds ) ;
asmlinkage void chacha_4block_xor_neon ( u32 * state , u8 * dst , const u8 * src ,
2018-12-04 16:13:32 +03:00
int nrounds , int bytes ) ;
2018-12-04 06:52:51 +03:00
asmlinkage void hchacha_block_neon ( const u32 * state , u32 * out , int nrounds ) ;
2017-01-11 19:41:49 +03:00
2019-11-08 15:22:12 +03:00
static __ro_after_init DEFINE_STATIC_KEY_FALSE ( have_neon ) ;
2018-12-04 06:52:51 +03:00
/*
 * chacha_doneon - run the NEON ChaCha routines over a buffer
 * @state:   ChaCha state; word 12 (the block counter) is advanced here
 * @dst:     output buffer
 * @src:     input buffer
 * @bytes:   number of bytes to process
 * @nrounds: number of rounds, passed through to the NEON routines
 *
 * The caller is expected to have NEON enabled already: every call site in
 * this file wraps this function in kernel_neon_begin()/kernel_neon_end().
 *
 * The input is consumed in pieces of at most 5 * CHACHA_BLOCK_SIZE bytes.
 * After each piece the pointers and the counter move forward by exactly the
 * amount that was processed.  Getting this wrong lets the counter drift out
 * of sync with the data when a piece of 2, 3 or 4 blocks is followed by more
 * input, which yields incorrect encryption/decryption.
 */
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  int bytes, int nrounds)
{
	int chunk;

	for (; bytes > 0; bytes -= chunk, src += chunk, dst += chunk) {
		u8 tail[CHACHA_BLOCK_SIZE];

		chunk = min(bytes, CHACHA_BLOCK_SIZE * 5);

		if (chunk <= CHACHA_BLOCK_SIZE) {
			/*
			 * At most one block is left.  Go through a
			 * block-sized bounce buffer so the single-block
			 * routine is always handed CHACHA_BLOCK_SIZE bytes
			 * of storage, then copy back only what was asked for.
			 */
			memcpy(tail, src, chunk);
			chacha_block_xor_neon(state, tail, tail, nrounds);
			memcpy(dst, tail, chunk);
			state[12] += 1;
			return;
		}

		chacha_4block_xor_neon(state, dst, src, nrounds, chunk);
		/* A trailing partial block still consumes a counter value. */
		state[12] += DIV_ROUND_UP(chunk, CHACHA_BLOCK_SIZE);
	}
}
2019-11-08 15:22:12 +03:00
/*
 * hchacha_block_arch - architecture entry point for the HChaCha block function
 * @state:   input ChaCha state
 * @stream:  output words
 * @nrounds: number of rounds
 *
 * Uses hchacha_block_neon() when the have_neon key is enabled and
 * crypto_simd_usable() says SIMD may be used in the current context;
 * otherwise falls back to hchacha_block_generic().
 */
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
	} else {
		hchacha_block_generic(state, stream, nrounds);
	}
}
EXPORT_SYMBOL(hchacha_block_arch);
/*
 * chacha_init_arch - architecture entry point for ChaCha state setup
 * @state: state array to initialise
 * @key:   key words
 * @iv:    initialisation vector bytes
 *
 * There is no accelerated variant of this step in this file: the call is
 * forwarded unchanged to chacha_init_generic().
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
/*
 * chacha_crypt_arch - architecture entry point for ChaCha encryption/decryption
 * @state:   ChaCha state, updated as data is processed
 * @dst:     output buffer
 * @src:     input buffer
 * @bytes:   number of bytes to process
 * @nrounds: number of rounds
 *
 * Falls back to chacha_crypt_generic() when NEON was not detected, when the
 * input is no longer than one block, or when SIMD may not be used in the
 * current context.  Otherwise the data goes through the NEON code.
 */
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
	    !crypto_simd_usable()) {
		chacha_crypt_generic(state, dst, src, bytes, nrounds);
		return;
	}

	/*
	 * Limit each NEON section to 4K of data.  Preemption is disabled
	 * while SIMD is in use, so the section must stay short: at an
	 * (overestimated) 5 cycles/byte on a 1 GHz CPU, 4K bounds it to
	 * roughly 20us.  The 4K figure is a reasoned guess rather than a
	 * measured optimum.
	 *
	 * bytes is greater than CHACHA_BLOCK_SIZE here, so the loop body
	 * always runs at least once.
	 */
	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);
2018-12-04 06:52:51 +03:00
static int chacha_neon_stream_xor ( struct skcipher_request * req ,
2019-06-03 08:47:14 +03:00
const struct chacha_ctx * ctx , const u8 * iv )
2017-01-11 19:41:49 +03:00
{
struct skcipher_walk walk ;
u32 state [ 16 ] ;
int err ;
2018-03-10 18:21:50 +03:00
err = skcipher_walk_virt ( & walk , req , false ) ;
2017-01-11 19:41:49 +03:00
2019-11-08 15:22:11 +03:00
chacha_init_generic ( state , ctx - > key , iv ) ;
2017-01-11 19:41:49 +03:00
while ( walk . nbytes > 0 ) {
unsigned int nbytes = walk . nbytes ;
if ( nbytes < walk . total )
2018-12-04 16:13:33 +03:00
nbytes = rounddown ( nbytes , walk . stride ) ;
2017-01-11 19:41:49 +03:00
2019-11-08 15:22:12 +03:00
if ( ! static_branch_likely ( & have_neon ) | |
! crypto_simd_usable ( ) ) {
2019-11-08 15:22:11 +03:00
chacha_crypt_generic ( state , walk . dst . virt . addr ,
walk . src . virt . addr , nbytes ,
ctx - > nrounds ) ;
} else {
kernel_neon_begin ( ) ;
chacha_doneon ( state , walk . dst . virt . addr ,
walk . src . virt . addr , nbytes , ctx - > nrounds ) ;
kernel_neon_end ( ) ;
}
2017-01-11 19:41:49 +03:00
err = skcipher_walk_done ( & walk , walk . nbytes - nbytes ) ;
}
return err ;
}
2018-12-04 06:52:51 +03:00
static int chacha_neon ( struct skcipher_request * req )
2018-12-04 06:52:50 +03:00
{
struct crypto_skcipher * tfm = crypto_skcipher_reqtfm ( req ) ;
struct chacha_ctx * ctx = crypto_skcipher_ctx ( tfm ) ;
2018-12-04 06:52:51 +03:00
return chacha_neon_stream_xor ( req , ctx , req - > iv ) ;
2018-12-04 06:52:50 +03:00
}
2018-12-04 06:52:51 +03:00
static int xchacha_neon ( struct skcipher_request * req )
2018-12-04 06:52:50 +03:00
{
struct crypto_skcipher * tfm = crypto_skcipher_reqtfm ( req ) ;
struct chacha_ctx * ctx = crypto_skcipher_ctx ( tfm ) ;
struct chacha_ctx subctx ;
u32 state [ 16 ] ;
u8 real_iv [ 16 ] ;
2019-11-08 15:22:11 +03:00
chacha_init_generic ( state , ctx - > key , req - > iv ) ;
2019-11-08 15:22:12 +03:00
hchacha_block_arch ( state , subctx . key , ctx - > nrounds ) ;
2018-12-04 06:52:51 +03:00
subctx . nrounds = ctx - > nrounds ;
2018-12-04 06:52:50 +03:00
memcpy ( & real_iv [ 0 ] , req - > iv + 24 , 8 ) ;
memcpy ( & real_iv [ 8 ] , req - > iv + 16 , 8 ) ;
2018-12-04 06:52:51 +03:00
return chacha_neon_stream_xor ( req , & subctx , real_iv ) ;
2018-12-04 06:52:50 +03:00
}
static struct skcipher_alg algs [ ] = {
{
. base . cra_name = " chacha20 " ,
. base . cra_driver_name = " chacha20-neon " ,
. base . cra_priority = 300 ,
. base . cra_blocksize = 1 ,
. base . cra_ctxsize = sizeof ( struct chacha_ctx ) ,
. base . cra_module = THIS_MODULE ,
. min_keysize = CHACHA_KEY_SIZE ,
. max_keysize = CHACHA_KEY_SIZE ,
. ivsize = CHACHA_IV_SIZE ,
. chunksize = CHACHA_BLOCK_SIZE ,
2018-12-04 16:13:33 +03:00
. walksize = 5 * CHACHA_BLOCK_SIZE ,
2019-11-08 15:22:11 +03:00
. setkey = chacha20_setkey ,
2018-12-04 06:52:51 +03:00
. encrypt = chacha_neon ,
. decrypt = chacha_neon ,
2018-12-04 06:52:50 +03:00
} , {
. base . cra_name = " xchacha20 " ,
. base . cra_driver_name = " xchacha20-neon " ,
. base . cra_priority = 300 ,
. base . cra_blocksize = 1 ,
. base . cra_ctxsize = sizeof ( struct chacha_ctx ) ,
. base . cra_module = THIS_MODULE ,
. min_keysize = CHACHA_KEY_SIZE ,
. max_keysize = CHACHA_KEY_SIZE ,
. ivsize = XCHACHA_IV_SIZE ,
. chunksize = CHACHA_BLOCK_SIZE ,
2018-12-04 16:13:33 +03:00
. walksize = 5 * CHACHA_BLOCK_SIZE ,
2019-11-08 15:22:11 +03:00
. setkey = chacha20_setkey ,
2018-12-04 06:52:51 +03:00
. encrypt = xchacha_neon ,
. decrypt = xchacha_neon ,
2018-12-04 06:52:52 +03:00
} , {
. base . cra_name = " xchacha12 " ,
. base . cra_driver_name = " xchacha12-neon " ,
. base . cra_priority = 300 ,
. base . cra_blocksize = 1 ,
. base . cra_ctxsize = sizeof ( struct chacha_ctx ) ,
. base . cra_module = THIS_MODULE ,
. min_keysize = CHACHA_KEY_SIZE ,
. max_keysize = CHACHA_KEY_SIZE ,
. ivsize = XCHACHA_IV_SIZE ,
. chunksize = CHACHA_BLOCK_SIZE ,
2018-12-04 16:13:33 +03:00
. walksize = 5 * CHACHA_BLOCK_SIZE ,
2019-11-08 15:22:11 +03:00
. setkey = chacha12_setkey ,
2018-12-04 06:52:52 +03:00
. encrypt = xchacha_neon ,
. decrypt = xchacha_neon ,
2018-12-04 06:52:50 +03:00
}
2017-01-11 19:41:49 +03:00
} ;
2018-12-04 06:52:51 +03:00
/*
 * Module init.
 *
 * Without ASIMD nothing is enabled or registered and 0 is returned; have_neon
 * then stays false, so the exported *_arch() functions keep using the generic
 * code.  With ASIMD the have_neon key is switched on and, if the skcipher API
 * is reachable from this object, the algorithms in algs[] are registered.
 *
 * Returns 0, or the result of crypto_register_skciphers().
 */
static int __init chacha_simd_mod_init(void)
{
	if (!cpu_have_named_feature(ASIMD))
		return 0;

	static_branch_enable(&have_neon);

	if (!IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER))
		return 0;

	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
2018-12-04 06:52:51 +03:00
/*
 * Module exit.
 *
 * Unregisters algs[] under exactly the conditions in which
 * chacha_simd_mod_init() registers them: the skcipher API is reachable and
 * the CPU reports ASIMD.
 */
static void __exit chacha_simd_mod_fini(void)
{
	if (!IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ||
	    !cpu_have_named_feature(ASIMD))
		return;

	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
2018-12-04 06:52:51 +03:00
module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

/*
 * Module metadata.  There is one crypto alias per generic algorithm name and
 * one per driver name registered in algs[].
 */
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-neon");