dfc6031ec9
Even though the kernel's implementations of AES-XTS were updated to implement ciphertext stealing and can operate on inputs of any size larger than or equal to the AES block size, this feature is rarely used in practice. In fact, in the kernel, AES-XTS is only used to operate on 4096 or 512 byte blocks, which means that not only is the ciphertext stealing effectively dead code, but the logic in the bit-sliced NEON implementation to deal with fewer than 8 blocks at a time is also never used. Since the bit-sliced NEON driver already depends on the plain NEON version, which is slower but can operate on smaller data quantities more straightforwardly, let's fall back to the plain NEON implementation of XTS for any residual inputs that are not multiples of 128 bytes. This allows us to remove a lot of complicated logic that rarely gets exercised in practice. Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
458 lines
12 KiB
C
458 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Bit sliced AES using NEON instructions
|
|
*
|
|
* Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
|
|
*/
|
|
|
|
#include <asm/neon.h>
|
|
#include <asm/simd.h>
|
|
#include <crypto/aes.h>
|
|
#include <crypto/ctr.h>
|
|
#include <crypto/internal/simd.h>
|
|
#include <crypto/internal/skcipher.h>
|
|
#include <crypto/scatterwalk.h>
|
|
#include <crypto/xts.h>
|
|
#include <linux/module.h>
|
|
|
|
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
|
MODULE_LICENSE("GPL v2");
|
|
|
|
MODULE_ALIAS_CRYPTO("ecb(aes)");
|
|
MODULE_ALIAS_CRYPTO("cbc(aes)");
|
|
MODULE_ALIAS_CRYPTO("ctr(aes)");
|
|
MODULE_ALIAS_CRYPTO("xts(aes)");
|
|
|
|
/*
 * Bit-sliced primitives, implemented outside this file (asmlinkage, so
 * presumably in the accompanying NEON assembly -- confirm).  Every call
 * made from this file sits inside a kernel_neon_begin()/kernel_neon_end()
 * section.
 *
 * aesbs_convert_key() turns an expanded AES key schedule (@rk) into the
 * form the other aesbs_*() routines take as their own @rk argument.
 */
asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);

/* ECB: no chaining state, hence no @iv argument */
asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks);
asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks);

/* CBC: only decryption has a bit-sliced version in this driver */
asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);

/* CTR: one routine serves both directions */
asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);

/* XTS: callers in this file only pass block counts that are multiples of 8 */
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]);
|
|
|
|
/* borrowed from aes-neon-blk.ko */
|
|
asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
|
int rounds, int blocks);
|
|
asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
|
int rounds, int blocks, u8 iv[]);
|
|
asmlinkage void neon_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
|
int rounds, int bytes, u8 ctr[]);
|
|
asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[],
|
|
u32 const rk1[], int rounds, int bytes,
|
|
u32 const rk2[], u8 iv[], int first);
|
|
asmlinkage void neon_aes_xts_decrypt(u8 out[], u8 const in[],
|
|
u32 const rk1[], int rounds, int bytes,
|
|
u32 const rk2[], u8 iv[], int first);
|
|
|
|
struct aesbs_ctx {
|
|
u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32];
|
|
int rounds;
|
|
} __aligned(AES_BLOCK_SIZE);
|
|
|
|
struct aesbs_cbc_ctr_ctx {
|
|
struct aesbs_ctx key;
|
|
u32 enc[AES_MAX_KEYLENGTH_U32];
|
|
};
|
|
|
|
struct aesbs_xts_ctx {
|
|
struct aesbs_ctx key;
|
|
u32 twkey[AES_MAX_KEYLENGTH_U32];
|
|
struct crypto_aes_ctx cts;
|
|
};
|
|
|
|
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
|
unsigned int key_len)
|
|
{
|
|
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
struct crypto_aes_ctx rk;
|
|
int err;
|
|
|
|
err = aes_expandkey(&rk, in_key, key_len);
|
|
if (err)
|
|
return err;
|
|
|
|
ctx->rounds = 6 + key_len / 4;
|
|
|
|
kernel_neon_begin();
|
|
aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
|
|
kernel_neon_end();
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __ecb_crypt(struct skcipher_request *req,
|
|
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
|
|
int rounds, int blocks))
|
|
{
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
struct skcipher_walk walk;
|
|
int err;
|
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
|
|
|
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
|
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
|
|
|
if (walk.nbytes < walk.total)
|
|
blocks = round_down(blocks,
|
|
walk.stride / AES_BLOCK_SIZE);
|
|
|
|
kernel_neon_begin();
|
|
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
|
|
ctx->rounds, blocks);
|
|
kernel_neon_end();
|
|
err = skcipher_walk_done(&walk,
|
|
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static int ecb_encrypt(struct skcipher_request *req)
|
|
{
|
|
return __ecb_crypt(req, aesbs_ecb_encrypt);
|
|
}
|
|
|
|
static int ecb_decrypt(struct skcipher_request *req)
|
|
{
|
|
return __ecb_crypt(req, aesbs_ecb_decrypt);
|
|
}
|
|
|
|
static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
|
unsigned int key_len)
|
|
{
|
|
struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
struct crypto_aes_ctx rk;
|
|
int err;
|
|
|
|
err = aes_expandkey(&rk, in_key, key_len);
|
|
if (err)
|
|
return err;
|
|
|
|
ctx->key.rounds = 6 + key_len / 4;
|
|
|
|
memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
|
|
|
|
kernel_neon_begin();
|
|
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
|
|
kernel_neon_end();
|
|
memzero_explicit(&rk, sizeof(rk));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int cbc_encrypt(struct skcipher_request *req)
|
|
{
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
struct skcipher_walk walk;
|
|
int err;
|
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
|
|
|
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
|
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
|
|
|
/* fall back to the non-bitsliced NEON implementation */
|
|
kernel_neon_begin();
|
|
neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
|
ctx->enc, ctx->key.rounds, blocks,
|
|
walk.iv);
|
|
kernel_neon_end();
|
|
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
static int cbc_decrypt(struct skcipher_request *req)
|
|
{
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
struct skcipher_walk walk;
|
|
int err;
|
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
|
|
|
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
|
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
|
|
|
if (walk.nbytes < walk.total)
|
|
blocks = round_down(blocks,
|
|
walk.stride / AES_BLOCK_SIZE);
|
|
|
|
kernel_neon_begin();
|
|
aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
|
ctx->key.rk, ctx->key.rounds, blocks,
|
|
walk.iv);
|
|
kernel_neon_end();
|
|
err = skcipher_walk_done(&walk,
|
|
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static int ctr_encrypt(struct skcipher_request *req)
|
|
{
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
struct skcipher_walk walk;
|
|
int err;
|
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
|
|
|
while (walk.nbytes > 0) {
|
|
int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7;
|
|
int nbytes = walk.nbytes % (8 * AES_BLOCK_SIZE);
|
|
const u8 *src = walk.src.virt.addr;
|
|
u8 *dst = walk.dst.virt.addr;
|
|
|
|
kernel_neon_begin();
|
|
if (blocks >= 8) {
|
|
aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds,
|
|
blocks, walk.iv);
|
|
dst += blocks * AES_BLOCK_SIZE;
|
|
src += blocks * AES_BLOCK_SIZE;
|
|
}
|
|
if (nbytes && walk.nbytes == walk.total) {
|
|
neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
|
|
nbytes, walk.iv);
|
|
nbytes = 0;
|
|
}
|
|
kernel_neon_end();
|
|
err = skcipher_walk_done(&walk, nbytes);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
|
unsigned int key_len)
|
|
{
|
|
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
struct crypto_aes_ctx rk;
|
|
int err;
|
|
|
|
err = xts_verify_key(tfm, in_key, key_len);
|
|
if (err)
|
|
return err;
|
|
|
|
key_len /= 2;
|
|
err = aes_expandkey(&ctx->cts, in_key, key_len);
|
|
if (err)
|
|
return err;
|
|
|
|
err = aes_expandkey(&rk, in_key + key_len, key_len);
|
|
if (err)
|
|
return err;
|
|
|
|
memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));
|
|
|
|
return aesbs_setkey(tfm, in_key, key_len);
|
|
}
|
|
|
|
static int __xts_crypt(struct skcipher_request *req, bool encrypt,
|
|
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
|
|
int rounds, int blocks, u8 iv[]))
|
|
{
|
|
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
|
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
|
int tail = req->cryptlen % (8 * AES_BLOCK_SIZE);
|
|
struct scatterlist sg_src[2], sg_dst[2];
|
|
struct skcipher_request subreq;
|
|
struct scatterlist *src, *dst;
|
|
struct skcipher_walk walk;
|
|
int nbytes, err;
|
|
int first = 1;
|
|
u8 *out, *in;
|
|
|
|
if (req->cryptlen < AES_BLOCK_SIZE)
|
|
return -EINVAL;
|
|
|
|
/* ensure that the cts tail is covered by a single step */
|
|
if (unlikely(tail > 0 && tail < AES_BLOCK_SIZE)) {
|
|
int xts_blocks = DIV_ROUND_UP(req->cryptlen,
|
|
AES_BLOCK_SIZE) - 2;
|
|
|
|
skcipher_request_set_tfm(&subreq, tfm);
|
|
skcipher_request_set_callback(&subreq,
|
|
skcipher_request_flags(req),
|
|
NULL, NULL);
|
|
skcipher_request_set_crypt(&subreq, req->src, req->dst,
|
|
xts_blocks * AES_BLOCK_SIZE,
|
|
req->iv);
|
|
req = &subreq;
|
|
} else {
|
|
tail = 0;
|
|
}
|
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
|
if (err)
|
|
return err;
|
|
|
|
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
|
int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7;
|
|
out = walk.dst.virt.addr;
|
|
in = walk.src.virt.addr;
|
|
nbytes = walk.nbytes;
|
|
|
|
kernel_neon_begin();
|
|
if (blocks >= 8) {
|
|
if (first == 1)
|
|
neon_aes_ecb_encrypt(walk.iv, walk.iv,
|
|
ctx->twkey,
|
|
ctx->key.rounds, 1);
|
|
first = 2;
|
|
|
|
fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
|
|
walk.iv);
|
|
|
|
out += blocks * AES_BLOCK_SIZE;
|
|
in += blocks * AES_BLOCK_SIZE;
|
|
nbytes -= blocks * AES_BLOCK_SIZE;
|
|
}
|
|
if (walk.nbytes == walk.total && nbytes > 0) {
|
|
if (encrypt)
|
|
neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
|
|
ctx->key.rounds, nbytes,
|
|
ctx->twkey, walk.iv, first);
|
|
else
|
|
neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
|
|
ctx->key.rounds, nbytes,
|
|
ctx->twkey, walk.iv, first);
|
|
nbytes = first = 0;
|
|
}
|
|
kernel_neon_end();
|
|
err = skcipher_walk_done(&walk, nbytes);
|
|
}
|
|
|
|
if (err || likely(!tail))
|
|
return err;
|
|
|
|
/* handle ciphertext stealing */
|
|
dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
|
|
if (req->dst != req->src)
|
|
dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
|
|
|
|
skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
|
|
req->iv);
|
|
|
|
err = skcipher_walk_virt(&walk, req, false);
|
|
if (err)
|
|
return err;
|
|
|
|
out = walk.dst.virt.addr;
|
|
in = walk.src.virt.addr;
|
|
nbytes = walk.nbytes;
|
|
|
|
kernel_neon_begin();
|
|
if (encrypt)
|
|
neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
|
|
nbytes, ctx->twkey, walk.iv, first);
|
|
else
|
|
neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
|
|
nbytes, ctx->twkey, walk.iv, first);
|
|
kernel_neon_end();
|
|
|
|
return skcipher_walk_done(&walk, 0);
|
|
}
|
|
|
|
static int xts_encrypt(struct skcipher_request *req)
|
|
{
|
|
return __xts_crypt(req, true, aesbs_xts_encrypt);
|
|
}
|
|
|
|
static int xts_decrypt(struct skcipher_request *req)
|
|
{
|
|
return __xts_crypt(req, false, aesbs_xts_decrypt);
|
|
}
|
|
|
|
static struct skcipher_alg aes_algs[] = { {
|
|
.base.cra_name = "ecb(aes)",
|
|
.base.cra_driver_name = "ecb-aes-neonbs",
|
|
.base.cra_priority = 250,
|
|
.base.cra_blocksize = AES_BLOCK_SIZE,
|
|
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
.min_keysize = AES_MIN_KEY_SIZE,
|
|
.max_keysize = AES_MAX_KEY_SIZE,
|
|
.walksize = 8 * AES_BLOCK_SIZE,
|
|
.setkey = aesbs_setkey,
|
|
.encrypt = ecb_encrypt,
|
|
.decrypt = ecb_decrypt,
|
|
}, {
|
|
.base.cra_name = "cbc(aes)",
|
|
.base.cra_driver_name = "cbc-aes-neonbs",
|
|
.base.cra_priority = 250,
|
|
.base.cra_blocksize = AES_BLOCK_SIZE,
|
|
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctr_ctx),
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
.min_keysize = AES_MIN_KEY_SIZE,
|
|
.max_keysize = AES_MAX_KEY_SIZE,
|
|
.walksize = 8 * AES_BLOCK_SIZE,
|
|
.ivsize = AES_BLOCK_SIZE,
|
|
.setkey = aesbs_cbc_ctr_setkey,
|
|
.encrypt = cbc_encrypt,
|
|
.decrypt = cbc_decrypt,
|
|
}, {
|
|
.base.cra_name = "ctr(aes)",
|
|
.base.cra_driver_name = "ctr-aes-neonbs",
|
|
.base.cra_priority = 250,
|
|
.base.cra_blocksize = 1,
|
|
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctr_ctx),
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
.min_keysize = AES_MIN_KEY_SIZE,
|
|
.max_keysize = AES_MAX_KEY_SIZE,
|
|
.chunksize = AES_BLOCK_SIZE,
|
|
.walksize = 8 * AES_BLOCK_SIZE,
|
|
.ivsize = AES_BLOCK_SIZE,
|
|
.setkey = aesbs_cbc_ctr_setkey,
|
|
.encrypt = ctr_encrypt,
|
|
.decrypt = ctr_encrypt,
|
|
}, {
|
|
.base.cra_name = "xts(aes)",
|
|
.base.cra_driver_name = "xts-aes-neonbs",
|
|
.base.cra_priority = 250,
|
|
.base.cra_blocksize = AES_BLOCK_SIZE,
|
|
.base.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
|
|
.base.cra_module = THIS_MODULE,
|
|
|
|
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
|
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
|
.walksize = 8 * AES_BLOCK_SIZE,
|
|
.ivsize = AES_BLOCK_SIZE,
|
|
.setkey = aesbs_xts_setkey,
|
|
.encrypt = xts_encrypt,
|
|
.decrypt = xts_decrypt,
|
|
} };
|
|
|
|
static void aes_exit(void)
|
|
{
|
|
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
|
|
}
|
|
|
|
/*
 * Module load: register the algorithms in aes_algs[], provided the CPU
 * reports the ASIMD feature.
 *
 * Returns -ENODEV without ASIMD, otherwise the result of
 * crypto_register_skciphers().
 */
static int __init aes_init(void)
{
	if (cpu_have_named_feature(ASIMD))
		return crypto_register_skciphers(aes_algs,
						 ARRAY_SIZE(aes_algs));

	return -ENODEV;
}
|
|
|
|
/* register the load and unload handlers of this module */
module_init(aes_init);
module_exit(aes_exit);
|