2018-11-16 17:26:29 -08:00
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Common values and helper functions for the NHPoly1305 hash function.
*/
# ifndef _NHPOLY1305_H
# define _NHPOLY1305_H
# include <crypto/hash.h>
crypto: poly1305 - add new 32 and 64-bit generic versions
These two C implementations from Zinc -- a 32x32 one and a 64x64 one,
depending on the platform -- come from Andrew Moon's public domain
poly1305-donna portable code, modified for usage in the kernel. The
precomputation in the 32-bit version and the use of 64x64 multiplies in
the 64-bit version make these perform better than the code they replace.
Moon's code is also very widespread and has received many eyeballs of
scrutiny.
There's a bit of interference with the x86 implementation, which
relies on internal details of the old scalar implementation. In the next
commit, the x86 implementation will be replaced with a faster one that
doesn't rely on this, so none of this matters much. But for now, to keep
this passing the tests, we inline the bits of the old implementation
that the x86 implementation relied on. Also, since we now support a
slightly larger key space, via the union, some offsets had to be fixed
up.
Nonce calculation was folded in with the emit function, to take
advantage of 64x64 arithmetic. However, Adiantum appeared to rely on no
nonce handling in emit, so this path was conditionalized. We also
introduced a new struct, poly1305_core_key, to represent the precise
amount of space that particular implementation uses.
Testing with kbench9000, depending on the CPU, the update function for
the 32x32 version has been improved by 4%-7%, and for the 64x64 by
19%-30%. The 32x32 gains are small, but I think there's great value in
having a parallel implementation to the 64x64 one so that the two can be
compared side-by-side as nice stand-alone units.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-01-05 22:40:46 -05:00
# include <crypto/internal/poly1305.h>
2018-11-16 17:26:29 -08:00
/*
 * NH parameterization.
 *
 * Every size below is derived from three base constants: NH_PAIR_STRIDE,
 * NH_NUM_PASSES and NH_NUM_STRIDES. The byte counts quoted in the comments
 * assume sizeof(u32) == 4 and sizeof(u64) == 8.
 */
/* Endianness: little */
/* Word size: 32 bits (works well on NEON, SSE2, AVX2) */
/* Stride: 2 words (optimal on ARM32 NEON; works okay on other CPUs too) */
# define NH_PAIR_STRIDE 2
/* Bytes in one message unit: 2 * 2 words = 4 words = 16 bytes */
# define NH_MESSAGE_UNIT (NH_PAIR_STRIDE * 2 * sizeof(u32))
/* Num passes (Toeplitz iteration count): 4, to give ε = 2^{-128} */
# define NH_NUM_PASSES 4
/* NH output size: one 64-bit value per pass = 32 bytes */
# define NH_HASH_BYTES (NH_NUM_PASSES * sizeof(u64))
/* Max message size: 1024 bytes (32x compression factor) */
# define NH_NUM_STRIDES 64
/* 2 * 2 * 64 = 256 words, i.e. 1024 bytes, per NH message */
# define NH_MESSAGE_WORDS (NH_PAIR_STRIDE * 2 * NH_NUM_STRIDES)
# define NH_MESSAGE_BYTES (NH_MESSAGE_WORDS * sizeof(u32))
/*
 * NH key length: one word per message word, plus NH_PAIR_STRIDE * 2 extra
 * words for each pass after the first: 256 + 4 * 3 = 268 words (1072 bytes).
 */
# define NH_KEY_WORDS (NH_MESSAGE_WORDS + \
NH_PAIR_STRIDE * 2 * ( NH_NUM_PASSES - 1 ) )
# define NH_KEY_BYTES (NH_KEY_WORDS * sizeof(u32))
/* Total key size: POLY1305_BLOCK_SIZE bytes plus the NH key bytes */
# define NHPOLY1305_KEY_SIZE (POLY1305_BLOCK_SIZE + NH_KEY_BYTES)
struct nhpoly1305_key {
crypto: poly1305 - add new 32 and 64-bit generic versions
These two C implementations from Zinc -- a 32x32 one and a 64x64 one,
depending on the platform -- come from Andrew Moon's public domain
poly1305-donna portable code, modified for usage in the kernel. The
precomputation in the 32-bit version and the use of 64x64 multiplies in
the 64-bit version make these perform better than the code it replaces.
Moon's code is also very widespread and has received many eyeballs of
scrutiny.
There's a bit of interference between the x86 implementation, which
relies on internal details of the old scalar implementation. In the next
commit, the x86 implementation will be replaced with a faster one that
doesn't rely on this, so none of this matters much. But for now, to keep
this passing the tests, we inline the bits of the old implementation
that the x86 implementation relied on. Also, since we now support a
slightly larger key space, via the union, some offsets had to be fixed
up.
Nonce calculation was folded in with the emit function, to take
advantage of 64x64 arithmetic. However, Adiantum appeared to rely on no
nonce handling in emit, so this path was conditionalized. We also
introduced a new struct, poly1305_core_key, to represent the precise
amount of space that particular implementation uses.
Testing with kbench9000, depending on the CPU, the update function for
the 32x32 version has been improved by 4%-7%, and for the 64x64 by
19%-30%. The 32x32 gains are small, but I think there's great value in
having a parallel implementation to the 64x64 one so that the two can be
compared side-by-side as nice stand-alone units.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2020-01-05 22:40:46 -05:00
struct poly1305_core_key poly_key ;
2018-11-16 17:26:29 -08:00
u32 nh_key [ NH_KEY_WORDS ] ;
} ;
/*
 * Running state of an NHPoly1305 computation.
 * NOTE(review): presumably this lives in the shash_desc context of the
 * functions declared below -- confirm against the implementation.
 */
struct nhpoly1305_state {
/* Running total of polynomial evaluation */
struct poly1305_state poly_state ;
/* Partial block buffer; its capacity is one NH message unit (NH_MESSAGE_UNIT bytes) */
u8 buffer [ NH_MESSAGE_UNIT ] ;
/* NOTE(review): presumably the number of bytes currently held in 'buffer' -- confirm */
unsigned int buflen ;
/*
* Number of bytes remaining until the current NH message reaches
* NH_MESSAGE_BYTES. When nonzero, 'nh_hash' holds the partial NH hash.
*/
unsigned int nh_remaining ;
/* NH hash accumulator: one little-endian 64-bit value per pass */
__le64 nh_hash [ NH_NUM_PASSES ] ;
} ;
/*
 * Pointer to an NH implementation; this is the type of the 'nh_fn' argument
 * taken by the *_helper functions declared below.
 *
 * key:         NH key words (read-only)
 * message:     message bytes (read-only)
 * message_len: length of 'message', presumably in bytes -- confirm
 * hash:        NH_NUM_PASSES little-endian 64-bit values; presumably the
 *              output of the NH computation -- confirm against callers
 */
typedef void ( * nh_t ) ( const u32 * key , const u8 * message , size_t message_len ,
__le64 hash [ NH_NUM_PASSES ] ) ;
/*
 * Set the NHPoly1305 key on the transform 'tfm' from the 'keylen' bytes at
 * 'key'.
 * NOTE(review): presumably 'keylen' must equal NHPOLY1305_KEY_SIZE and the
 * return value is 0 or a negative errno -- confirm against the definition.
 */
int crypto_nhpoly1305_setkey ( struct crypto_shash * tfm ,
const u8 * key , unsigned int keylen ) ;
/*
 * Begin a new NHPoly1305 computation on 'desc'.
 * NOTE(review): presumably resets the per-descriptor hashing state --
 * confirm against the definition.
 */
int crypto_nhpoly1305_init ( struct shash_desc * desc ) ;
/*
 * Feed 'srclen' bytes at 'src' into the computation on 'desc'.
 * NOTE(review): presumably the generic counterpart of
 * crypto_nhpoly1305_update_helper(), using a built-in NH implementation --
 * confirm against the definition.
 */
int crypto_nhpoly1305_update ( struct shash_desc * desc ,
const u8 * src , unsigned int srclen ) ;
/*
 * Same parameters as crypto_nhpoly1305_update(), plus 'nh_fn': the NH
 * implementation to use, supplied by the caller.
 * NOTE(review): presumably intended for architecture-specific NH code --
 * confirm against callers.
 */
int crypto_nhpoly1305_update_helper ( struct shash_desc * desc ,
const u8 * src , unsigned int srclen ,
nh_t nh_fn ) ;
/*
 * Finish the computation on 'desc' and write the result to 'dst'.
 * NOTE(review): the required size of 'dst' is not visible in this header --
 * confirm against the definition.
 */
int crypto_nhpoly1305_final ( struct shash_desc * desc , u8 * dst ) ;
/*
 * Same parameters as crypto_nhpoly1305_final(), plus 'nh_fn': the NH
 * implementation to use, supplied by the caller.
 * NOTE(review): presumably 'nh_fn' is applied to any data still buffered in
 * the state -- confirm against the definition.
 */
int crypto_nhpoly1305_final_helper ( struct shash_desc * desc , u8 * dst ,
nh_t nh_fn ) ;
# endif /* _NHPOLY1305_H */