Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: "API: - Try to catch hash output overrun in testmgr - Introduce walksize attribute for batched walking - Make crypto_xor() and crypto_inc() alignment agnostic Algorithms: - Add time-invariant AES algorithm - Add standalone CBCMAC algorithm Drivers: - Add NEON acclerated chacha20 on ARM/ARM64 - Expose AES-CTR as synchronous skcipher on ARM64 - Add scalar AES implementation on ARM64 - Improve scalar AES implementation on ARM - Improve NEON AES implementation on ARM/ARM64 - Merge CRC32 and PMULL instruction based drivers on ARM64 - Add NEON acclerated CBCMAC/CMAC/XCBC AES on ARM64 - Add IPsec AUTHENC implementation in atmel - Add Support for Octeon-tx CPT Engine - Add Broadcom SPU driver - Add MediaTek driver" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (142 commits) crypto: xts - Add ECB dependency crypto: cavium - switch to pci_alloc_irq_vectors crypto: cavium - switch to pci_alloc_irq_vectors crypto: cavium - remove dead MSI-X related define crypto: brcm - Avoid double free in ahash_finup() crypto: cavium - fix Kconfig dependencies crypto: cavium - cpt_bind_vq_to_grp could return an error code crypto: doc - fix typo hwrng: omap - update Kconfig help description crypto: ccm - drop unnecessary minimum 32-bit alignment crypto: ccm - honour alignmask of subordinate MAC cipher crypto: caam - fix state buffer DMA (un)mapping crypto: caam - abstract ahash request double buffering crypto: caam - fix error path for ctx_dma mapping failure crypto: caam - fix DMA API leaks for multiple setkey() calls crypto: caam - don't dma_map key for hash algorithms crypto: caam - use dma_map_sg() return code crypto: caam - replace sg_count() with sg_nents_for_len() crypto: caam - check sg_count() return value crypto: caam - fix HW S/G in ablkcipher_giv_edesc_alloc() ..
This commit is contained in:
commit
5bcbe22ca4
@ -14,7 +14,7 @@ Asynchronous Message Digest API
|
||||
:doc: Asynchronous Message Digest API
|
||||
|
||||
.. kernel-doc:: include/crypto/hash.h
|
||||
:functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import
|
||||
:functions: crypto_alloc_ahash crypto_free_ahash crypto_ahash_init crypto_ahash_digestsize crypto_ahash_reqtfm crypto_ahash_reqsize crypto_ahash_statesize crypto_ahash_setkey crypto_ahash_finup crypto_ahash_final crypto_ahash_digest crypto_ahash_export crypto_ahash_import
|
||||
|
||||
Asynchronous Hash Request Handle
|
||||
--------------------------------
|
||||
|
@ -59,4 +59,4 @@ Synchronous Block Cipher API - Deprecated
|
||||
:doc: Synchronous Block Cipher API
|
||||
|
||||
.. kernel-doc:: include/linux/crypto.h
|
||||
:functions: crypto_alloc_blkcipher rypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv
|
||||
:functions: crypto_alloc_blkcipher crypto_free_blkcipher crypto_has_blkcipher crypto_blkcipher_name crypto_blkcipher_ivsize crypto_blkcipher_blocksize crypto_blkcipher_setkey crypto_blkcipher_encrypt crypto_blkcipher_encrypt_iv crypto_blkcipher_decrypt crypto_blkcipher_decrypt_iv crypto_blkcipher_set_iv crypto_blkcipher_get_iv
|
||||
|
22
Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt
Normal file
22
Documentation/devicetree/bindings/crypto/brcm,spu-crypto.txt
Normal file
@ -0,0 +1,22 @@
|
||||
The Broadcom Secure Processing Unit (SPU) hardware supports symmetric
|
||||
cryptographic offload for Broadcom SoCs. A SoC may have multiple SPU hardware
|
||||
blocks.
|
||||
|
||||
Required properties:
|
||||
- compatible: Should be one of the following:
|
||||
brcm,spum-crypto - for devices with SPU-M hardware
|
||||
brcm,spu2-crypto - for devices with SPU2 hardware
|
||||
brcm,spu2-v2-crypto - for devices with enhanced SPU2 hardware features like SHA3
|
||||
and Rabin Fingerprint support
|
||||
brcm,spum-nsp-crypto - for the Northstar Plus variant of the SPU-M hardware
|
||||
|
||||
- reg: Should contain SPU registers location and length.
|
||||
- mboxes: The mailbox channel to be used to communicate with the SPU.
|
||||
Mailbox channels correspond to DMA rings on the device.
|
||||
|
||||
Example:
|
||||
crypto@612d0000 {
|
||||
compatible = "brcm,spum-crypto";
|
||||
reg = <0 0x612d0000 0 0x900>;
|
||||
mboxes = <&pdc0 0>;
|
||||
};
|
27
Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
Normal file
27
Documentation/devicetree/bindings/crypto/mediatek-crypto.txt
Normal file
@ -0,0 +1,27 @@
|
||||
MediaTek cryptographic accelerators
|
||||
|
||||
Required properties:
|
||||
- compatible: Should be "mediatek,eip97-crypto"
|
||||
- reg: Address and length of the register set for the device
|
||||
- interrupts: Should contain the five crypto engines interrupts in numeric
|
||||
order. These are global system and four descriptor rings.
|
||||
- clocks: the clock used by the core
|
||||
- clock-names: the names of the clock listed in the clocks property. These are
|
||||
"ethif", "cryp"
|
||||
- power-domains: Must contain a reference to the PM domain.
|
||||
|
||||
|
||||
Example:
|
||||
crypto: crypto@1b240000 {
|
||||
compatible = "mediatek,eip97-crypto";
|
||||
reg = <0 0x1b240000 0 0x20000>;
|
||||
interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_LOW>,
|
||||
<GIC_SPI 83 IRQ_TYPE_LEVEL_LOW>,
|
||||
<GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>,
|
||||
<GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>,
|
||||
<GIC_SPI 97 IRQ_TYPE_LEVEL_LOW>;
|
||||
clocks = <&topckgen CLK_TOP_ETHIF_SEL>,
|
||||
<ðsys CLK_ETHSYS_CRYPTO>;
|
||||
clock-names = "ethif","cryp";
|
||||
power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
|
||||
};
|
@ -3031,6 +3031,13 @@ W: http://www.cavium.com
|
||||
S: Supported
|
||||
F: drivers/net/ethernet/cavium/liquidio/
|
||||
|
||||
CAVIUM OCTEON-TX CRYPTO DRIVER
|
||||
M: George Cherian <george.cherian@cavium.com>
|
||||
L: linux-crypto@vger.kernel.org
|
||||
W: http://www.cavium.com
|
||||
S: Supported
|
||||
F: drivers/crypto/cavium/cpt/
|
||||
|
||||
CC2520 IEEE-802.15.4 RADIO DRIVER
|
||||
M: Varka Bhadram <varkabhadram@gmail.com>
|
||||
L: linux-wpan@vger.kernel.org
|
||||
|
@ -62,35 +62,18 @@ config CRYPTO_SHA512_ARM
|
||||
using optimized ARM assembler and NEON, when available.
|
||||
|
||||
config CRYPTO_AES_ARM
|
||||
tristate "AES cipher algorithms (ARM-asm)"
|
||||
depends on ARM
|
||||
tristate "Scalar AES cipher for ARM"
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_AES
|
||||
help
|
||||
Use optimized AES assembler routines for ARM platforms.
|
||||
|
||||
AES cipher algorithms (FIPS-197). AES uses the Rijndael
|
||||
algorithm.
|
||||
|
||||
Rijndael appears to be consistently a very good performer in
|
||||
both hardware and software across a wide range of computing
|
||||
environments regardless of its use in feedback or non-feedback
|
||||
modes. Its key setup time is excellent, and its key agility is
|
||||
good. Rijndael's very low memory requirements make it very well
|
||||
suited for restricted-space environments, in which it also
|
||||
demonstrates excellent performance. Rijndael's operations are
|
||||
among the easiest to defend against power and timing attacks.
|
||||
|
||||
The AES specifies three key sizes: 128, 192 and 256 bits
|
||||
|
||||
See <http://csrc.nist.gov/encryption/aes/> for more information.
|
||||
|
||||
config CRYPTO_AES_ARM_BS
|
||||
tristate "Bit sliced AES using NEON instructions"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_AES_ARM
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_SIMD
|
||||
select CRYPTO_AES_ARM
|
||||
help
|
||||
Use a faster and more secure NEON based implementation of AES in CBC,
|
||||
CTR and XTS modes
|
||||
@ -130,4 +113,10 @@ config CRYPTO_CRC32_ARM_CE
|
||||
depends on KERNEL_MODE_NEON && CRC32
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_CHACHA20_NEON
|
||||
tristate "NEON accelerated ChaCha20 symmetric cipher"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_CHACHA20
|
||||
|
||||
endif
|
||||
|
@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
|
||||
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
|
||||
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
|
||||
|
||||
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
|
||||
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
|
||||
@ -26,8 +27,8 @@ $(warning $(ce-obj-y) $(ce-obj-m))
|
||||
endif
|
||||
endif
|
||||
|
||||
aes-arm-y := aes-armv4.o aes_glue.o
|
||||
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
|
||||
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
|
||||
aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
|
||||
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
|
||||
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
|
||||
sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
|
||||
@ -40,17 +41,15 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
|
||||
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
|
||||
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
|
||||
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
|
||||
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
|
||||
|
||||
quiet_cmd_perl = PERL $@
|
||||
cmd_perl = $(PERL) $(<) > $(@)
|
||||
|
||||
$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
|
||||
$(call cmd,perl)
|
||||
|
||||
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
|
||||
$(call cmd,perl)
|
||||
|
||||
$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
|
||||
$(call cmd,perl)
|
||||
|
||||
.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S
|
||||
.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -169,19 +169,19 @@ ENTRY(ce_aes_ecb_encrypt)
|
||||
.Lecbencloop3x:
|
||||
subs r4, r4, #3
|
||||
bmi .Lecbenc1x
|
||||
vld1.8 {q0-q1}, [r1, :64]!
|
||||
vld1.8 {q2}, [r1, :64]!
|
||||
vld1.8 {q0-q1}, [r1]!
|
||||
vld1.8 {q2}, [r1]!
|
||||
bl aes_encrypt_3x
|
||||
vst1.8 {q0-q1}, [r0, :64]!
|
||||
vst1.8 {q2}, [r0, :64]!
|
||||
vst1.8 {q0-q1}, [r0]!
|
||||
vst1.8 {q2}, [r0]!
|
||||
b .Lecbencloop3x
|
||||
.Lecbenc1x:
|
||||
adds r4, r4, #3
|
||||
beq .Lecbencout
|
||||
.Lecbencloop:
|
||||
vld1.8 {q0}, [r1, :64]!
|
||||
vld1.8 {q0}, [r1]!
|
||||
bl aes_encrypt
|
||||
vst1.8 {q0}, [r0, :64]!
|
||||
vst1.8 {q0}, [r0]!
|
||||
subs r4, r4, #1
|
||||
bne .Lecbencloop
|
||||
.Lecbencout:
|
||||
@ -195,19 +195,19 @@ ENTRY(ce_aes_ecb_decrypt)
|
||||
.Lecbdecloop3x:
|
||||
subs r4, r4, #3
|
||||
bmi .Lecbdec1x
|
||||
vld1.8 {q0-q1}, [r1, :64]!
|
||||
vld1.8 {q2}, [r1, :64]!
|
||||
vld1.8 {q0-q1}, [r1]!
|
||||
vld1.8 {q2}, [r1]!
|
||||
bl aes_decrypt_3x
|
||||
vst1.8 {q0-q1}, [r0, :64]!
|
||||
vst1.8 {q2}, [r0, :64]!
|
||||
vst1.8 {q0-q1}, [r0]!
|
||||
vst1.8 {q2}, [r0]!
|
||||
b .Lecbdecloop3x
|
||||
.Lecbdec1x:
|
||||
adds r4, r4, #3
|
||||
beq .Lecbdecout
|
||||
.Lecbdecloop:
|
||||
vld1.8 {q0}, [r1, :64]!
|
||||
vld1.8 {q0}, [r1]!
|
||||
bl aes_decrypt
|
||||
vst1.8 {q0}, [r0, :64]!
|
||||
vst1.8 {q0}, [r0]!
|
||||
subs r4, r4, #1
|
||||
bne .Lecbdecloop
|
||||
.Lecbdecout:
|
||||
@ -226,10 +226,10 @@ ENTRY(ce_aes_cbc_encrypt)
|
||||
vld1.8 {q0}, [r5]
|
||||
prepare_key r2, r3
|
||||
.Lcbcencloop:
|
||||
vld1.8 {q1}, [r1, :64]! @ get next pt block
|
||||
vld1.8 {q1}, [r1]! @ get next pt block
|
||||
veor q0, q0, q1 @ ..and xor with iv
|
||||
bl aes_encrypt
|
||||
vst1.8 {q0}, [r0, :64]!
|
||||
vst1.8 {q0}, [r0]!
|
||||
subs r4, r4, #1
|
||||
bne .Lcbcencloop
|
||||
vst1.8 {q0}, [r5]
|
||||
@ -244,8 +244,8 @@ ENTRY(ce_aes_cbc_decrypt)
|
||||
.Lcbcdecloop3x:
|
||||
subs r4, r4, #3
|
||||
bmi .Lcbcdec1x
|
||||
vld1.8 {q0-q1}, [r1, :64]!
|
||||
vld1.8 {q2}, [r1, :64]!
|
||||
vld1.8 {q0-q1}, [r1]!
|
||||
vld1.8 {q2}, [r1]!
|
||||
vmov q3, q0
|
||||
vmov q4, q1
|
||||
vmov q5, q2
|
||||
@ -254,19 +254,19 @@ ENTRY(ce_aes_cbc_decrypt)
|
||||
veor q1, q1, q3
|
||||
veor q2, q2, q4
|
||||
vmov q6, q5
|
||||
vst1.8 {q0-q1}, [r0, :64]!
|
||||
vst1.8 {q2}, [r0, :64]!
|
||||
vst1.8 {q0-q1}, [r0]!
|
||||
vst1.8 {q2}, [r0]!
|
||||
b .Lcbcdecloop3x
|
||||
.Lcbcdec1x:
|
||||
adds r4, r4, #3
|
||||
beq .Lcbcdecout
|
||||
vmov q15, q14 @ preserve last round key
|
||||
.Lcbcdecloop:
|
||||
vld1.8 {q0}, [r1, :64]! @ get next ct block
|
||||
vld1.8 {q0}, [r1]! @ get next ct block
|
||||
veor q14, q15, q6 @ combine prev ct with last key
|
||||
vmov q6, q0
|
||||
bl aes_decrypt
|
||||
vst1.8 {q0}, [r0, :64]!
|
||||
vst1.8 {q0}, [r0]!
|
||||
subs r4, r4, #1
|
||||
bne .Lcbcdecloop
|
||||
.Lcbcdecout:
|
||||
@ -300,15 +300,15 @@ ENTRY(ce_aes_ctr_encrypt)
|
||||
rev ip, r6
|
||||
add r6, r6, #1
|
||||
vmov s11, ip
|
||||
vld1.8 {q3-q4}, [r1, :64]!
|
||||
vld1.8 {q5}, [r1, :64]!
|
||||
vld1.8 {q3-q4}, [r1]!
|
||||
vld1.8 {q5}, [r1]!
|
||||
bl aes_encrypt_3x
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q4
|
||||
veor q2, q2, q5
|
||||
rev ip, r6
|
||||
vst1.8 {q0-q1}, [r0, :64]!
|
||||
vst1.8 {q2}, [r0, :64]!
|
||||
vst1.8 {q0-q1}, [r0]!
|
||||
vst1.8 {q2}, [r0]!
|
||||
vmov s27, ip
|
||||
b .Lctrloop3x
|
||||
.Lctr1x:
|
||||
@ -318,10 +318,10 @@ ENTRY(ce_aes_ctr_encrypt)
|
||||
vmov q0, q6
|
||||
bl aes_encrypt
|
||||
subs r4, r4, #1
|
||||
bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
|
||||
vld1.8 {q3}, [r1, :64]!
|
||||
bmi .Lctrtailblock @ blocks < 0 means tail block
|
||||
vld1.8 {q3}, [r1]!
|
||||
veor q3, q0, q3
|
||||
vst1.8 {q3}, [r0, :64]!
|
||||
vst1.8 {q3}, [r0]!
|
||||
|
||||
adds r6, r6, #1 @ increment BE ctr
|
||||
rev ip, r6
|
||||
@ -333,10 +333,8 @@ ENTRY(ce_aes_ctr_encrypt)
|
||||
vst1.8 {q6}, [r5]
|
||||
pop {r4-r6, pc}
|
||||
|
||||
.Lctrhalfblock:
|
||||
vld1.8 {d1}, [r1, :64]
|
||||
veor d0, d0, d1
|
||||
vst1.8 {d0}, [r0, :64]
|
||||
.Lctrtailblock:
|
||||
vst1.8 {q0}, [r0, :64] @ return just the key stream
|
||||
pop {r4-r6, pc}
|
||||
|
||||
.Lctrcarry:
|
||||
@ -405,8 +403,8 @@ ENTRY(ce_aes_xts_encrypt)
|
||||
.Lxtsenc3x:
|
||||
subs r4, r4, #3
|
||||
bmi .Lxtsenc1x
|
||||
vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks
|
||||
vld1.8 {q2}, [r1, :64]!
|
||||
vld1.8 {q0-q1}, [r1]! @ get 3 pt blocks
|
||||
vld1.8 {q2}, [r1]!
|
||||
next_tweak q4, q3, q7, q6
|
||||
veor q0, q0, q3
|
||||
next_tweak q5, q4, q7, q6
|
||||
@ -416,8 +414,8 @@ ENTRY(ce_aes_xts_encrypt)
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q4
|
||||
veor q2, q2, q5
|
||||
vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks
|
||||
vst1.8 {q2}, [r0, :64]!
|
||||
vst1.8 {q0-q1}, [r0]! @ write 3 ct blocks
|
||||
vst1.8 {q2}, [r0]!
|
||||
vmov q3, q5
|
||||
teq r4, #0
|
||||
beq .Lxtsencout
|
||||
@ -426,11 +424,11 @@ ENTRY(ce_aes_xts_encrypt)
|
||||
adds r4, r4, #3
|
||||
beq .Lxtsencout
|
||||
.Lxtsencloop:
|
||||
vld1.8 {q0}, [r1, :64]!
|
||||
vld1.8 {q0}, [r1]!
|
||||
veor q0, q0, q3
|
||||
bl aes_encrypt
|
||||
veor q0, q0, q3
|
||||
vst1.8 {q0}, [r0, :64]!
|
||||
vst1.8 {q0}, [r0]!
|
||||
subs r4, r4, #1
|
||||
beq .Lxtsencout
|
||||
next_tweak q3, q3, q7, q6
|
||||
@ -456,8 +454,8 @@ ENTRY(ce_aes_xts_decrypt)
|
||||
.Lxtsdec3x:
|
||||
subs r4, r4, #3
|
||||
bmi .Lxtsdec1x
|
||||
vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks
|
||||
vld1.8 {q2}, [r1, :64]!
|
||||
vld1.8 {q0-q1}, [r1]! @ get 3 ct blocks
|
||||
vld1.8 {q2}, [r1]!
|
||||
next_tweak q4, q3, q7, q6
|
||||
veor q0, q0, q3
|
||||
next_tweak q5, q4, q7, q6
|
||||
@ -467,8 +465,8 @@ ENTRY(ce_aes_xts_decrypt)
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q4
|
||||
veor q2, q2, q5
|
||||
vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks
|
||||
vst1.8 {q2}, [r0, :64]!
|
||||
vst1.8 {q0-q1}, [r0]! @ write 3 pt blocks
|
||||
vst1.8 {q2}, [r0]!
|
||||
vmov q3, q5
|
||||
teq r4, #0
|
||||
beq .Lxtsdecout
|
||||
@ -477,12 +475,12 @@ ENTRY(ce_aes_xts_decrypt)
|
||||
adds r4, r4, #3
|
||||
beq .Lxtsdecout
|
||||
.Lxtsdecloop:
|
||||
vld1.8 {q0}, [r1, :64]!
|
||||
vld1.8 {q0}, [r1]!
|
||||
veor q0, q0, q3
|
||||
add ip, r2, #32 @ 3rd round key
|
||||
bl aes_decrypt
|
||||
veor q0, q0, q3
|
||||
vst1.8 {q0}, [r0, :64]!
|
||||
vst1.8 {q0}, [r0]!
|
||||
subs r4, r4, #1
|
||||
beq .Lxtsdecout
|
||||
next_tweak q3, q3, q7, q6
|
||||
|
@ -278,14 +278,15 @@ static int ctr_encrypt(struct skcipher_request *req)
|
||||
u8 *tsrc = walk.src.virt.addr;
|
||||
|
||||
/*
|
||||
* Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
|
||||
* to tell aes_ctr_encrypt() to only read half a block.
|
||||
* Tell aes_ctr_encrypt() to process a tail block.
|
||||
*/
|
||||
blocks = (nbytes <= 8) ? -1 : 1;
|
||||
blocks = -1;
|
||||
|
||||
ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
|
||||
ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
|
||||
num_rounds(ctx), blocks, walk.iv);
|
||||
memcpy(tdst, tail, nbytes);
|
||||
if (tdst != tsrc)
|
||||
memcpy(tdst, tsrc, nbytes);
|
||||
crypto_xor(tdst, tail, nbytes);
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
@ -345,7 +346,6 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
@ -361,7 +361,6 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
@ -378,7 +377,6 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
@ -396,7 +394,6 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
|
179
arch/arm/crypto/aes-cipher-core.S
Normal file
179
arch/arm/crypto/aes-cipher-core.S
Normal file
@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Scalar AES core transform
|
||||
*
|
||||
* Copyright (C) 2017 Linaro Ltd.
|
||||
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
.align 5
|
||||
|
||||
rk .req r0
|
||||
rounds .req r1
|
||||
in .req r2
|
||||
out .req r3
|
||||
ttab .req ip
|
||||
|
||||
t0 .req lr
|
||||
t1 .req r2
|
||||
t2 .req r3
|
||||
|
||||
.macro __select, out, in, idx
|
||||
.if __LINUX_ARM_ARCH__ < 7
|
||||
and \out, \in, #0xff << (8 * \idx)
|
||||
.else
|
||||
ubfx \out, \in, #(8 * \idx), #8
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro __load, out, in, idx
|
||||
.if __LINUX_ARM_ARCH__ < 7 && \idx > 0
|
||||
ldr \out, [ttab, \in, lsr #(8 * \idx) - 2]
|
||||
.else
|
||||
ldr \out, [ttab, \in, lsl #2]
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
|
||||
__select \out0, \in0, 0
|
||||
__select t0, \in1, 1
|
||||
__load \out0, \out0, 0
|
||||
__load t0, t0, 1
|
||||
|
||||
.if \enc
|
||||
__select \out1, \in1, 0
|
||||
__select t1, \in2, 1
|
||||
.else
|
||||
__select \out1, \in3, 0
|
||||
__select t1, \in0, 1
|
||||
.endif
|
||||
__load \out1, \out1, 0
|
||||
__select t2, \in2, 2
|
||||
__load t1, t1, 1
|
||||
__load t2, t2, 2
|
||||
|
||||
eor \out0, \out0, t0, ror #24
|
||||
|
||||
__select t0, \in3, 3
|
||||
.if \enc
|
||||
__select \t3, \in3, 2
|
||||
__select \t4, \in0, 3
|
||||
.else
|
||||
__select \t3, \in1, 2
|
||||
__select \t4, \in2, 3
|
||||
.endif
|
||||
__load \t3, \t3, 2
|
||||
__load t0, t0, 3
|
||||
__load \t4, \t4, 3
|
||||
|
||||
eor \out1, \out1, t1, ror #24
|
||||
eor \out0, \out0, t2, ror #16
|
||||
ldm rk!, {t1, t2}
|
||||
eor \out1, \out1, \t3, ror #16
|
||||
eor \out0, \out0, t0, ror #8
|
||||
eor \out1, \out1, \t4, ror #8
|
||||
eor \out0, \out0, t1
|
||||
eor \out1, \out1, t2
|
||||
.endm
|
||||
|
||||
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3
|
||||
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
|
||||
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
|
||||
.endm
|
||||
|
||||
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3
|
||||
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
|
||||
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
|
||||
.endm
|
||||
|
||||
.macro __rev, out, in
|
||||
.if __LINUX_ARM_ARCH__ < 6
|
||||
lsl t0, \in, #24
|
||||
and t1, \in, #0xff00
|
||||
and t2, \in, #0xff0000
|
||||
orr \out, t0, \in, lsr #24
|
||||
orr \out, \out, t1, lsl #8
|
||||
orr \out, \out, t2, lsr #8
|
||||
.else
|
||||
rev \out, \in
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro __adrl, out, sym, c
|
||||
.if __LINUX_ARM_ARCH__ < 7
|
||||
ldr\c \out, =\sym
|
||||
.else
|
||||
movw\c \out, #:lower16:\sym
|
||||
movt\c \out, #:upper16:\sym
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro do_crypt, round, ttab, ltab
|
||||
push {r3-r11, lr}
|
||||
|
||||
ldr r4, [in]
|
||||
ldr r5, [in, #4]
|
||||
ldr r6, [in, #8]
|
||||
ldr r7, [in, #12]
|
||||
|
||||
ldm rk!, {r8-r11}
|
||||
|
||||
#ifdef CONFIG_CPU_BIG_ENDIAN
|
||||
__rev r4, r4
|
||||
__rev r5, r5
|
||||
__rev r6, r6
|
||||
__rev r7, r7
|
||||
#endif
|
||||
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
|
||||
__adrl ttab, \ttab
|
||||
|
||||
tst rounds, #2
|
||||
bne 1f
|
||||
|
||||
0: \round r8, r9, r10, r11, r4, r5, r6, r7
|
||||
\round r4, r5, r6, r7, r8, r9, r10, r11
|
||||
|
||||
1: subs rounds, rounds, #4
|
||||
\round r8, r9, r10, r11, r4, r5, r6, r7
|
||||
__adrl ttab, \ltab, ls
|
||||
\round r4, r5, r6, r7, r8, r9, r10, r11
|
||||
bhi 0b
|
||||
|
||||
#ifdef CONFIG_CPU_BIG_ENDIAN
|
||||
__rev r4, r4
|
||||
__rev r5, r5
|
||||
__rev r6, r6
|
||||
__rev r7, r7
|
||||
#endif
|
||||
|
||||
ldr out, [sp]
|
||||
|
||||
str r4, [out]
|
||||
str r5, [out, #4]
|
||||
str r6, [out, #8]
|
||||
str r7, [out, #12]
|
||||
|
||||
pop {r3-r11, pc}
|
||||
|
||||
.align 3
|
||||
.ltorg
|
||||
.endm
|
||||
|
||||
ENTRY(__aes_arm_encrypt)
|
||||
do_crypt fround, crypto_ft_tab, crypto_fl_tab
|
||||
ENDPROC(__aes_arm_encrypt)
|
||||
|
||||
ENTRY(__aes_arm_decrypt)
|
||||
do_crypt iround, crypto_it_tab, crypto_il_tab
|
||||
ENDPROC(__aes_arm_decrypt)
|
74
arch/arm/crypto/aes-cipher-glue.c
Normal file
74
arch/arm/crypto/aes-cipher-glue.c
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Scalar AES core transform
|
||||
*
|
||||
* Copyright (C) 2017 Linaro Ltd.
|
||||
* Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <crypto/aes.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
|
||||
EXPORT_SYMBOL(__aes_arm_encrypt);
|
||||
|
||||
asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
|
||||
EXPORT_SYMBOL(__aes_arm_decrypt);
|
||||
|
||||
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int rounds = 6 + ctx->key_length / 4;
|
||||
|
||||
__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
|
||||
}
|
||||
|
||||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int rounds = 6 + ctx->key_length / 4;
|
||||
|
||||
__aes_arm_decrypt(ctx->key_dec, rounds, in, out);
|
||||
}
|
||||
|
||||
static struct crypto_alg aes_alg = {
|
||||
.cra_name = "aes",
|
||||
.cra_driver_name = "aes-arm",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
|
||||
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cra_cipher.cia_setkey = crypto_aes_set_key,
|
||||
.cra_cipher.cia_encrypt = aes_encrypt,
|
||||
.cra_cipher.cia_decrypt = aes_decrypt,
|
||||
|
||||
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
.cra_alignmask = 3,
|
||||
#endif
|
||||
};
|
||||
|
||||
static int __init aes_init(void)
|
||||
{
|
||||
return crypto_register_alg(&aes_alg);
|
||||
}
|
||||
|
||||
static void __exit aes_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&aes_alg);
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Scalar AES cipher for ARM");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("aes");
|
1023
arch/arm/crypto/aes-neonbs-core.S
Normal file
1023
arch/arm/crypto/aes-neonbs-core.S
Normal file
File diff suppressed because it is too large
Load Diff
406
arch/arm/crypto/aes-neonbs-glue.c
Normal file
406
arch/arm/crypto/aes-neonbs-glue.c
Normal file
@ -0,0 +1,406 @@
|
||||
/*
|
||||
* Bit sliced AES using NEON instructions
|
||||
*
|
||||
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/cbc.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
MODULE_ALIAS_CRYPTO("ecb(aes)");
|
||||
MODULE_ALIAS_CRYPTO("cbc(aes)");
|
||||
MODULE_ALIAS_CRYPTO("ctr(aes)");
|
||||
MODULE_ALIAS_CRYPTO("xts(aes)");
|
||||
|
||||
asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
|
||||
|
||||
asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks);
|
||||
asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks);
|
||||
|
||||
asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
|
||||
asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 ctr[], u8 final[]);
|
||||
|
||||
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
|
||||
asmlinkage void __aes_arm_encrypt(const u32 rk[], int rounds, const u8 in[],
|
||||
u8 out[]);
|
||||
|
||||
struct aesbs_ctx {
|
||||
int rounds;
|
||||
u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32] __aligned(AES_BLOCK_SIZE);
|
||||
};
|
||||
|
||||
struct aesbs_cbc_ctx {
|
||||
struct aesbs_ctx key;
|
||||
u32 enc[AES_MAX_KEYLENGTH_U32];
|
||||
};
|
||||
|
||||
struct aesbs_xts_ctx {
|
||||
struct aesbs_ctx key;
|
||||
u32 twkey[AES_MAX_KEYLENGTH_U32];
|
||||
};
|
||||
|
||||
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_aes_ctx rk;
|
||||
int err;
|
||||
|
||||
err = crypto_aes_expand_key(&rk, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
ctx->rounds = 6 + key_len / 4;
|
||||
|
||||
kernel_neon_begin();
|
||||
aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
|
||||
kernel_neon_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __ecb_crypt(struct skcipher_request *req,
|
||||
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks))
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
if (walk.nbytes < walk.total)
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
|
||||
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
|
||||
ctx->rounds, blocks);
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ecb_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __ecb_crypt(req, aesbs_ecb_encrypt);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __ecb_crypt(req, aesbs_ecb_decrypt);
|
||||
}
|
||||
|
||||
static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_aes_ctx rk;
|
||||
int err;
|
||||
|
||||
err = crypto_aes_expand_key(&rk, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
ctx->key.rounds = 6 + key_len / 4;
|
||||
|
||||
memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
|
||||
|
||||
kernel_neon_begin();
|
||||
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
|
||||
kernel_neon_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cbc_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
|
||||
{
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
__aes_arm_encrypt(ctx->enc, ctx->key.rounds, src, dst);
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return crypto_cbc_encrypt_walk(req, cbc_encrypt_one);
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
if (walk.nbytes < walk.total)
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
|
||||
aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key.rk, ctx->key.rounds, blocks,
|
||||
walk.iv);
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ctr_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
u8 buf[AES_BLOCK_SIZE];
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
|
||||
|
||||
if (walk.nbytes < walk.total) {
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
final = NULL;
|
||||
}
|
||||
|
||||
aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->rk, ctx->rounds, blocks, walk.iv, final);
|
||||
|
||||
if (final) {
|
||||
u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
|
||||
if (dst != src)
|
||||
memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
|
||||
crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
|
||||
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
break;
|
||||
}
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_aes_ctx rk;
|
||||
int err;
|
||||
|
||||
err = xts_verify_key(tfm, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
key_len /= 2;
|
||||
err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));
|
||||
|
||||
return aesbs_setkey(tfm, in_key, key_len);
|
||||
}
|
||||
|
||||
static int __xts_crypt(struct skcipher_request *req,
|
||||
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]))
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
__aes_arm_encrypt(ctx->twkey, ctx->key.rounds, walk.iv, walk.iv);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
if (walk.nbytes < walk.total)
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
|
||||
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
|
||||
ctx->key.rounds, blocks, walk.iv);
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xts_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __xts_crypt(req, aesbs_xts_encrypt);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __xts_crypt(req, aesbs_xts_decrypt);
|
||||
}
|
||||
|
||||
static struct skcipher_alg aes_algs[] = { {
|
||||
.base.cra_name = "__ecb(aes)",
|
||||
.base.cra_driver_name = "__ecb-aes-neonbs",
|
||||
.base.cra_priority = 250,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.walksize = 8 * AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
}, {
|
||||
.base.cra_name = "__cbc(aes)",
|
||||
.base.cra_driver_name = "__cbc-aes-neonbs",
|
||||
.base.cra_priority = 250,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.walksize = 8 * AES_BLOCK_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_cbc_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
}, {
|
||||
.base.cra_name = "__ctr(aes)",
|
||||
.base.cra_driver_name = "__ctr-aes-neonbs",
|
||||
.base.cra_priority = 250,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.walksize = 8 * AES_BLOCK_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_setkey,
|
||||
.encrypt = ctr_encrypt,
|
||||
.decrypt = ctr_encrypt,
|
||||
}, {
|
||||
.base.cra_name = "__xts(aes)",
|
||||
.base.cra_driver_name = "__xts-aes-neonbs",
|
||||
.base.cra_priority = 250,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
.walksize = 8 * AES_BLOCK_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_xts_setkey,
|
||||
.encrypt = xts_encrypt,
|
||||
.decrypt = xts_decrypt,
|
||||
} };
|
||||
|
||||
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
|
||||
|
||||
static void aes_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
|
||||
if (aes_simd_algs[i])
|
||||
simd_skcipher_free(aes_simd_algs[i]);
|
||||
|
||||
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
|
||||
}
|
||||
|
||||
static int __init aes_init(void)
|
||||
{
|
||||
struct simd_skcipher_alg *simd;
|
||||
const char *basename;
|
||||
const char *algname;
|
||||
const char *drvname;
|
||||
int err;
|
||||
int i;
|
||||
|
||||
if (!(elf_hwcap & HWCAP_NEON))
|
||||
return -ENODEV;
|
||||
|
||||
err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
|
||||
if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
|
||||
continue;
|
||||
|
||||
algname = aes_algs[i].base.cra_name + 2;
|
||||
drvname = aes_algs[i].base.cra_driver_name + 2;
|
||||
basename = aes_algs[i].base.cra_driver_name;
|
||||
simd = simd_skcipher_create_compat(algname, drvname, basename);
|
||||
err = PTR_ERR(simd);
|
||||
if (IS_ERR(simd))
|
||||
goto unregister_simds;
|
||||
|
||||
aes_simd_algs[i] = simd;
|
||||
}
|
||||
return 0;
|
||||
|
||||
unregister_simds:
|
||||
aes_exit();
|
||||
return err;
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_exit);
|
@ -1,98 +0,0 @@
|
||||
/*
|
||||
* Glue Code for the asm optimized version of the AES Cipher Algorithm
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/aes.h>
|
||||
|
||||
#include "aes_glue.h"
|
||||
|
||||
EXPORT_SYMBOL(AES_encrypt);
|
||||
EXPORT_SYMBOL(AES_decrypt);
|
||||
EXPORT_SYMBOL(private_AES_set_encrypt_key);
|
||||
EXPORT_SYMBOL(private_AES_set_decrypt_key);
|
||||
|
||||
static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
|
||||
AES_encrypt(src, dst, &ctx->enc_key);
|
||||
}
|
||||
|
||||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
|
||||
{
|
||||
struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
|
||||
AES_decrypt(src, dst, &ctx->dec_key);
|
||||
}
|
||||
|
||||
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct AES_CTX *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
switch (key_len) {
|
||||
case AES_KEYSIZE_128:
|
||||
key_len = 128;
|
||||
break;
|
||||
case AES_KEYSIZE_192:
|
||||
key_len = 192;
|
||||
break;
|
||||
case AES_KEYSIZE_256:
|
||||
key_len = 256;
|
||||
break;
|
||||
default:
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (private_AES_set_encrypt_key(in_key, key_len, &ctx->enc_key) == -1) {
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
/* private_AES_set_decrypt_key expects an encryption key as input */
|
||||
ctx->dec_key = ctx->enc_key;
|
||||
if (private_AES_set_decrypt_key(in_key, key_len, &ctx->dec_key) == -1) {
|
||||
tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct crypto_alg aes_alg = {
|
||||
.cra_name = "aes",
|
||||
.cra_driver_name = "aes-asm",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct AES_CTX),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cia_setkey = aes_set_key,
|
||||
.cia_encrypt = aes_encrypt,
|
||||
.cia_decrypt = aes_decrypt
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static int __init aes_init(void)
|
||||
{
|
||||
return crypto_register_alg(&aes_alg);
|
||||
}
|
||||
|
||||
static void __exit aes_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&aes_alg);
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_CRYPTO("aes");
|
||||
MODULE_ALIAS_CRYPTO("aes-asm");
|
||||
MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>");
|
@ -1,19 +0,0 @@
|
||||
|
||||
#define AES_MAXNR 14
|
||||
|
||||
struct AES_KEY {
|
||||
unsigned int rd_key[4 * (AES_MAXNR + 1)];
|
||||
int rounds;
|
||||
};
|
||||
|
||||
struct AES_CTX {
|
||||
struct AES_KEY enc_key;
|
||||
struct AES_KEY dec_key;
|
||||
};
|
||||
|
||||
asmlinkage void AES_encrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
|
||||
asmlinkage void AES_decrypt(const u8 *in, u8 *out, struct AES_KEY *ctx);
|
||||
asmlinkage int private_AES_set_decrypt_key(const unsigned char *userKey,
|
||||
const int bits, struct AES_KEY *key);
|
||||
asmlinkage int private_AES_set_encrypt_key(const unsigned char *userKey,
|
||||
const int bits, struct AES_KEY *key);
|
File diff suppressed because it is too large
Load Diff
@ -1,367 +0,0 @@
|
||||
/*
|
||||
* linux/arch/arm/crypto/aesbs-glue.c - glue code for NEON bit sliced AES
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/cbc.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/module.h>
|
||||
#include <crypto/xts.h>
|
||||
|
||||
#include "aes_glue.h"
|
||||
|
||||
#define BIT_SLICED_KEY_MAXSIZE (128 * (AES_MAXNR - 1) + 2 * AES_BLOCK_SIZE)
|
||||
|
||||
struct BS_KEY {
|
||||
struct AES_KEY rk;
|
||||
int converted;
|
||||
u8 __aligned(8) bs[BIT_SLICED_KEY_MAXSIZE];
|
||||
} __aligned(8);
|
||||
|
||||
asmlinkage void bsaes_enc_key_convert(u8 out[], struct AES_KEY const *in);
|
||||
asmlinkage void bsaes_dec_key_convert(u8 out[], struct AES_KEY const *in);
|
||||
|
||||
asmlinkage void bsaes_cbc_encrypt(u8 const in[], u8 out[], u32 bytes,
|
||||
struct BS_KEY *key, u8 iv[]);
|
||||
|
||||
asmlinkage void bsaes_ctr32_encrypt_blocks(u8 const in[], u8 out[], u32 blocks,
|
||||
struct BS_KEY *key, u8 const iv[]);
|
||||
|
||||
asmlinkage void bsaes_xts_encrypt(u8 const in[], u8 out[], u32 bytes,
|
||||
struct BS_KEY *key, u8 tweak[]);
|
||||
|
||||
asmlinkage void bsaes_xts_decrypt(u8 const in[], u8 out[], u32 bytes,
|
||||
struct BS_KEY *key, u8 tweak[]);
|
||||
|
||||
struct aesbs_cbc_ctx {
|
||||
struct AES_KEY enc;
|
||||
struct BS_KEY dec;
|
||||
};
|
||||
|
||||
struct aesbs_ctr_ctx {
|
||||
struct BS_KEY enc;
|
||||
};
|
||||
|
||||
struct aesbs_xts_ctx {
|
||||
struct BS_KEY enc;
|
||||
struct BS_KEY dec;
|
||||
struct AES_KEY twkey;
|
||||
};
|
||||
|
||||
static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int bits = key_len * 8;
|
||||
|
||||
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) {
|
||||
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->dec.rk = ctx->enc;
|
||||
private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
|
||||
ctx->dec.converted = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int bits = key_len * 8;
|
||||
|
||||
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
|
||||
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->enc.converted = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int bits = key_len * 4;
|
||||
int err;
|
||||
|
||||
err = xts_verify_key(tfm, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
|
||||
crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
ctx->dec.rk = ctx->enc.rk;
|
||||
private_AES_set_decrypt_key(in_key, bits, &ctx->dec.rk);
|
||||
private_AES_set_encrypt_key(in_key + key_len / 2, bits, &ctx->twkey);
|
||||
ctx->enc.converted = ctx->dec.converted = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm,
|
||||
const u8 *src, u8 *dst)
|
||||
{
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
AES_encrypt(src, dst, &ctx->enc);
|
||||
}
|
||||
|
||||
static int aesbs_cbc_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one);
|
||||
}
|
||||
|
||||
static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm,
|
||||
const u8 *src, u8 *dst)
|
||||
{
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
AES_decrypt(src, dst, &ctx->dec.rk);
|
||||
}
|
||||
|
||||
static int aesbs_cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
unsigned int nbytes;
|
||||
int err;
|
||||
|
||||
for (err = skcipher_walk_virt(&walk, req, false);
|
||||
(nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) {
|
||||
u32 blocks = nbytes / AES_BLOCK_SIZE;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
u8 *src = walk.src.virt.addr;
|
||||
u8 *iv = walk.iv;
|
||||
|
||||
if (blocks >= 8) {
|
||||
kernel_neon_begin();
|
||||
bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv);
|
||||
kernel_neon_end();
|
||||
nbytes %= AES_BLOCK_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
nbytes = crypto_cbc_decrypt_blocks(&walk, tfm,
|
||||
aesbs_decrypt_one);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void inc_be128_ctr(__be32 ctr[], u32 addend)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 3; i >= 0; i--, addend = 1) {
|
||||
u32 n = be32_to_cpu(ctr[i]) + addend;
|
||||
|
||||
ctr[i] = cpu_to_be32(n);
|
||||
if (n >= addend)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int aesbs_ctr_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
u32 blocks;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
|
||||
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
|
||||
__be32 *ctr = (__be32 *)walk.iv;
|
||||
u32 headroom = UINT_MAX - be32_to_cpu(ctr[3]);
|
||||
|
||||
/* avoid 32 bit counter overflow in the NEON code */
|
||||
if (unlikely(headroom < blocks)) {
|
||||
blocks = headroom + 1;
|
||||
tail = walk.nbytes - blocks * AES_BLOCK_SIZE;
|
||||
}
|
||||
kernel_neon_begin();
|
||||
bsaes_ctr32_encrypt_blocks(walk.src.virt.addr,
|
||||
walk.dst.virt.addr, blocks,
|
||||
&ctx->enc, walk.iv);
|
||||
kernel_neon_end();
|
||||
inc_be128_ctr(ctr, blocks);
|
||||
|
||||
err = skcipher_walk_done(&walk, tail);
|
||||
}
|
||||
if (walk.nbytes) {
|
||||
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
u8 ks[AES_BLOCK_SIZE];
|
||||
|
||||
AES_encrypt(walk.iv, ks, &ctx->enc.rk);
|
||||
if (tdst != tsrc)
|
||||
memcpy(tdst, tsrc, walk.nbytes);
|
||||
crypto_xor(tdst, ks, walk.nbytes);
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int aesbs_xts_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
/* generate the initial tweak */
|
||||
AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
|
||||
|
||||
while (walk.nbytes) {
|
||||
kernel_neon_begin();
|
||||
bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
|
||||
walk.nbytes, &ctx->enc, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int aesbs_xts_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
/* generate the initial tweak */
|
||||
AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
|
||||
|
||||
while (walk.nbytes) {
|
||||
kernel_neon_begin();
|
||||
bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
|
||||
walk.nbytes, &ctx->dec, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct skcipher_alg aesbs_algs[] = { {
|
||||
.base = {
|
||||
.cra_name = "__cbc(aes)",
|
||||
.cra_driver_name = "__cbc-aes-neonbs",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_cbc_set_key,
|
||||
.encrypt = aesbs_cbc_encrypt,
|
||||
.decrypt = aesbs_cbc_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__ctr(aes)",
|
||||
.cra_driver_name = "__ctr-aes-neonbs",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_ctr_set_key,
|
||||
.encrypt = aesbs_ctr_encrypt,
|
||||
.decrypt = aesbs_ctr_encrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__xts(aes)",
|
||||
.cra_driver_name = "__xts-aes-neonbs",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_xts_set_key,
|
||||
.encrypt = aesbs_xts_encrypt,
|
||||
.decrypt = aesbs_xts_decrypt,
|
||||
} };
|
||||
|
||||
struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)];
|
||||
|
||||
static void aesbs_mod_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++)
|
||||
simd_skcipher_free(aesbs_simd_algs[i]);
|
||||
|
||||
crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
|
||||
}
|
||||
|
||||
static int __init aesbs_mod_init(void)
|
||||
{
|
||||
struct simd_skcipher_alg *simd;
|
||||
const char *basename;
|
||||
const char *algname;
|
||||
const char *drvname;
|
||||
int err;
|
||||
int i;
|
||||
|
||||
if (!cpu_has_neon())
|
||||
return -ENODEV;
|
||||
|
||||
err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) {
|
||||
algname = aesbs_algs[i].base.cra_name + 2;
|
||||
drvname = aesbs_algs[i].base.cra_driver_name + 2;
|
||||
basename = aesbs_algs[i].base.cra_driver_name;
|
||||
simd = simd_skcipher_create_compat(algname, drvname, basename);
|
||||
err = PTR_ERR(simd);
|
||||
if (IS_ERR(simd))
|
||||
goto unregister_simds;
|
||||
|
||||
aesbs_simd_algs[i] = simd;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
unregister_simds:
|
||||
aesbs_mod_exit();
|
||||
return err;
|
||||
}
|
||||
|
||||
module_init(aesbs_mod_init);
|
||||
module_exit(aesbs_mod_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Bit sliced AES in CBC/CTR/XTS modes using NEON");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL");
|
File diff suppressed because it is too large
Load Diff
523
arch/arm/crypto/chacha20-neon-core.S
Normal file
523
arch/arm/crypto/chacha20-neon-core.S
Normal file
@ -0,0 +1,523 @@
|
||||
/*
|
||||
* ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
|
||||
*
|
||||
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* Based on:
|
||||
* ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSE3 functions
|
||||
*
|
||||
* Copyright (C) 2015 Martin Willi
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
.fpu neon
|
||||
.align 5
|
||||
|
||||
ENTRY(chacha20_block_xor_neon)
|
||||
// r0: Input state matrix, s
|
||||
// r1: 1 data block output, o
|
||||
// r2: 1 data block input, i
|
||||
|
||||
//
|
||||
// This function encrypts one ChaCha20 block by loading the state matrix
|
||||
// in four NEON registers. It performs matrix operation on four words in
|
||||
// parallel, but requireds shuffling to rearrange the words after each
|
||||
// round.
|
||||
//
|
||||
|
||||
// x0..3 = s0..3
|
||||
add ip, r0, #0x20
|
||||
vld1.32 {q0-q1}, [r0]
|
||||
vld1.32 {q2-q3}, [ip]
|
||||
|
||||
vmov q8, q0
|
||||
vmov q9, q1
|
||||
vmov q10, q2
|
||||
vmov q11, q3
|
||||
|
||||
mov r3, #10
|
||||
|
||||
.Ldoubleround:
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
|
||||
vadd.i32 q0, q0, q1
|
||||
veor q4, q3, q0
|
||||
vshl.u32 q3, q4, #16
|
||||
vsri.u32 q3, q4, #16
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
|
||||
vadd.i32 q2, q2, q3
|
||||
veor q4, q1, q2
|
||||
vshl.u32 q1, q4, #12
|
||||
vsri.u32 q1, q4, #20
|
||||
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
|
||||
vadd.i32 q0, q0, q1
|
||||
veor q4, q3, q0
|
||||
vshl.u32 q3, q4, #8
|
||||
vsri.u32 q3, q4, #24
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
|
||||
vadd.i32 q2, q2, q3
|
||||
veor q4, q1, q2
|
||||
vshl.u32 q1, q4, #7
|
||||
vsri.u32 q1, q4, #25
|
||||
|
||||
// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
|
||||
vext.8 q1, q1, q1, #4
|
||||
// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
|
||||
vext.8 q2, q2, q2, #8
|
||||
// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
|
||||
vext.8 q3, q3, q3, #12
|
||||
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
|
||||
vadd.i32 q0, q0, q1
|
||||
veor q4, q3, q0
|
||||
vshl.u32 q3, q4, #16
|
||||
vsri.u32 q3, q4, #16
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
|
||||
vadd.i32 q2, q2, q3
|
||||
veor q4, q1, q2
|
||||
vshl.u32 q1, q4, #12
|
||||
vsri.u32 q1, q4, #20
|
||||
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
|
||||
vadd.i32 q0, q0, q1
|
||||
veor q4, q3, q0
|
||||
vshl.u32 q3, q4, #8
|
||||
vsri.u32 q3, q4, #24
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
|
||||
vadd.i32 q2, q2, q3
|
||||
veor q4, q1, q2
|
||||
vshl.u32 q1, q4, #7
|
||||
vsri.u32 q1, q4, #25
|
||||
|
||||
// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
|
||||
vext.8 q1, q1, q1, #12
|
||||
// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
|
||||
vext.8 q2, q2, q2, #8
|
||||
// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
|
||||
vext.8 q3, q3, q3, #4
|
||||
|
||||
subs r3, r3, #1
|
||||
bne .Ldoubleround
|
||||
|
||||
add ip, r2, #0x20
|
||||
vld1.8 {q4-q5}, [r2]
|
||||
vld1.8 {q6-q7}, [ip]
|
||||
|
||||
// o0 = i0 ^ (x0 + s0)
|
||||
vadd.i32 q0, q0, q8
|
||||
veor q0, q0, q4
|
||||
|
||||
// o1 = i1 ^ (x1 + s1)
|
||||
vadd.i32 q1, q1, q9
|
||||
veor q1, q1, q5
|
||||
|
||||
// o2 = i2 ^ (x2 + s2)
|
||||
vadd.i32 q2, q2, q10
|
||||
veor q2, q2, q6
|
||||
|
||||
// o3 = i3 ^ (x3 + s3)
|
||||
vadd.i32 q3, q3, q11
|
||||
veor q3, q3, q7
|
||||
|
||||
add ip, r1, #0x20
|
||||
vst1.8 {q0-q1}, [r1]
|
||||
vst1.8 {q2-q3}, [ip]
|
||||
|
||||
bx lr
|
||||
ENDPROC(chacha20_block_xor_neon)
|
||||
|
||||
.align 5
|
||||
ENTRY(chacha20_4block_xor_neon)
|
||||
push {r4-r6, lr}
|
||||
mov ip, sp // preserve the stack pointer
|
||||
sub r3, sp, #0x20 // allocate a 32 byte buffer
|
||||
bic r3, r3, #0x1f // aligned to 32 bytes
|
||||
mov sp, r3
|
||||
|
||||
// r0: Input state matrix, s
|
||||
// r1: 4 data blocks output, o
|
||||
// r2: 4 data blocks input, i
|
||||
|
||||
//
|
||||
// This function encrypts four consecutive ChaCha20 blocks by loading
|
||||
// the state matrix in NEON registers four times. The algorithm performs
|
||||
// each operation on the corresponding word of each state matrix, hence
|
||||
// requires no word shuffling. For final XORing step we transpose the
|
||||
// matrix by interleaving 32- and then 64-bit words, which allows us to
|
||||
// do XOR in NEON registers.
|
||||
//
|
||||
|
||||
// x0..15[0-3] = s0..3[0..3]
|
||||
add r3, r0, #0x20
|
||||
vld1.32 {q0-q1}, [r0]
|
||||
vld1.32 {q2-q3}, [r3]
|
||||
|
||||
adr r3, CTRINC
|
||||
vdup.32 q15, d7[1]
|
||||
vdup.32 q14, d7[0]
|
||||
vld1.32 {q11}, [r3, :128]
|
||||
vdup.32 q13, d6[1]
|
||||
vdup.32 q12, d6[0]
|
||||
vadd.i32 q12, q12, q11 // x12 += counter values 0-3
|
||||
vdup.32 q11, d5[1]
|
||||
vdup.32 q10, d5[0]
|
||||
vdup.32 q9, d4[1]
|
||||
vdup.32 q8, d4[0]
|
||||
vdup.32 q7, d3[1]
|
||||
vdup.32 q6, d3[0]
|
||||
vdup.32 q5, d2[1]
|
||||
vdup.32 q4, d2[0]
|
||||
vdup.32 q3, d1[1]
|
||||
vdup.32 q2, d1[0]
|
||||
vdup.32 q1, d0[1]
|
||||
vdup.32 q0, d0[0]
|
||||
|
||||
mov r3, #10
|
||||
|
||||
.Ldoubleround4:
|
||||
// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
|
||||
// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
|
||||
// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
|
||||
// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
|
||||
vadd.i32 q0, q0, q4
|
||||
vadd.i32 q1, q1, q5
|
||||
vadd.i32 q2, q2, q6
|
||||
vadd.i32 q3, q3, q7
|
||||
|
||||
veor q12, q12, q0
|
||||
veor q13, q13, q1
|
||||
veor q14, q14, q2
|
||||
veor q15, q15, q3
|
||||
|
||||
vrev32.16 q12, q12
|
||||
vrev32.16 q13, q13
|
||||
vrev32.16 q14, q14
|
||||
vrev32.16 q15, q15
|
||||
|
||||
// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
|
||||
// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
|
||||
// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
|
||||
// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
|
||||
vadd.i32 q8, q8, q12
|
||||
vadd.i32 q9, q9, q13
|
||||
vadd.i32 q10, q10, q14
|
||||
vadd.i32 q11, q11, q15
|
||||
|
||||
vst1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
veor q8, q4, q8
|
||||
veor q9, q5, q9
|
||||
vshl.u32 q4, q8, #12
|
||||
vshl.u32 q5, q9, #12
|
||||
vsri.u32 q4, q8, #20
|
||||
vsri.u32 q5, q9, #20
|
||||
|
||||
veor q8, q6, q10
|
||||
veor q9, q7, q11
|
||||
vshl.u32 q6, q8, #12
|
||||
vshl.u32 q7, q9, #12
|
||||
vsri.u32 q6, q8, #20
|
||||
vsri.u32 q7, q9, #20
|
||||
|
||||
// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
|
||||
// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
|
||||
// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
|
||||
// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
|
||||
vadd.i32 q0, q0, q4
|
||||
vadd.i32 q1, q1, q5
|
||||
vadd.i32 q2, q2, q6
|
||||
vadd.i32 q3, q3, q7
|
||||
|
||||
veor q8, q12, q0
|
||||
veor q9, q13, q1
|
||||
vshl.u32 q12, q8, #8
|
||||
vshl.u32 q13, q9, #8
|
||||
vsri.u32 q12, q8, #24
|
||||
vsri.u32 q13, q9, #24
|
||||
|
||||
veor q8, q14, q2
|
||||
veor q9, q15, q3
|
||||
vshl.u32 q14, q8, #8
|
||||
vshl.u32 q15, q9, #8
|
||||
vsri.u32 q14, q8, #24
|
||||
vsri.u32 q15, q9, #24
|
||||
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
|
||||
// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
|
||||
// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
|
||||
// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
|
||||
vadd.i32 q8, q8, q12
|
||||
vadd.i32 q9, q9, q13
|
||||
vadd.i32 q10, q10, q14
|
||||
vadd.i32 q11, q11, q15
|
||||
|
||||
vst1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
veor q8, q4, q8
|
||||
veor q9, q5, q9
|
||||
vshl.u32 q4, q8, #7
|
||||
vshl.u32 q5, q9, #7
|
||||
vsri.u32 q4, q8, #25
|
||||
vsri.u32 q5, q9, #25
|
||||
|
||||
veor q8, q6, q10
|
||||
veor q9, q7, q11
|
||||
vshl.u32 q6, q8, #7
|
||||
vshl.u32 q7, q9, #7
|
||||
vsri.u32 q6, q8, #25
|
||||
vsri.u32 q7, q9, #25
|
||||
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
|
||||
// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
|
||||
// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
|
||||
// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
|
||||
vadd.i32 q0, q0, q5
|
||||
vadd.i32 q1, q1, q6
|
||||
vadd.i32 q2, q2, q7
|
||||
vadd.i32 q3, q3, q4
|
||||
|
||||
veor q15, q15, q0
|
||||
veor q12, q12, q1
|
||||
veor q13, q13, q2
|
||||
veor q14, q14, q3
|
||||
|
||||
vrev32.16 q15, q15
|
||||
vrev32.16 q12, q12
|
||||
vrev32.16 q13, q13
|
||||
vrev32.16 q14, q14
|
||||
|
||||
// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
|
||||
// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
|
||||
// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
|
||||
// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
|
||||
vadd.i32 q10, q10, q15
|
||||
vadd.i32 q11, q11, q12
|
||||
vadd.i32 q8, q8, q13
|
||||
vadd.i32 q9, q9, q14
|
||||
|
||||
vst1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
veor q8, q7, q8
|
||||
veor q9, q4, q9
|
||||
vshl.u32 q7, q8, #12
|
||||
vshl.u32 q4, q9, #12
|
||||
vsri.u32 q7, q8, #20
|
||||
vsri.u32 q4, q9, #20
|
||||
|
||||
veor q8, q5, q10
|
||||
veor q9, q6, q11
|
||||
vshl.u32 q5, q8, #12
|
||||
vshl.u32 q6, q9, #12
|
||||
vsri.u32 q5, q8, #20
|
||||
vsri.u32 q6, q9, #20
|
||||
|
||||
// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
|
||||
// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
|
||||
// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
|
||||
// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
|
||||
vadd.i32 q0, q0, q5
|
||||
vadd.i32 q1, q1, q6
|
||||
vadd.i32 q2, q2, q7
|
||||
vadd.i32 q3, q3, q4
|
||||
|
||||
veor q8, q15, q0
|
||||
veor q9, q12, q1
|
||||
vshl.u32 q15, q8, #8
|
||||
vshl.u32 q12, q9, #8
|
||||
vsri.u32 q15, q8, #24
|
||||
vsri.u32 q12, q9, #24
|
||||
|
||||
veor q8, q13, q2
|
||||
veor q9, q14, q3
|
||||
vshl.u32 q13, q8, #8
|
||||
vshl.u32 q14, q9, #8
|
||||
vsri.u32 q13, q8, #24
|
||||
vsri.u32 q14, q9, #24
|
||||
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
|
||||
// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
|
||||
// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
|
||||
// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
|
||||
vadd.i32 q10, q10, q15
|
||||
vadd.i32 q11, q11, q12
|
||||
vadd.i32 q8, q8, q13
|
||||
vadd.i32 q9, q9, q14
|
||||
|
||||
vst1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
veor q8, q7, q8
|
||||
veor q9, q4, q9
|
||||
vshl.u32 q7, q8, #7
|
||||
vshl.u32 q4, q9, #7
|
||||
vsri.u32 q7, q8, #25
|
||||
vsri.u32 q4, q9, #25
|
||||
|
||||
veor q8, q5, q10
|
||||
veor q9, q6, q11
|
||||
vshl.u32 q5, q8, #7
|
||||
vshl.u32 q6, q9, #7
|
||||
vsri.u32 q5, q8, #25
|
||||
vsri.u32 q6, q9, #25
|
||||
|
||||
subs r3, r3, #1
|
||||
beq 0f
|
||||
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
b .Ldoubleround4
|
||||
|
||||
// x0[0-3] += s0[0]
|
||||
// x1[0-3] += s0[1]
|
||||
// x2[0-3] += s0[2]
|
||||
// x3[0-3] += s0[3]
|
||||
0: ldmia r0!, {r3-r6}
|
||||
vdup.32 q8, r3
|
||||
vdup.32 q9, r4
|
||||
vadd.i32 q0, q0, q8
|
||||
vadd.i32 q1, q1, q9
|
||||
vdup.32 q8, r5
|
||||
vdup.32 q9, r6
|
||||
vadd.i32 q2, q2, q8
|
||||
vadd.i32 q3, q3, q9
|
||||
|
||||
// x4[0-3] += s1[0]
|
||||
// x5[0-3] += s1[1]
|
||||
// x6[0-3] += s1[2]
|
||||
// x7[0-3] += s1[3]
|
||||
ldmia r0!, {r3-r6}
|
||||
vdup.32 q8, r3
|
||||
vdup.32 q9, r4
|
||||
vadd.i32 q4, q4, q8
|
||||
vadd.i32 q5, q5, q9
|
||||
vdup.32 q8, r5
|
||||
vdup.32 q9, r6
|
||||
vadd.i32 q6, q6, q8
|
||||
vadd.i32 q7, q7, q9
|
||||
|
||||
// interleave 32-bit words in state n, n+1
|
||||
vzip.32 q0, q1
|
||||
vzip.32 q2, q3
|
||||
vzip.32 q4, q5
|
||||
vzip.32 q6, q7
|
||||
|
||||
// interleave 64-bit words in state n, n+2
|
||||
vswp d1, d4
|
||||
vswp d3, d6
|
||||
vswp d9, d12
|
||||
vswp d11, d14
|
||||
|
||||
// xor with corresponding input, write to output
|
||||
vld1.8 {q8-q9}, [r2]!
|
||||
veor q8, q8, q0
|
||||
veor q9, q9, q4
|
||||
vst1.8 {q8-q9}, [r1]!
|
||||
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
// x8[0-3] += s2[0]
|
||||
// x9[0-3] += s2[1]
|
||||
// x10[0-3] += s2[2]
|
||||
// x11[0-3] += s2[3]
|
||||
ldmia r0!, {r3-r6}
|
||||
vdup.32 q0, r3
|
||||
vdup.32 q4, r4
|
||||
vadd.i32 q8, q8, q0
|
||||
vadd.i32 q9, q9, q4
|
||||
vdup.32 q0, r5
|
||||
vdup.32 q4, r6
|
||||
vadd.i32 q10, q10, q0
|
||||
vadd.i32 q11, q11, q4
|
||||
|
||||
// x12[0-3] += s3[0]
|
||||
// x13[0-3] += s3[1]
|
||||
// x14[0-3] += s3[2]
|
||||
// x15[0-3] += s3[3]
|
||||
ldmia r0!, {r3-r6}
|
||||
vdup.32 q0, r3
|
||||
vdup.32 q4, r4
|
||||
adr r3, CTRINC
|
||||
vadd.i32 q12, q12, q0
|
||||
vld1.32 {q0}, [r3, :128]
|
||||
vadd.i32 q13, q13, q4
|
||||
vadd.i32 q12, q12, q0 // x12 += counter values 0-3
|
||||
|
||||
vdup.32 q0, r5
|
||||
vdup.32 q4, r6
|
||||
vadd.i32 q14, q14, q0
|
||||
vadd.i32 q15, q15, q4
|
||||
|
||||
// interleave 32-bit words in state n, n+1
|
||||
vzip.32 q8, q9
|
||||
vzip.32 q10, q11
|
||||
vzip.32 q12, q13
|
||||
vzip.32 q14, q15
|
||||
|
||||
// interleave 64-bit words in state n, n+2
|
||||
vswp d17, d20
|
||||
vswp d19, d22
|
||||
vswp d25, d28
|
||||
vswp d27, d30
|
||||
|
||||
vmov q4, q1
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
veor q0, q0, q8
|
||||
veor q1, q1, q12
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
veor q0, q0, q2
|
||||
veor q1, q1, q6
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
veor q0, q0, q10
|
||||
veor q1, q1, q14
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
veor q0, q0, q4
|
||||
veor q1, q1, q5
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
veor q0, q0, q9
|
||||
veor q1, q1, q13
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
veor q0, q0, q3
|
||||
veor q1, q1, q7
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]
|
||||
veor q0, q0, q11
|
||||
veor q1, q1, q15
|
||||
vst1.8 {q0-q1}, [r1]
|
||||
|
||||
mov sp, ip
|
||||
pop {r4-r6, pc}
|
||||
ENDPROC(chacha20_4block_xor_neon)
|
||||
|
||||
.align 4
|
||||
CTRINC: .word 0, 1, 2, 3
|
127
arch/arm/crypto/chacha20-neon-glue.c
Normal file
127
arch/arm/crypto/chacha20-neon-glue.c
Normal file
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
|
||||
*
|
||||
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* Based on:
|
||||
* ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
|
||||
*
|
||||
* Copyright (C) 2015 Martin Willi
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/chacha20.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
|
||||
asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
|
||||
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
|
||||
|
||||
static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int bytes)
|
||||
{
|
||||
u8 buf[CHACHA20_BLOCK_SIZE];
|
||||
|
||||
while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
|
||||
chacha20_4block_xor_neon(state, dst, src);
|
||||
bytes -= CHACHA20_BLOCK_SIZE * 4;
|
||||
src += CHACHA20_BLOCK_SIZE * 4;
|
||||
dst += CHACHA20_BLOCK_SIZE * 4;
|
||||
state[12] += 4;
|
||||
}
|
||||
while (bytes >= CHACHA20_BLOCK_SIZE) {
|
||||
chacha20_block_xor_neon(state, dst, src);
|
||||
bytes -= CHACHA20_BLOCK_SIZE;
|
||||
src += CHACHA20_BLOCK_SIZE;
|
||||
dst += CHACHA20_BLOCK_SIZE;
|
||||
state[12]++;
|
||||
}
|
||||
if (bytes) {
|
||||
memcpy(buf, src, bytes);
|
||||
chacha20_block_xor_neon(state, buf, buf);
|
||||
memcpy(dst, buf, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
static int chacha20_neon(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
u32 state[16];
|
||||
int err;
|
||||
|
||||
if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
|
||||
return crypto_chacha20_crypt(req);
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_chacha20_init(state, ctx, walk.iv);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
|
||||
if (nbytes < walk.total)
|
||||
nbytes = round_down(nbytes, walk.stride);
|
||||
|
||||
chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
nbytes);
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct skcipher_alg alg = {
|
||||
.base.cra_name = "chacha20",
|
||||
.base.cra_driver_name = "chacha20-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha20_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA20_KEY_SIZE,
|
||||
.max_keysize = CHACHA20_KEY_SIZE,
|
||||
.ivsize = CHACHA20_IV_SIZE,
|
||||
.chunksize = CHACHA20_BLOCK_SIZE,
|
||||
.walksize = 4 * CHACHA20_BLOCK_SIZE,
|
||||
.setkey = crypto_chacha20_setkey,
|
||||
.encrypt = chacha20_neon,
|
||||
.decrypt = chacha20_neon,
|
||||
};
|
||||
|
||||
static int __init chacha20_simd_mod_init(void)
|
||||
{
|
||||
if (!(elf_hwcap & HWCAP_NEON))
|
||||
return -ENODEV;
|
||||
|
||||
return crypto_register_skcipher(&alg);
|
||||
}
|
||||
|
||||
static void __exit chacha20_simd_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_skcipher(&alg);
|
||||
}
|
||||
|
||||
module_init(chacha20_simd_mod_init);
|
||||
module_exit(chacha20_simd_mod_fini);
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("chacha20");
|
@ -516,4 +516,3 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y
|
||||
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
|
||||
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
|
||||
# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
|
||||
CONFIG_CRYPTO_CRC32_ARM64=y
|
||||
|
@ -37,10 +37,14 @@ config CRYPTO_CRCT10DIF_ARM64_CE
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_CRC32_ARM64_CE
|
||||
tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
|
||||
depends on KERNEL_MODE_NEON && CRC32
|
||||
tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
|
||||
depends on CRC32
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_AES_ARM64
|
||||
tristate "AES core cipher using scalar instructions"
|
||||
select CRYPTO_AES
|
||||
|
||||
config CRYPTO_AES_ARM64_CE
|
||||
tristate "AES core cipher using ARMv8 Crypto Extensions"
|
||||
depends on ARM64 && KERNEL_MODE_NEON
|
||||
@ -67,9 +71,17 @@ config CRYPTO_AES_ARM64_NEON_BLK
|
||||
select CRYPTO_AES
|
||||
select CRYPTO_SIMD
|
||||
|
||||
config CRYPTO_CRC32_ARM64
|
||||
tristate "CRC32 and CRC32C using optional ARMv8 instructions"
|
||||
depends on ARM64
|
||||
select CRYPTO_HASH
|
||||
config CRYPTO_CHACHA20_NEON
|
||||
tristate "NEON accelerated ChaCha20 symmetric cipher"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_CHACHA20
|
||||
|
||||
config CRYPTO_AES_ARM64_BS
|
||||
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_AES_ARM64_NEON_BLK
|
||||
select CRYPTO_SIMD
|
||||
|
||||
endif
|
||||
|
@ -41,15 +41,20 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
|
||||
sha512-arm64-y := sha512-glue.o sha512-core.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
|
||||
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
|
||||
aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
|
||||
aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
|
||||
|
||||
AFLAGS_aes-ce.o := -DINTERLEAVE=4
|
||||
AFLAGS_aes-neon.o := -DINTERLEAVE=4
|
||||
|
||||
CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
|
||||
|
||||
obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o
|
||||
|
||||
CFLAGS_crc32-arm64.o := -mcpu=generic+crc
|
||||
|
||||
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
|
||||
$(call if_changed_rule,cc_o_c)
|
||||
|
||||
|
@ -258,7 +258,6 @@ static struct aead_alg ccm_aes_alg = {
|
||||
.cra_priority = 300,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
|
110
arch/arm64/crypto/aes-cipher-core.S
Normal file
110
arch/arm64/crypto/aes-cipher-core.S
Normal file
@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Scalar AES core transform
|
||||
*
|
||||
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
.text
|
||||
|
||||
rk .req x0
|
||||
out .req x1
|
||||
in .req x2
|
||||
rounds .req x3
|
||||
tt .req x4
|
||||
lt .req x2
|
||||
|
||||
.macro __pair, enc, reg0, reg1, in0, in1e, in1d, shift
|
||||
ubfx \reg0, \in0, #\shift, #8
|
||||
.if \enc
|
||||
ubfx \reg1, \in1e, #\shift, #8
|
||||
.else
|
||||
ubfx \reg1, \in1d, #\shift, #8
|
||||
.endif
|
||||
ldr \reg0, [tt, \reg0, uxtw #2]
|
||||
ldr \reg1, [tt, \reg1, uxtw #2]
|
||||
.endm
|
||||
|
||||
.macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
|
||||
ldp \out0, \out1, [rk], #8
|
||||
|
||||
__pair \enc, w13, w14, \in0, \in1, \in3, 0
|
||||
__pair \enc, w15, w16, \in1, \in2, \in0, 8
|
||||
__pair \enc, w17, w18, \in2, \in3, \in1, 16
|
||||
__pair \enc, \t0, \t1, \in3, \in0, \in2, 24
|
||||
|
||||
eor \out0, \out0, w13
|
||||
eor \out1, \out1, w14
|
||||
eor \out0, \out0, w15, ror #24
|
||||
eor \out1, \out1, w16, ror #24
|
||||
eor \out0, \out0, w17, ror #16
|
||||
eor \out1, \out1, w18, ror #16
|
||||
eor \out0, \out0, \t0, ror #8
|
||||
eor \out1, \out1, \t1, ror #8
|
||||
.endm
|
||||
|
||||
.macro fround, out0, out1, out2, out3, in0, in1, in2, in3
|
||||
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
|
||||
__hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
|
||||
.endm
|
||||
|
||||
.macro iround, out0, out1, out2, out3, in0, in1, in2, in3
|
||||
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
|
||||
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
|
||||
.endm
|
||||
|
||||
.macro do_crypt, round, ttab, ltab
|
||||
ldp w5, w6, [in]
|
||||
ldp w7, w8, [in, #8]
|
||||
ldp w9, w10, [rk], #16
|
||||
ldp w11, w12, [rk, #-8]
|
||||
|
||||
CPU_BE( rev w5, w5 )
|
||||
CPU_BE( rev w6, w6 )
|
||||
CPU_BE( rev w7, w7 )
|
||||
CPU_BE( rev w8, w8 )
|
||||
|
||||
eor w5, w5, w9
|
||||
eor w6, w6, w10
|
||||
eor w7, w7, w11
|
||||
eor w8, w8, w12
|
||||
|
||||
adr_l tt, \ttab
|
||||
adr_l lt, \ltab
|
||||
|
||||
tbnz rounds, #1, 1f
|
||||
|
||||
0: \round w9, w10, w11, w12, w5, w6, w7, w8
|
||||
\round w5, w6, w7, w8, w9, w10, w11, w12
|
||||
|
||||
1: subs rounds, rounds, #4
|
||||
\round w9, w10, w11, w12, w5, w6, w7, w8
|
||||
csel tt, tt, lt, hi
|
||||
\round w5, w6, w7, w8, w9, w10, w11, w12
|
||||
b.hi 0b
|
||||
|
||||
CPU_BE( rev w5, w5 )
|
||||
CPU_BE( rev w6, w6 )
|
||||
CPU_BE( rev w7, w7 )
|
||||
CPU_BE( rev w8, w8 )
|
||||
|
||||
stp w5, w6, [out]
|
||||
stp w7, w8, [out, #8]
|
||||
ret
|
||||
.endm
|
||||
|
||||
.align 5
|
||||
ENTRY(__aes_arm64_encrypt)
|
||||
do_crypt fround, crypto_ft_tab, crypto_fl_tab
|
||||
ENDPROC(__aes_arm64_encrypt)
|
||||
|
||||
.align 5
|
||||
ENTRY(__aes_arm64_decrypt)
|
||||
do_crypt iround, crypto_it_tab, crypto_il_tab
|
||||
ENDPROC(__aes_arm64_decrypt)
|
69
arch/arm64/crypto/aes-cipher-glue.c
Normal file
69
arch/arm64/crypto/aes-cipher-glue.c
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Scalar AES core transform
|
||||
*
|
||||
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <crypto/aes.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
|
||||
EXPORT_SYMBOL(__aes_arm64_encrypt);
|
||||
|
||||
asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
|
||||
EXPORT_SYMBOL(__aes_arm64_decrypt);
|
||||
|
||||
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int rounds = 6 + ctx->key_length / 4;
|
||||
|
||||
__aes_arm64_encrypt(ctx->key_enc, out, in, rounds);
|
||||
}
|
||||
|
||||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int rounds = 6 + ctx->key_length / 4;
|
||||
|
||||
__aes_arm64_decrypt(ctx->key_dec, out, in, rounds);
|
||||
}
|
||||
|
||||
static struct crypto_alg aes_alg = {
|
||||
.cra_name = "aes",
|
||||
.cra_driver_name = "aes-arm64",
|
||||
.cra_priority = 200,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
|
||||
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cra_cipher.cia_setkey = crypto_aes_set_key,
|
||||
.cra_cipher.cia_encrypt = aes_encrypt,
|
||||
.cra_cipher.cia_decrypt = aes_decrypt
|
||||
};
|
||||
|
||||
static int __init aes_init(void)
|
||||
{
|
||||
return crypto_register_alg(&aes_alg);
|
||||
}
|
||||
|
||||
static void __exit aes_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&aes_alg);
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Scalar AES cipher for arm64");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("aes");
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
@ -11,6 +11,7 @@
|
||||
#include <asm/neon.h>
|
||||
#include <asm/hwcap.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/module.h>
|
||||
@ -31,6 +32,7 @@
|
||||
#define aes_ctr_encrypt ce_aes_ctr_encrypt
|
||||
#define aes_xts_encrypt ce_aes_xts_encrypt
|
||||
#define aes_xts_decrypt ce_aes_xts_decrypt
|
||||
#define aes_mac_update ce_aes_mac_update
|
||||
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
|
||||
#else
|
||||
#define MODE "neon"
|
||||
@ -44,11 +46,15 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
|
||||
#define aes_ctr_encrypt neon_aes_ctr_encrypt
|
||||
#define aes_xts_encrypt neon_aes_xts_encrypt
|
||||
#define aes_xts_decrypt neon_aes_xts_decrypt
|
||||
#define aes_mac_update neon_aes_mac_update
|
||||
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
|
||||
MODULE_ALIAS_CRYPTO("ecb(aes)");
|
||||
MODULE_ALIAS_CRYPTO("cbc(aes)");
|
||||
MODULE_ALIAS_CRYPTO("ctr(aes)");
|
||||
MODULE_ALIAS_CRYPTO("xts(aes)");
|
||||
MODULE_ALIAS_CRYPTO("cmac(aes)");
|
||||
MODULE_ALIAS_CRYPTO("xcbc(aes)");
|
||||
MODULE_ALIAS_CRYPTO("cbcmac(aes)");
|
||||
#endif
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
@ -75,11 +81,25 @@ asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
|
||||
int rounds, int blocks, u8 const rk2[], u8 iv[],
|
||||
int first);
|
||||
|
||||
asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
|
||||
int blocks, u8 dg[], int enc_before,
|
||||
int enc_after);
|
||||
|
||||
struct crypto_aes_xts_ctx {
|
||||
struct crypto_aes_ctx key1;
|
||||
struct crypto_aes_ctx __aligned(8) key2;
|
||||
};
|
||||
|
||||
struct mac_tfm_ctx {
|
||||
struct crypto_aes_ctx key;
|
||||
u8 __aligned(8) consts[];
|
||||
};
|
||||
|
||||
struct mac_desc_ctx {
|
||||
unsigned int len;
|
||||
u8 dg[AES_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
@ -215,14 +235,15 @@ static int ctr_encrypt(struct skcipher_request *req)
|
||||
u8 *tsrc = walk.src.virt.addr;
|
||||
|
||||
/*
|
||||
* Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
|
||||
* to tell aes_ctr_encrypt() to only read half a block.
|
||||
* Tell aes_ctr_encrypt() to process a tail block.
|
||||
*/
|
||||
blocks = (nbytes <= 8) ? -1 : 1;
|
||||
blocks = -1;
|
||||
|
||||
aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
|
||||
aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
|
||||
blocks, walk.iv, first);
|
||||
memcpy(tdst, tail, nbytes);
|
||||
if (tdst != tsrc)
|
||||
memcpy(tdst, tsrc, nbytes);
|
||||
crypto_xor(tdst, tail, nbytes);
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
kernel_neon_end();
|
||||
@ -282,7 +303,6 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
@ -298,7 +318,6 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
@ -315,7 +334,22 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.setkey = skcipher_aes_setkey,
|
||||
.encrypt = ctr_encrypt,
|
||||
.decrypt = ctr_encrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "ctr(aes)",
|
||||
.cra_driver_name = "ctr-aes-" MODE,
|
||||
.cra_priority = PRIO - 1,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
@ -333,7 +367,6 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
|
||||
.cra_alignmask = 7,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
@ -344,15 +377,228 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.decrypt = xts_decrypt,
|
||||
} };
|
||||
|
||||
static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
|
||||
int err;
|
||||
|
||||
err = aes_expandkey(&ctx->key, in_key, key_len);
|
||||
if (err)
|
||||
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void cmac_gf128_mul_by_x(be128 *y, const be128 *x)
|
||||
{
|
||||
u64 a = be64_to_cpu(x->a);
|
||||
u64 b = be64_to_cpu(x->b);
|
||||
|
||||
y->a = cpu_to_be64((a << 1) | (b >> 63));
|
||||
y->b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
|
||||
}
|
||||
|
||||
static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
|
||||
be128 *consts = (be128 *)ctx->consts;
|
||||
u8 *rk = (u8 *)ctx->key.key_enc;
|
||||
int rounds = 6 + key_len / 4;
|
||||
int err;
|
||||
|
||||
err = cbcmac_setkey(tfm, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* encrypt the zero vector */
|
||||
kernel_neon_begin();
|
||||
aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1, 1);
|
||||
kernel_neon_end();
|
||||
|
||||
cmac_gf128_mul_by_x(consts, consts);
|
||||
cmac_gf128_mul_by_x(consts + 1, consts);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
static u8 const ks[3][AES_BLOCK_SIZE] = {
|
||||
{ [0 ... AES_BLOCK_SIZE - 1] = 0x1 },
|
||||
{ [0 ... AES_BLOCK_SIZE - 1] = 0x2 },
|
||||
{ [0 ... AES_BLOCK_SIZE - 1] = 0x3 },
|
||||
};
|
||||
|
||||
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
|
||||
u8 *rk = (u8 *)ctx->key.key_enc;
|
||||
int rounds = 6 + key_len / 4;
|
||||
u8 key[AES_BLOCK_SIZE];
|
||||
int err;
|
||||
|
||||
err = cbcmac_setkey(tfm, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kernel_neon_begin();
|
||||
aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1);
|
||||
aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0);
|
||||
kernel_neon_end();
|
||||
|
||||
return cbcmac_setkey(tfm, key, sizeof(key));
|
||||
}
|
||||
|
||||
static int mac_init(struct shash_desc *desc)
|
||||
{
|
||||
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
memset(ctx->dg, 0, AES_BLOCK_SIZE);
|
||||
ctx->len = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
|
||||
{
|
||||
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
int rounds = 6 + tctx->key.key_length / 4;
|
||||
|
||||
while (len > 0) {
|
||||
unsigned int l;
|
||||
|
||||
if ((ctx->len % AES_BLOCK_SIZE) == 0 &&
|
||||
(ctx->len + len) > AES_BLOCK_SIZE) {
|
||||
|
||||
int blocks = len / AES_BLOCK_SIZE;
|
||||
|
||||
len %= AES_BLOCK_SIZE;
|
||||
|
||||
kernel_neon_begin();
|
||||
aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
|
||||
ctx->dg, (ctx->len != 0), (len != 0));
|
||||
kernel_neon_end();
|
||||
|
||||
p += blocks * AES_BLOCK_SIZE;
|
||||
|
||||
if (!len) {
|
||||
ctx->len = AES_BLOCK_SIZE;
|
||||
break;
|
||||
}
|
||||
ctx->len = 0;
|
||||
}
|
||||
|
||||
l = min(len, AES_BLOCK_SIZE - ctx->len);
|
||||
|
||||
if (l <= AES_BLOCK_SIZE) {
|
||||
crypto_xor(ctx->dg + ctx->len, p, l);
|
||||
ctx->len += l;
|
||||
len -= l;
|
||||
p += l;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cbcmac_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
int rounds = 6 + tctx->key.key_length / 4;
|
||||
|
||||
kernel_neon_begin();
|
||||
aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
|
||||
kernel_neon_end();
|
||||
|
||||
memcpy(out, ctx->dg, AES_BLOCK_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cmac_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
int rounds = 6 + tctx->key.key_length / 4;
|
||||
u8 *consts = tctx->consts;
|
||||
|
||||
if (ctx->len != AES_BLOCK_SIZE) {
|
||||
ctx->dg[ctx->len] ^= 0x80;
|
||||
consts += AES_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
kernel_neon_begin();
|
||||
aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
|
||||
kernel_neon_end();
|
||||
|
||||
memcpy(out, ctx->dg, AES_BLOCK_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg mac_algs[] = { {
|
||||
.base.cra_name = "cmac(aes)",
|
||||
.base.cra_driver_name = "cmac-aes-" MODE,
|
||||
.base.cra_priority = PRIO,
|
||||
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
|
||||
2 * AES_BLOCK_SIZE,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.digestsize = AES_BLOCK_SIZE,
|
||||
.init = mac_init,
|
||||
.update = mac_update,
|
||||
.final = cmac_final,
|
||||
.setkey = cmac_setkey,
|
||||
.descsize = sizeof(struct mac_desc_ctx),
|
||||
}, {
|
||||
.base.cra_name = "xcbc(aes)",
|
||||
.base.cra_driver_name = "xcbc-aes-" MODE,
|
||||
.base.cra_priority = PRIO,
|
||||
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
|
||||
2 * AES_BLOCK_SIZE,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.digestsize = AES_BLOCK_SIZE,
|
||||
.init = mac_init,
|
||||
.update = mac_update,
|
||||
.final = cmac_final,
|
||||
.setkey = xcbc_setkey,
|
||||
.descsize = sizeof(struct mac_desc_ctx),
|
||||
}, {
|
||||
.base.cra_name = "cbcmac(aes)",
|
||||
.base.cra_driver_name = "cbcmac-aes-" MODE,
|
||||
.base.cra_priority = PRIO,
|
||||
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct mac_tfm_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.digestsize = AES_BLOCK_SIZE,
|
||||
.init = mac_init,
|
||||
.update = mac_update,
|
||||
.final = cbcmac_final,
|
||||
.setkey = cbcmac_setkey,
|
||||
.descsize = sizeof(struct mac_desc_ctx),
|
||||
} };
|
||||
|
||||
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
|
||||
|
||||
static void aes_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
|
||||
simd_skcipher_free(aes_simd_algs[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
|
||||
if (aes_simd_algs[i])
|
||||
simd_skcipher_free(aes_simd_algs[i]);
|
||||
|
||||
crypto_unregister_shashes(mac_algs, ARRAY_SIZE(mac_algs));
|
||||
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
|
||||
}
|
||||
|
||||
@ -369,7 +615,14 @@ static int __init aes_init(void)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = crypto_register_shashes(mac_algs, ARRAY_SIZE(mac_algs));
|
||||
if (err)
|
||||
goto unregister_ciphers;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
|
||||
if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
|
||||
continue;
|
||||
|
||||
algname = aes_algs[i].base.cra_name + 2;
|
||||
drvname = aes_algs[i].base.cra_driver_name + 2;
|
||||
basename = aes_algs[i].base.cra_driver_name;
|
||||
@ -385,6 +638,8 @@ static int __init aes_init(void)
|
||||
|
||||
unregister_simds:
|
||||
aes_exit();
|
||||
unregister_ciphers:
|
||||
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -392,5 +647,7 @@ unregister_simds:
|
||||
module_cpu_feature_match(AES, aes_init);
|
||||
#else
|
||||
module_init(aes_init);
|
||||
EXPORT_SYMBOL(neon_aes_ecb_encrypt);
|
||||
EXPORT_SYMBOL(neon_aes_cbc_encrypt);
|
||||
#endif
|
||||
module_exit(aes_exit);
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
@ -337,7 +337,7 @@ AES_ENTRY(aes_ctr_encrypt)
|
||||
|
||||
.Lctrcarrydone:
|
||||
subs w4, w4, #1
|
||||
bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
|
||||
bmi .Lctrtailblock /* blocks <0 means tail block */
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
eor v3.16b, v0.16b, v3.16b
|
||||
st1 {v3.16b}, [x0], #16
|
||||
@ -348,10 +348,8 @@ AES_ENTRY(aes_ctr_encrypt)
|
||||
FRAME_POP
|
||||
ret
|
||||
|
||||
.Lctrhalfblock:
|
||||
ld1 {v3.8b}, [x1]
|
||||
eor v3.8b, v0.8b, v3.8b
|
||||
st1 {v3.8b}, [x0]
|
||||
.Lctrtailblock:
|
||||
st1 {v0.16b}, [x0]
|
||||
FRAME_POP
|
||||
ret
|
||||
|
||||
@ -527,3 +525,30 @@ AES_ENTRY(aes_xts_decrypt)
|
||||
FRAME_POP
|
||||
ret
|
||||
AES_ENDPROC(aes_xts_decrypt)
|
||||
|
||||
/*
|
||||
* aes_mac_update(u8 const in[], u32 const rk[], int rounds,
|
||||
* int blocks, u8 dg[], int enc_before, int enc_after)
|
||||
*/
|
||||
AES_ENTRY(aes_mac_update)
|
||||
ld1 {v0.16b}, [x4] /* get dg */
|
||||
enc_prepare w2, x1, x7
|
||||
cbnz w5, .Lmacenc
|
||||
|
||||
.Lmacloop:
|
||||
cbz w3, .Lmacout
|
||||
ld1 {v1.16b}, [x0], #16 /* get next pt block */
|
||||
eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
|
||||
|
||||
subs w3, w3, #1
|
||||
csinv x5, x6, xzr, eq
|
||||
cbz w5, .Lmacout
|
||||
|
||||
.Lmacenc:
|
||||
encrypt_block v0, w2, x1, x7, w8
|
||||
b .Lmacloop
|
||||
|
||||
.Lmacout:
|
||||
st1 {v0.16b}, [x4] /* return dg */
|
||||
ret
|
||||
AES_ENDPROC(aes_mac_update)
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
|
||||
*
|
||||
* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
* Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
@ -17,17 +17,25 @@
|
||||
/* multiply by polynomial 'x' in GF(2^8) */
|
||||
.macro mul_by_x, out, in, temp, const
|
||||
sshr \temp, \in, #7
|
||||
add \out, \in, \in
|
||||
shl \out, \in, #1
|
||||
and \temp, \temp, \const
|
||||
eor \out, \out, \temp
|
||||
.endm
|
||||
|
||||
/* multiply by polynomial 'x^2' in GF(2^8) */
|
||||
.macro mul_by_x2, out, in, temp, const
|
||||
ushr \temp, \in, #6
|
||||
shl \out, \in, #2
|
||||
pmul \temp, \temp, \const
|
||||
eor \out, \out, \temp
|
||||
.endm
|
||||
|
||||
/* preload the entire Sbox */
|
||||
.macro prepare, sbox, shiftrows, temp
|
||||
adr \temp, \sbox
|
||||
movi v12.16b, #0x40
|
||||
movi v12.16b, #0x1b
|
||||
ldr q13, \shiftrows
|
||||
movi v14.16b, #0x1b
|
||||
ldr q14, .Lror32by8
|
||||
ld1 {v16.16b-v19.16b}, [\temp], #64
|
||||
ld1 {v20.16b-v23.16b}, [\temp], #64
|
||||
ld1 {v24.16b-v27.16b}, [\temp], #64
|
||||
@ -50,37 +58,31 @@
|
||||
|
||||
/* apply SubBytes transformation using the the preloaded Sbox */
|
||||
.macro sub_bytes, in
|
||||
sub v9.16b, \in\().16b, v12.16b
|
||||
sub v9.16b, \in\().16b, v15.16b
|
||||
tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
|
||||
sub v10.16b, v9.16b, v12.16b
|
||||
sub v10.16b, v9.16b, v15.16b
|
||||
tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
|
||||
sub v11.16b, v10.16b, v12.16b
|
||||
sub v11.16b, v10.16b, v15.16b
|
||||
tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
|
||||
tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
|
||||
.endm
|
||||
|
||||
/* apply MixColumns transformation */
|
||||
.macro mix_columns, in
|
||||
mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
|
||||
rev32 v8.8h, \in\().8h
|
||||
eor \in\().16b, v10.16b, \in\().16b
|
||||
shl v9.4s, v8.4s, #24
|
||||
shl v11.4s, \in\().4s, #24
|
||||
sri v9.4s, v8.4s, #8
|
||||
sri v11.4s, \in\().4s, #8
|
||||
eor v9.16b, v9.16b, v8.16b
|
||||
eor v10.16b, v10.16b, v9.16b
|
||||
eor \in\().16b, v10.16b, v11.16b
|
||||
.endm
|
||||
|
||||
.macro mix_columns, in, enc
|
||||
.if \enc == 0
|
||||
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
|
||||
.macro inv_mix_columns, in
|
||||
mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
|
||||
mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
|
||||
eor \in\().16b, \in\().16b, v11.16b
|
||||
rev32 v11.8h, v11.8h
|
||||
eor \in\().16b, \in\().16b, v11.16b
|
||||
mix_columns \in
|
||||
mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
|
||||
eor \in\().16b, \in\().16b, v8.16b
|
||||
rev32 v8.8h, v8.8h
|
||||
eor \in\().16b, \in\().16b, v8.16b
|
||||
.endif
|
||||
|
||||
mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b
|
||||
rev32 v8.8h, \in\().8h
|
||||
eor v8.16b, v8.16b, v9.16b
|
||||
eor \in\().16b, \in\().16b, v8.16b
|
||||
tbl \in\().16b, {\in\().16b}, v14.16b
|
||||
eor \in\().16b, \in\().16b, v8.16b
|
||||
.endm
|
||||
|
||||
.macro do_block, enc, in, rounds, rk, rkp, i
|
||||
@ -88,16 +90,13 @@
|
||||
add \rkp, \rk, #16
|
||||
mov \i, \rounds
|
||||
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
|
||||
movi v15.16b, #0x40
|
||||
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
|
||||
sub_bytes \in
|
||||
ld1 {v15.4s}, [\rkp], #16
|
||||
subs \i, \i, #1
|
||||
ld1 {v15.4s}, [\rkp], #16
|
||||
beq 2222f
|
||||
.if \enc == 1
|
||||
mix_columns \in
|
||||
.else
|
||||
inv_mix_columns \in
|
||||
.endif
|
||||
mix_columns \in, \enc
|
||||
b 1111b
|
||||
2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
|
||||
.endm
|
||||
@ -116,139 +115,114 @@
|
||||
*/
|
||||
|
||||
.macro sub_bytes_2x, in0, in1
|
||||
sub v8.16b, \in0\().16b, v12.16b
|
||||
sub v9.16b, \in1\().16b, v12.16b
|
||||
sub v8.16b, \in0\().16b, v15.16b
|
||||
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
|
||||
sub v9.16b, \in1\().16b, v15.16b
|
||||
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
|
||||
sub v10.16b, v8.16b, v12.16b
|
||||
sub v11.16b, v9.16b, v12.16b
|
||||
sub v10.16b, v8.16b, v15.16b
|
||||
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
|
||||
sub v11.16b, v9.16b, v15.16b
|
||||
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
|
||||
sub v8.16b, v10.16b, v12.16b
|
||||
sub v9.16b, v11.16b, v12.16b
|
||||
sub v8.16b, v10.16b, v15.16b
|
||||
tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
|
||||
sub v9.16b, v11.16b, v15.16b
|
||||
tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
|
||||
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
|
||||
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
|
||||
.endm
|
||||
|
||||
.macro sub_bytes_4x, in0, in1, in2, in3
|
||||
sub v8.16b, \in0\().16b, v12.16b
|
||||
sub v8.16b, \in0\().16b, v15.16b
|
||||
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
|
||||
sub v9.16b, \in1\().16b, v12.16b
|
||||
sub v9.16b, \in1\().16b, v15.16b
|
||||
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
|
||||
sub v10.16b, \in2\().16b, v12.16b
|
||||
sub v10.16b, \in2\().16b, v15.16b
|
||||
tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
|
||||
sub v11.16b, \in3\().16b, v12.16b
|
||||
sub v11.16b, \in3\().16b, v15.16b
|
||||
tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
|
||||
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
|
||||
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
|
||||
sub v8.16b, v8.16b, v12.16b
|
||||
sub v8.16b, v8.16b, v15.16b
|
||||
tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
|
||||
sub v9.16b, v9.16b, v12.16b
|
||||
sub v9.16b, v9.16b, v15.16b
|
||||
tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
|
||||
sub v10.16b, v10.16b, v12.16b
|
||||
sub v10.16b, v10.16b, v15.16b
|
||||
tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
|
||||
sub v11.16b, v11.16b, v12.16b
|
||||
sub v11.16b, v11.16b, v15.16b
|
||||
tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
|
||||
sub v8.16b, v8.16b, v12.16b
|
||||
sub v8.16b, v8.16b, v15.16b
|
||||
tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
|
||||
sub v9.16b, v9.16b, v12.16b
|
||||
sub v9.16b, v9.16b, v15.16b
|
||||
tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
|
||||
sub v10.16b, v10.16b, v12.16b
|
||||
sub v10.16b, v10.16b, v15.16b
|
||||
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
|
||||
sub v11.16b, v11.16b, v12.16b
|
||||
sub v11.16b, v11.16b, v15.16b
|
||||
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
|
||||
tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
|
||||
tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
|
||||
.endm
|
||||
|
||||
.macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
|
||||
sshr \tmp0\().16b, \in0\().16b, #7
|
||||
add \out0\().16b, \in0\().16b, \in0\().16b
|
||||
sshr \tmp1\().16b, \in1\().16b, #7
|
||||
sshr \tmp0\().16b, \in0\().16b, #7
|
||||
shl \out0\().16b, \in0\().16b, #1
|
||||
sshr \tmp1\().16b, \in1\().16b, #7
|
||||
and \tmp0\().16b, \tmp0\().16b, \const\().16b
|
||||
add \out1\().16b, \in1\().16b, \in1\().16b
|
||||
shl \out1\().16b, \in1\().16b, #1
|
||||
and \tmp1\().16b, \tmp1\().16b, \const\().16b
|
||||
eor \out0\().16b, \out0\().16b, \tmp0\().16b
|
||||
eor \out1\().16b, \out1\().16b, \tmp1\().16b
|
||||
.endm
|
||||
|
||||
.macro mix_columns_2x, in0, in1
|
||||
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
|
||||
.macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
|
||||
ushr \tmp0\().16b, \in0\().16b, #6
|
||||
shl \out0\().16b, \in0\().16b, #2
|
||||
ushr \tmp1\().16b, \in1\().16b, #6
|
||||
pmul \tmp0\().16b, \tmp0\().16b, \const\().16b
|
||||
shl \out1\().16b, \in1\().16b, #2
|
||||
pmul \tmp1\().16b, \tmp1\().16b, \const\().16b
|
||||
eor \out0\().16b, \out0\().16b, \tmp0\().16b
|
||||
eor \out1\().16b, \out1\().16b, \tmp1\().16b
|
||||
.endm
|
||||
|
||||
.macro mix_columns_2x, in0, in1, enc
|
||||
.if \enc == 0
|
||||
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
|
||||
mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
rev32 v8.8h, v8.8h
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
rev32 v9.8h, v9.8h
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
.endif
|
||||
|
||||
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12
|
||||
rev32 v10.8h, \in0\().8h
|
||||
rev32 v11.8h, \in1\().8h
|
||||
eor \in0\().16b, v8.16b, \in0\().16b
|
||||
eor \in1\().16b, v9.16b, \in1\().16b
|
||||
shl v12.4s, v10.4s, #24
|
||||
shl v13.4s, v11.4s, #24
|
||||
eor v8.16b, v8.16b, v10.16b
|
||||
sri v12.4s, v10.4s, #8
|
||||
shl v10.4s, \in0\().4s, #24
|
||||
eor v9.16b, v9.16b, v11.16b
|
||||
sri v13.4s, v11.4s, #8
|
||||
shl v11.4s, \in1\().4s, #24
|
||||
sri v10.4s, \in0\().4s, #8
|
||||
eor \in0\().16b, v8.16b, v12.16b
|
||||
sri v11.4s, \in1\().4s, #8
|
||||
eor \in1\().16b, v9.16b, v13.16b
|
||||
eor \in0\().16b, v10.16b, \in0\().16b
|
||||
eor \in1\().16b, v11.16b, \in1\().16b
|
||||
eor v10.16b, v10.16b, v8.16b
|
||||
eor v11.16b, v11.16b, v9.16b
|
||||
eor \in0\().16b, \in0\().16b, v10.16b
|
||||
eor \in1\().16b, \in1\().16b, v11.16b
|
||||
tbl \in0\().16b, {\in0\().16b}, v14.16b
|
||||
tbl \in1\().16b, {\in1\().16b}, v14.16b
|
||||
eor \in0\().16b, \in0\().16b, v10.16b
|
||||
eor \in1\().16b, \in1\().16b, v11.16b
|
||||
.endm
|
||||
|
||||
.macro inv_mix_cols_2x, in0, in1
|
||||
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
|
||||
mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
rev32 v8.8h, v8.8h
|
||||
rev32 v9.8h, v9.8h
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
mix_columns_2x \in0, \in1
|
||||
.endm
|
||||
|
||||
.macro inv_mix_cols_4x, in0, in1, in2, in3
|
||||
mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
|
||||
mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
|
||||
mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
|
||||
mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
eor \in2\().16b, \in2\().16b, v10.16b
|
||||
eor \in3\().16b, \in3\().16b, v11.16b
|
||||
rev32 v8.8h, v8.8h
|
||||
rev32 v9.8h, v9.8h
|
||||
rev32 v10.8h, v10.8h
|
||||
rev32 v11.8h, v11.8h
|
||||
eor \in0\().16b, \in0\().16b, v8.16b
|
||||
eor \in1\().16b, \in1\().16b, v9.16b
|
||||
eor \in2\().16b, \in2\().16b, v10.16b
|
||||
eor \in3\().16b, \in3\().16b, v11.16b
|
||||
mix_columns_2x \in0, \in1
|
||||
mix_columns_2x \in2, \in3
|
||||
.endm
|
||||
|
||||
.macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
|
||||
.macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
|
||||
ld1 {v15.4s}, [\rk]
|
||||
add \rkp, \rk, #16
|
||||
mov \i, \rounds
|
||||
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
sub_bytes_2x \in0, \in1
|
||||
movi v15.16b, #0x40
|
||||
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
|
||||
ld1 {v15.4s}, [\rkp], #16
|
||||
sub_bytes_2x \in0, \in1
|
||||
subs \i, \i, #1
|
||||
ld1 {v15.4s}, [\rkp], #16
|
||||
beq 2222f
|
||||
.if \enc == 1
|
||||
mix_columns_2x \in0, \in1
|
||||
ldr q13, .LForward_ShiftRows
|
||||
.else
|
||||
inv_mix_cols_2x \in0, \in1
|
||||
ldr q13, .LReverse_ShiftRows
|
||||
.endif
|
||||
movi v12.16b, #0x40
|
||||
mix_columns_2x \in0, \in1, \enc
|
||||
b 1111b
|
||||
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
@ -262,23 +236,17 @@
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
|
||||
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
|
||||
sub_bytes_4x \in0, \in1, \in2, \in3
|
||||
movi v15.16b, #0x40
|
||||
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
|
||||
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
|
||||
ld1 {v15.4s}, [\rkp], #16
|
||||
sub_bytes_4x \in0, \in1, \in2, \in3
|
||||
subs \i, \i, #1
|
||||
ld1 {v15.4s}, [\rkp], #16
|
||||
beq 2222f
|
||||
.if \enc == 1
|
||||
mix_columns_2x \in0, \in1
|
||||
mix_columns_2x \in2, \in3
|
||||
ldr q13, .LForward_ShiftRows
|
||||
.else
|
||||
inv_mix_cols_4x \in0, \in1, \in2, \in3
|
||||
ldr q13, .LReverse_ShiftRows
|
||||
.endif
|
||||
movi v12.16b, #0x40
|
||||
mix_columns_2x \in0, \in1, \enc
|
||||
mix_columns_2x \in2, \in3, \enc
|
||||
b 1111b
|
||||
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
|
||||
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
|
||||
@ -305,19 +273,7 @@
|
||||
#include "aes-modes.S"
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.LForward_ShiftRows:
|
||||
CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 )
|
||||
CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb )
|
||||
CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 )
|
||||
CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 )
|
||||
|
||||
.LReverse_ShiftRows:
|
||||
CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb )
|
||||
CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 )
|
||||
CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 )
|
||||
CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
|
||||
|
||||
.align 6
|
||||
.LForward_Sbox:
|
||||
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
|
||||
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
|
||||
@ -385,3 +341,12 @@ CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
|
||||
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
|
||||
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
|
||||
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
|
||||
|
||||
.LForward_ShiftRows:
|
||||
.octa 0x0b06010c07020d08030e09040f0a0500
|
||||
|
||||
.LReverse_ShiftRows:
|
||||
.octa 0x0306090c0f0205080b0e0104070a0d00
|
||||
|
||||
.Lror32by8:
|
||||
.octa 0x0c0f0e0d080b0a090407060500030201
|
||||
|
972
arch/arm64/crypto/aes-neonbs-core.S
Normal file
972
arch/arm64/crypto/aes-neonbs-core.S
Normal file
@ -0,0 +1,972 @@
|
||||
/*
|
||||
* Bit sliced AES using NEON instructions
|
||||
*
|
||||
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The algorithm implemented here is described in detail by the paper
|
||||
* 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
|
||||
* Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
|
||||
*
|
||||
* This implementation is based primarily on the OpenSSL implementation
|
||||
* for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
.text
|
||||
|
||||
rounds .req x11
|
||||
bskey .req x12
|
||||
|
||||
.macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
|
||||
eor \b2, \b2, \b1
|
||||
eor \b5, \b5, \b6
|
||||
eor \b3, \b3, \b0
|
||||
eor \b6, \b6, \b2
|
||||
eor \b5, \b5, \b0
|
||||
eor \b6, \b6, \b3
|
||||
eor \b3, \b3, \b7
|
||||
eor \b7, \b7, \b5
|
||||
eor \b3, \b3, \b4
|
||||
eor \b4, \b4, \b5
|
||||
eor \b2, \b2, \b7
|
||||
eor \b3, \b3, \b1
|
||||
eor \b1, \b1, \b5
|
||||
.endm
|
||||
|
||||
.macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
|
||||
eor \b0, \b0, \b6
|
||||
eor \b1, \b1, \b4
|
||||
eor \b4, \b4, \b6
|
||||
eor \b2, \b2, \b0
|
||||
eor \b6, \b6, \b1
|
||||
eor \b1, \b1, \b5
|
||||
eor \b5, \b5, \b3
|
||||
eor \b3, \b3, \b7
|
||||
eor \b7, \b7, \b5
|
||||
eor \b2, \b2, \b5
|
||||
eor \b4, \b4, \b7
|
||||
.endm
|
||||
|
||||
.macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
|
||||
eor \b1, \b1, \b7
|
||||
eor \b4, \b4, \b7
|
||||
eor \b7, \b7, \b5
|
||||
eor \b1, \b1, \b3
|
||||
eor \b2, \b2, \b5
|
||||
eor \b3, \b3, \b7
|
||||
eor \b6, \b6, \b1
|
||||
eor \b2, \b2, \b0
|
||||
eor \b5, \b5, \b3
|
||||
eor \b4, \b4, \b6
|
||||
eor \b0, \b0, \b6
|
||||
eor \b1, \b1, \b4
|
||||
.endm
|
||||
|
||||
.macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
|
||||
eor \b1, \b1, \b5
|
||||
eor \b2, \b2, \b7
|
||||
eor \b3, \b3, \b1
|
||||
eor \b4, \b4, \b5
|
||||
eor \b7, \b7, \b5
|
||||
eor \b3, \b3, \b4
|
||||
eor \b5, \b5, \b0
|
||||
eor \b3, \b3, \b7
|
||||
eor \b6, \b6, \b2
|
||||
eor \b2, \b2, \b1
|
||||
eor \b6, \b6, \b3
|
||||
eor \b3, \b3, \b0
|
||||
eor \b5, \b5, \b6
|
||||
.endm
|
||||
|
||||
.macro mul_gf4, x0, x1, y0, y1, t0, t1
|
||||
eor \t0, \y0, \y1
|
||||
and \t0, \t0, \x0
|
||||
eor \x0, \x0, \x1
|
||||
and \t1, \x1, \y0
|
||||
and \x0, \x0, \y1
|
||||
eor \x1, \t1, \t0
|
||||
eor \x0, \x0, \t1
|
||||
.endm
|
||||
|
||||
.macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
|
||||
eor \t0, \y0, \y1
|
||||
eor \t1, \y2, \y3
|
||||
and \t0, \t0, \x0
|
||||
and \t1, \t1, \x2
|
||||
eor \x0, \x0, \x1
|
||||
eor \x2, \x2, \x3
|
||||
and \x1, \x1, \y0
|
||||
and \x3, \x3, \y2
|
||||
and \x0, \x0, \y1
|
||||
and \x2, \x2, \y3
|
||||
eor \x1, \x1, \x0
|
||||
eor \x2, \x2, \x3
|
||||
eor \x0, \x0, \t0
|
||||
eor \x3, \x3, \t1
|
||||
.endm
|
||||
|
||||
.macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
|
||||
y0, y1, y2, y3, t0, t1, t2, t3
|
||||
eor \t0, \x0, \x2
|
||||
eor \t1, \x1, \x3
|
||||
mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3
|
||||
eor \y0, \y0, \y2
|
||||
eor \y1, \y1, \y3
|
||||
mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
|
||||
eor \x0, \x0, \t0
|
||||
eor \x2, \x2, \t0
|
||||
eor \x1, \x1, \t1
|
||||
eor \x3, \x3, \t1
|
||||
eor \t0, \x4, \x6
|
||||
eor \t1, \x5, \x7
|
||||
mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
|
||||
eor \y0, \y0, \y2
|
||||
eor \y1, \y1, \y3
|
||||
mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3
|
||||
eor \x4, \x4, \t0
|
||||
eor \x6, \x6, \t0
|
||||
eor \x5, \x5, \t1
|
||||
eor \x7, \x7, \t1
|
||||
.endm
|
||||
|
||||
.macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
|
||||
t0, t1, t2, t3, s0, s1, s2, s3
|
||||
eor \t3, \x4, \x6
|
||||
eor \t0, \x5, \x7
|
||||
eor \t1, \x1, \x3
|
||||
eor \s1, \x7, \x6
|
||||
eor \s0, \x0, \x2
|
||||
eor \s3, \t3, \t0
|
||||
orr \t2, \t0, \t1
|
||||
and \s2, \t3, \s0
|
||||
orr \t3, \t3, \s0
|
||||
eor \s0, \s0, \t1
|
||||
and \t0, \t0, \t1
|
||||
eor \t1, \x3, \x2
|
||||
and \s3, \s3, \s0
|
||||
and \s1, \s1, \t1
|
||||
eor \t1, \x4, \x5
|
||||
eor \s0, \x1, \x0
|
||||
eor \t3, \t3, \s1
|
||||
eor \t2, \t2, \s1
|
||||
and \s1, \t1, \s0
|
||||
orr \t1, \t1, \s0
|
||||
eor \t3, \t3, \s3
|
||||
eor \t0, \t0, \s1
|
||||
eor \t2, \t2, \s2
|
||||
eor \t1, \t1, \s3
|
||||
eor \t0, \t0, \s2
|
||||
and \s0, \x7, \x3
|
||||
eor \t1, \t1, \s2
|
||||
and \s1, \x6, \x2
|
||||
and \s2, \x5, \x1
|
||||
orr \s3, \x4, \x0
|
||||
eor \t3, \t3, \s0
|
||||
eor \t1, \t1, \s2
|
||||
eor \s0, \t0, \s3
|
||||
eor \t2, \t2, \s1
|
||||
and \s2, \t3, \t1
|
||||
eor \s1, \t2, \s2
|
||||
eor \s3, \s0, \s2
|
||||
bsl \s1, \t1, \s0
|
||||
not \t0, \s0
|
||||
bsl \s0, \s1, \s3
|
||||
bsl \t0, \s1, \s3
|
||||
bsl \s3, \t3, \t2
|
||||
eor \t3, \t3, \t2
|
||||
and \s2, \s0, \s3
|
||||
eor \t1, \t1, \t0
|
||||
eor \s2, \s2, \t3
|
||||
mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
|
||||
\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
|
||||
.endm
|
||||
|
||||
.macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
|
||||
t0, t1, t2, t3, s0, s1, s2, s3
|
||||
in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
|
||||
\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
|
||||
inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
|
||||
\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
|
||||
\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
|
||||
\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
|
||||
out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
|
||||
\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
|
||||
.endm
|
||||
|
||||
.macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
|
||||
t0, t1, t2, t3, s0, s1, s2, s3
|
||||
inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
|
||||
\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
|
||||
inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
|
||||
\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
|
||||
\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
|
||||
\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
|
||||
inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
|
||||
\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
|
||||
.endm
|
||||
|
||||
.macro enc_next_rk
|
||||
ldp q16, q17, [bskey], #128
|
||||
ldp q18, q19, [bskey, #-96]
|
||||
ldp q20, q21, [bskey, #-64]
|
||||
ldp q22, q23, [bskey, #-32]
|
||||
.endm
|
||||
|
||||
.macro dec_next_rk
|
||||
ldp q16, q17, [bskey, #-128]!
|
||||
ldp q18, q19, [bskey, #32]
|
||||
ldp q20, q21, [bskey, #64]
|
||||
ldp q22, q23, [bskey, #96]
|
||||
.endm
|
||||
|
||||
.macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
|
||||
eor \x0\().16b, \x0\().16b, v16.16b
|
||||
eor \x1\().16b, \x1\().16b, v17.16b
|
||||
eor \x2\().16b, \x2\().16b, v18.16b
|
||||
eor \x3\().16b, \x3\().16b, v19.16b
|
||||
eor \x4\().16b, \x4\().16b, v20.16b
|
||||
eor \x5\().16b, \x5\().16b, v21.16b
|
||||
eor \x6\().16b, \x6\().16b, v22.16b
|
||||
eor \x7\().16b, \x7\().16b, v23.16b
|
||||
.endm
|
||||
|
||||
.macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
|
||||
tbl \x0\().16b, {\x0\().16b}, \mask\().16b
|
||||
tbl \x1\().16b, {\x1\().16b}, \mask\().16b
|
||||
tbl \x2\().16b, {\x2\().16b}, \mask\().16b
|
||||
tbl \x3\().16b, {\x3\().16b}, \mask\().16b
|
||||
tbl \x4\().16b, {\x4\().16b}, \mask\().16b
|
||||
tbl \x5\().16b, {\x5\().16b}, \mask\().16b
|
||||
tbl \x6\().16b, {\x6\().16b}, \mask\().16b
|
||||
tbl \x7\().16b, {\x7\().16b}, \mask\().16b
|
||||
.endm
|
||||
|
||||
.macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
|
||||
t0, t1, t2, t3, t4, t5, t6, t7, inv
|
||||
ext \t0\().16b, \x0\().16b, \x0\().16b, #12
|
||||
ext \t1\().16b, \x1\().16b, \x1\().16b, #12
|
||||
eor \x0\().16b, \x0\().16b, \t0\().16b
|
||||
ext \t2\().16b, \x2\().16b, \x2\().16b, #12
|
||||
eor \x1\().16b, \x1\().16b, \t1\().16b
|
||||
ext \t3\().16b, \x3\().16b, \x3\().16b, #12
|
||||
eor \x2\().16b, \x2\().16b, \t2\().16b
|
||||
ext \t4\().16b, \x4\().16b, \x4\().16b, #12
|
||||
eor \x3\().16b, \x3\().16b, \t3\().16b
|
||||
ext \t5\().16b, \x5\().16b, \x5\().16b, #12
|
||||
eor \x4\().16b, \x4\().16b, \t4\().16b
|
||||
ext \t6\().16b, \x6\().16b, \x6\().16b, #12
|
||||
eor \x5\().16b, \x5\().16b, \t5\().16b
|
||||
ext \t7\().16b, \x7\().16b, \x7\().16b, #12
|
||||
eor \x6\().16b, \x6\().16b, \t6\().16b
|
||||
eor \t1\().16b, \t1\().16b, \x0\().16b
|
||||
eor \x7\().16b, \x7\().16b, \t7\().16b
|
||||
ext \x0\().16b, \x0\().16b, \x0\().16b, #8
|
||||
eor \t2\().16b, \t2\().16b, \x1\().16b
|
||||
eor \t0\().16b, \t0\().16b, \x7\().16b
|
||||
eor \t1\().16b, \t1\().16b, \x7\().16b
|
||||
ext \x1\().16b, \x1\().16b, \x1\().16b, #8
|
||||
eor \t5\().16b, \t5\().16b, \x4\().16b
|
||||
eor \x0\().16b, \x0\().16b, \t0\().16b
|
||||
eor \t6\().16b, \t6\().16b, \x5\().16b
|
||||
eor \x1\().16b, \x1\().16b, \t1\().16b
|
||||
ext \t0\().16b, \x4\().16b, \x4\().16b, #8
|
||||
eor \t4\().16b, \t4\().16b, \x3\().16b
|
||||
ext \t1\().16b, \x5\().16b, \x5\().16b, #8
|
||||
eor \t7\().16b, \t7\().16b, \x6\().16b
|
||||
ext \x4\().16b, \x3\().16b, \x3\().16b, #8
|
||||
eor \t3\().16b, \t3\().16b, \x2\().16b
|
||||
ext \x5\().16b, \x7\().16b, \x7\().16b, #8
|
||||
eor \t4\().16b, \t4\().16b, \x7\().16b
|
||||
ext \x3\().16b, \x6\().16b, \x6\().16b, #8
|
||||
eor \t3\().16b, \t3\().16b, \x7\().16b
|
||||
ext \x6\().16b, \x2\().16b, \x2\().16b, #8
|
||||
eor \x7\().16b, \t1\().16b, \t5\().16b
|
||||
.ifb \inv
|
||||
eor \x2\().16b, \t0\().16b, \t4\().16b
|
||||
eor \x4\().16b, \x4\().16b, \t3\().16b
|
||||
eor \x5\().16b, \x5\().16b, \t7\().16b
|
||||
eor \x3\().16b, \x3\().16b, \t6\().16b
|
||||
eor \x6\().16b, \x6\().16b, \t2\().16b
|
||||
.else
|
||||
eor \t3\().16b, \t3\().16b, \x4\().16b
|
||||
eor \x5\().16b, \x5\().16b, \t7\().16b
|
||||
eor \x2\().16b, \x3\().16b, \t6\().16b
|
||||
eor \x3\().16b, \t0\().16b, \t4\().16b
|
||||
eor \x4\().16b, \x6\().16b, \t2\().16b
|
||||
mov \x6\().16b, \t3\().16b
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
|
||||
t0, t1, t2, t3, t4, t5, t6, t7
|
||||
ext \t0\().16b, \x0\().16b, \x0\().16b, #8
|
||||
ext \t6\().16b, \x6\().16b, \x6\().16b, #8
|
||||
ext \t7\().16b, \x7\().16b, \x7\().16b, #8
|
||||
eor \t0\().16b, \t0\().16b, \x0\().16b
|
||||
ext \t1\().16b, \x1\().16b, \x1\().16b, #8
|
||||
eor \t6\().16b, \t6\().16b, \x6\().16b
|
||||
ext \t2\().16b, \x2\().16b, \x2\().16b, #8
|
||||
eor \t7\().16b, \t7\().16b, \x7\().16b
|
||||
ext \t3\().16b, \x3\().16b, \x3\().16b, #8
|
||||
eor \t1\().16b, \t1\().16b, \x1\().16b
|
||||
ext \t4\().16b, \x4\().16b, \x4\().16b, #8
|
||||
eor \t2\().16b, \t2\().16b, \x2\().16b
|
||||
ext \t5\().16b, \x5\().16b, \x5\().16b, #8
|
||||
eor \t3\().16b, \t3\().16b, \x3\().16b
|
||||
eor \t4\().16b, \t4\().16b, \x4\().16b
|
||||
eor \t5\().16b, \t5\().16b, \x5\().16b
|
||||
eor \x0\().16b, \x0\().16b, \t6\().16b
|
||||
eor \x1\().16b, \x1\().16b, \t6\().16b
|
||||
eor \x2\().16b, \x2\().16b, \t0\().16b
|
||||
eor \x4\().16b, \x4\().16b, \t2\().16b
|
||||
eor \x3\().16b, \x3\().16b, \t1\().16b
|
||||
eor \x1\().16b, \x1\().16b, \t7\().16b
|
||||
eor \x2\().16b, \x2\().16b, \t7\().16b
|
||||
eor \x4\().16b, \x4\().16b, \t6\().16b
|
||||
eor \x5\().16b, \x5\().16b, \t3\().16b
|
||||
eor \x3\().16b, \x3\().16b, \t6\().16b
|
||||
eor \x6\().16b, \x6\().16b, \t4\().16b
|
||||
eor \x4\().16b, \x4\().16b, \t7\().16b
|
||||
eor \x5\().16b, \x5\().16b, \t7\().16b
|
||||
eor \x7\().16b, \x7\().16b, \t5\().16b
|
||||
mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
|
||||
\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
|
||||
.endm
|
||||
|
||||
.macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
|
||||
ushr \t0\().2d, \b0\().2d, #\n
|
||||
ushr \t1\().2d, \b1\().2d, #\n
|
||||
eor \t0\().16b, \t0\().16b, \a0\().16b
|
||||
eor \t1\().16b, \t1\().16b, \a1\().16b
|
||||
and \t0\().16b, \t0\().16b, \mask\().16b
|
||||
and \t1\().16b, \t1\().16b, \mask\().16b
|
||||
eor \a0\().16b, \a0\().16b, \t0\().16b
|
||||
shl \t0\().2d, \t0\().2d, #\n
|
||||
eor \a1\().16b, \a1\().16b, \t1\().16b
|
||||
shl \t1\().2d, \t1\().2d, #\n
|
||||
eor \b0\().16b, \b0\().16b, \t0\().16b
|
||||
eor \b1\().16b, \b1\().16b, \t1\().16b
|
||||
.endm
|
||||
|
||||
.macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
|
||||
movi \t0\().16b, #0x55
|
||||
movi \t1\().16b, #0x33
|
||||
swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
|
||||
swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
|
||||
movi \t0\().16b, #0x0f
|
||||
swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
|
||||
swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
|
||||
swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
|
||||
swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
|
||||
.endm
|
||||
|
||||
|
||||
.align 6
|
||||
M0: .octa 0x0004080c0105090d02060a0e03070b0f
|
||||
|
||||
M0SR: .octa 0x0004080c05090d010a0e02060f03070b
|
||||
SR: .octa 0x0f0e0d0c0a09080b0504070600030201
|
||||
SRM0: .octa 0x01060b0c0207080d0304090e00050a0f
|
||||
|
||||
M0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03
|
||||
ISR: .octa 0x0f0e0d0c080b0a090504070602010003
|
||||
ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f
|
||||
|
||||
/*
|
||||
* void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
|
||||
*/
|
||||
ENTRY(aesbs_convert_key)
|
||||
ld1 {v7.4s}, [x1], #16 // load round 0 key
|
||||
ld1 {v17.4s}, [x1], #16 // load round 1 key
|
||||
|
||||
movi v8.16b, #0x01 // bit masks
|
||||
movi v9.16b, #0x02
|
||||
movi v10.16b, #0x04
|
||||
movi v11.16b, #0x08
|
||||
movi v12.16b, #0x10
|
||||
movi v13.16b, #0x20
|
||||
movi v14.16b, #0x40
|
||||
movi v15.16b, #0x80
|
||||
ldr q16, M0
|
||||
|
||||
sub x2, x2, #1
|
||||
str q7, [x0], #16 // save round 0 key
|
||||
|
||||
.Lkey_loop:
|
||||
tbl v7.16b ,{v17.16b}, v16.16b
|
||||
ld1 {v17.4s}, [x1], #16 // load next round key
|
||||
|
||||
cmtst v0.16b, v7.16b, v8.16b
|
||||
cmtst v1.16b, v7.16b, v9.16b
|
||||
cmtst v2.16b, v7.16b, v10.16b
|
||||
cmtst v3.16b, v7.16b, v11.16b
|
||||
cmtst v4.16b, v7.16b, v12.16b
|
||||
cmtst v5.16b, v7.16b, v13.16b
|
||||
cmtst v6.16b, v7.16b, v14.16b
|
||||
cmtst v7.16b, v7.16b, v15.16b
|
||||
not v0.16b, v0.16b
|
||||
not v1.16b, v1.16b
|
||||
not v5.16b, v5.16b
|
||||
not v6.16b, v6.16b
|
||||
|
||||
subs x2, x2, #1
|
||||
stp q0, q1, [x0], #128
|
||||
stp q2, q3, [x0, #-96]
|
||||
stp q4, q5, [x0, #-64]
|
||||
stp q6, q7, [x0, #-32]
|
||||
b.ne .Lkey_loop
|
||||
|
||||
movi v7.16b, #0x63 // compose .L63
|
||||
eor v17.16b, v17.16b, v7.16b
|
||||
str q17, [x0]
|
||||
ret
|
||||
ENDPROC(aesbs_convert_key)
|
||||
|
||||
.align 4
|
||||
aesbs_encrypt8:
|
||||
ldr q9, [bskey], #16 // round 0 key
|
||||
ldr q8, M0SR
|
||||
ldr q24, SR
|
||||
|
||||
eor v10.16b, v0.16b, v9.16b // xor with round0 key
|
||||
eor v11.16b, v1.16b, v9.16b
|
||||
tbl v0.16b, {v10.16b}, v8.16b
|
||||
eor v12.16b, v2.16b, v9.16b
|
||||
tbl v1.16b, {v11.16b}, v8.16b
|
||||
eor v13.16b, v3.16b, v9.16b
|
||||
tbl v2.16b, {v12.16b}, v8.16b
|
||||
eor v14.16b, v4.16b, v9.16b
|
||||
tbl v3.16b, {v13.16b}, v8.16b
|
||||
eor v15.16b, v5.16b, v9.16b
|
||||
tbl v4.16b, {v14.16b}, v8.16b
|
||||
eor v10.16b, v6.16b, v9.16b
|
||||
tbl v5.16b, {v15.16b}, v8.16b
|
||||
eor v11.16b, v7.16b, v9.16b
|
||||
tbl v6.16b, {v10.16b}, v8.16b
|
||||
tbl v7.16b, {v11.16b}, v8.16b
|
||||
|
||||
bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
|
||||
|
||||
sub rounds, rounds, #1
|
||||
b .Lenc_sbox
|
||||
|
||||
.Lenc_loop:
|
||||
shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
|
||||
.Lenc_sbox:
|
||||
sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
|
||||
v13, v14, v15
|
||||
subs rounds, rounds, #1
|
||||
b.cc .Lenc_done
|
||||
|
||||
enc_next_rk
|
||||
|
||||
mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
|
||||
v13, v14, v15
|
||||
|
||||
add_round_key v0, v1, v2, v3, v4, v5, v6, v7
|
||||
|
||||
b.ne .Lenc_loop
|
||||
ldr q24, SRM0
|
||||
b .Lenc_loop
|
||||
|
||||
.Lenc_done:
|
||||
ldr q12, [bskey] // last round key
|
||||
|
||||
bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11
|
||||
|
||||
eor v0.16b, v0.16b, v12.16b
|
||||
eor v1.16b, v1.16b, v12.16b
|
||||
eor v4.16b, v4.16b, v12.16b
|
||||
eor v6.16b, v6.16b, v12.16b
|
||||
eor v3.16b, v3.16b, v12.16b
|
||||
eor v7.16b, v7.16b, v12.16b
|
||||
eor v2.16b, v2.16b, v12.16b
|
||||
eor v5.16b, v5.16b, v12.16b
|
||||
ret
|
||||
ENDPROC(aesbs_encrypt8)
|
||||
|
||||
.align 4
|
||||
aesbs_decrypt8:
|
||||
lsl x9, rounds, #7
|
||||
add bskey, bskey, x9
|
||||
|
||||
ldr q9, [bskey, #-112]! // round 0 key
|
||||
ldr q8, M0ISR
|
||||
ldr q24, ISR
|
||||
|
||||
eor v10.16b, v0.16b, v9.16b // xor with round0 key
|
||||
eor v11.16b, v1.16b, v9.16b
|
||||
tbl v0.16b, {v10.16b}, v8.16b
|
||||
eor v12.16b, v2.16b, v9.16b
|
||||
tbl v1.16b, {v11.16b}, v8.16b
|
||||
eor v13.16b, v3.16b, v9.16b
|
||||
tbl v2.16b, {v12.16b}, v8.16b
|
||||
eor v14.16b, v4.16b, v9.16b
|
||||
tbl v3.16b, {v13.16b}, v8.16b
|
||||
eor v15.16b, v5.16b, v9.16b
|
||||
tbl v4.16b, {v14.16b}, v8.16b
|
||||
eor v10.16b, v6.16b, v9.16b
|
||||
tbl v5.16b, {v15.16b}, v8.16b
|
||||
eor v11.16b, v7.16b, v9.16b
|
||||
tbl v6.16b, {v10.16b}, v8.16b
|
||||
tbl v7.16b, {v11.16b}, v8.16b
|
||||
|
||||
bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
|
||||
|
||||
sub rounds, rounds, #1
|
||||
b .Ldec_sbox
|
||||
|
||||
.Ldec_loop:
|
||||
shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
|
||||
.Ldec_sbox:
|
||||
inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
|
||||
v13, v14, v15
|
||||
subs rounds, rounds, #1
|
||||
b.cc .Ldec_done
|
||||
|
||||
dec_next_rk
|
||||
|
||||
add_round_key v0, v1, v6, v4, v2, v7, v3, v5
|
||||
|
||||
inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
|
||||
v13, v14, v15
|
||||
|
||||
b.ne .Ldec_loop
|
||||
ldr q24, ISRM0
|
||||
b .Ldec_loop
|
||||
.Ldec_done:
|
||||
ldr q12, [bskey, #-16] // last round key
|
||||
|
||||
bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11
|
||||
|
||||
eor v0.16b, v0.16b, v12.16b
|
||||
eor v1.16b, v1.16b, v12.16b
|
||||
eor v6.16b, v6.16b, v12.16b
|
||||
eor v4.16b, v4.16b, v12.16b
|
||||
eor v2.16b, v2.16b, v12.16b
|
||||
eor v7.16b, v7.16b, v12.16b
|
||||
eor v3.16b, v3.16b, v12.16b
|
||||
eor v5.16b, v5.16b, v12.16b
|
||||
ret
|
||||
ENDPROC(aesbs_decrypt8)
|
||||
|
||||
/*
|
||||
* aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks)
|
||||
* aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks)
|
||||
*/
|
||||
.macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
99: mov x5, #1
|
||||
lsl x5, x5, x4
|
||||
subs w4, w4, #8
|
||||
csel x4, x4, xzr, pl
|
||||
csel x5, x5, xzr, mi
|
||||
|
||||
ld1 {v0.16b}, [x1], #16
|
||||
tbnz x5, #1, 0f
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
tbnz x5, #2, 0f
|
||||
ld1 {v2.16b}, [x1], #16
|
||||
tbnz x5, #3, 0f
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
tbnz x5, #4, 0f
|
||||
ld1 {v4.16b}, [x1], #16
|
||||
tbnz x5, #5, 0f
|
||||
ld1 {v5.16b}, [x1], #16
|
||||
tbnz x5, #6, 0f
|
||||
ld1 {v6.16b}, [x1], #16
|
||||
tbnz x5, #7, 0f
|
||||
ld1 {v7.16b}, [x1], #16
|
||||
|
||||
0: mov bskey, x2
|
||||
mov rounds, x3
|
||||
bl \do8
|
||||
|
||||
st1 {\o0\().16b}, [x0], #16
|
||||
tbnz x5, #1, 1f
|
||||
st1 {\o1\().16b}, [x0], #16
|
||||
tbnz x5, #2, 1f
|
||||
st1 {\o2\().16b}, [x0], #16
|
||||
tbnz x5, #3, 1f
|
||||
st1 {\o3\().16b}, [x0], #16
|
||||
tbnz x5, #4, 1f
|
||||
st1 {\o4\().16b}, [x0], #16
|
||||
tbnz x5, #5, 1f
|
||||
st1 {\o5\().16b}, [x0], #16
|
||||
tbnz x5, #6, 1f
|
||||
st1 {\o6\().16b}, [x0], #16
|
||||
tbnz x5, #7, 1f
|
||||
st1 {\o7\().16b}, [x0], #16
|
||||
|
||||
cbnz x4, 99b
|
||||
|
||||
1: ldp x29, x30, [sp], #16
|
||||
ret
|
||||
.endm
|
||||
|
||||
.align 4
|
||||
ENTRY(aesbs_ecb_encrypt)
|
||||
__ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
|
||||
ENDPROC(aesbs_ecb_encrypt)
|
||||
|
||||
.align 4
|
||||
ENTRY(aesbs_ecb_decrypt)
|
||||
__ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
|
||||
ENDPROC(aesbs_ecb_decrypt)
|
||||
|
||||
/*
|
||||
* aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, u8 iv[])
|
||||
*/
|
||||
.align 4
|
||||
ENTRY(aesbs_cbc_decrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
99: mov x6, #1
|
||||
lsl x6, x6, x4
|
||||
subs w4, w4, #8
|
||||
csel x4, x4, xzr, pl
|
||||
csel x6, x6, xzr, mi
|
||||
|
||||
ld1 {v0.16b}, [x1], #16
|
||||
mov v25.16b, v0.16b
|
||||
tbnz x6, #1, 0f
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
mov v26.16b, v1.16b
|
||||
tbnz x6, #2, 0f
|
||||
ld1 {v2.16b}, [x1], #16
|
||||
mov v27.16b, v2.16b
|
||||
tbnz x6, #3, 0f
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
mov v28.16b, v3.16b
|
||||
tbnz x6, #4, 0f
|
||||
ld1 {v4.16b}, [x1], #16
|
||||
mov v29.16b, v4.16b
|
||||
tbnz x6, #5, 0f
|
||||
ld1 {v5.16b}, [x1], #16
|
||||
mov v30.16b, v5.16b
|
||||
tbnz x6, #6, 0f
|
||||
ld1 {v6.16b}, [x1], #16
|
||||
mov v31.16b, v6.16b
|
||||
tbnz x6, #7, 0f
|
||||
ld1 {v7.16b}, [x1]
|
||||
|
||||
0: mov bskey, x2
|
||||
mov rounds, x3
|
||||
bl aesbs_decrypt8
|
||||
|
||||
ld1 {v24.16b}, [x5] // load IV
|
||||
|
||||
eor v1.16b, v1.16b, v25.16b
|
||||
eor v6.16b, v6.16b, v26.16b
|
||||
eor v4.16b, v4.16b, v27.16b
|
||||
eor v2.16b, v2.16b, v28.16b
|
||||
eor v7.16b, v7.16b, v29.16b
|
||||
eor v0.16b, v0.16b, v24.16b
|
||||
eor v3.16b, v3.16b, v30.16b
|
||||
eor v5.16b, v5.16b, v31.16b
|
||||
|
||||
st1 {v0.16b}, [x0], #16
|
||||
mov v24.16b, v25.16b
|
||||
tbnz x6, #1, 1f
|
||||
st1 {v1.16b}, [x0], #16
|
||||
mov v24.16b, v26.16b
|
||||
tbnz x6, #2, 1f
|
||||
st1 {v6.16b}, [x0], #16
|
||||
mov v24.16b, v27.16b
|
||||
tbnz x6, #3, 1f
|
||||
st1 {v4.16b}, [x0], #16
|
||||
mov v24.16b, v28.16b
|
||||
tbnz x6, #4, 1f
|
||||
st1 {v2.16b}, [x0], #16
|
||||
mov v24.16b, v29.16b
|
||||
tbnz x6, #5, 1f
|
||||
st1 {v7.16b}, [x0], #16
|
||||
mov v24.16b, v30.16b
|
||||
tbnz x6, #6, 1f
|
||||
st1 {v3.16b}, [x0], #16
|
||||
mov v24.16b, v31.16b
|
||||
tbnz x6, #7, 1f
|
||||
ld1 {v24.16b}, [x1], #16
|
||||
st1 {v5.16b}, [x0], #16
|
||||
1: st1 {v24.16b}, [x5] // store IV
|
||||
|
||||
cbnz x4, 99b
|
||||
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
ENDPROC(aesbs_cbc_decrypt)
|
||||
|
||||
.macro next_tweak, out, in, const, tmp
|
||||
sshr \tmp\().2d, \in\().2d, #63
|
||||
and \tmp\().16b, \tmp\().16b, \const\().16b
|
||||
add \out\().2d, \in\().2d, \in\().2d
|
||||
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
|
||||
eor \out\().16b, \out\().16b, \tmp\().16b
|
||||
.endm
|
||||
|
||||
.align 4
|
||||
.Lxts_mul_x:
|
||||
CPU_LE( .quad 1, 0x87 )
|
||||
CPU_BE( .quad 0x87, 1 )
|
||||
|
||||
/*
|
||||
* aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, u8 iv[])
|
||||
* aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, u8 iv[])
|
||||
*/
|
||||
__xts_crypt8:
|
||||
mov x6, #1
|
||||
lsl x6, x6, x4
|
||||
subs w4, w4, #8
|
||||
csel x4, x4, xzr, pl
|
||||
csel x6, x6, xzr, mi
|
||||
|
||||
ld1 {v0.16b}, [x1], #16
|
||||
next_tweak v26, v25, v30, v31
|
||||
eor v0.16b, v0.16b, v25.16b
|
||||
tbnz x6, #1, 0f
|
||||
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
next_tweak v27, v26, v30, v31
|
||||
eor v1.16b, v1.16b, v26.16b
|
||||
tbnz x6, #2, 0f
|
||||
|
||||
ld1 {v2.16b}, [x1], #16
|
||||
next_tweak v28, v27, v30, v31
|
||||
eor v2.16b, v2.16b, v27.16b
|
||||
tbnz x6, #3, 0f
|
||||
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
next_tweak v29, v28, v30, v31
|
||||
eor v3.16b, v3.16b, v28.16b
|
||||
tbnz x6, #4, 0f
|
||||
|
||||
ld1 {v4.16b}, [x1], #16
|
||||
str q29, [sp, #16]
|
||||
eor v4.16b, v4.16b, v29.16b
|
||||
next_tweak v29, v29, v30, v31
|
||||
tbnz x6, #5, 0f
|
||||
|
||||
ld1 {v5.16b}, [x1], #16
|
||||
str q29, [sp, #32]
|
||||
eor v5.16b, v5.16b, v29.16b
|
||||
next_tweak v29, v29, v30, v31
|
||||
tbnz x6, #6, 0f
|
||||
|
||||
ld1 {v6.16b}, [x1], #16
|
||||
str q29, [sp, #48]
|
||||
eor v6.16b, v6.16b, v29.16b
|
||||
next_tweak v29, v29, v30, v31
|
||||
tbnz x6, #7, 0f
|
||||
|
||||
ld1 {v7.16b}, [x1], #16
|
||||
str q29, [sp, #64]
|
||||
eor v7.16b, v7.16b, v29.16b
|
||||
next_tweak v29, v29, v30, v31
|
||||
|
||||
0: mov bskey, x2
|
||||
mov rounds, x3
|
||||
br x7
|
||||
ENDPROC(__xts_crypt8)
|
||||
|
||||
.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
|
||||
stp x29, x30, [sp, #-80]!
|
||||
mov x29, sp
|
||||
|
||||
ldr q30, .Lxts_mul_x
|
||||
ld1 {v25.16b}, [x5]
|
||||
|
||||
99: adr x7, \do8
|
||||
bl __xts_crypt8
|
||||
|
||||
ldp q16, q17, [sp, #16]
|
||||
ldp q18, q19, [sp, #48]
|
||||
|
||||
eor \o0\().16b, \o0\().16b, v25.16b
|
||||
eor \o1\().16b, \o1\().16b, v26.16b
|
||||
eor \o2\().16b, \o2\().16b, v27.16b
|
||||
eor \o3\().16b, \o3\().16b, v28.16b
|
||||
|
||||
st1 {\o0\().16b}, [x0], #16
|
||||
mov v25.16b, v26.16b
|
||||
tbnz x6, #1, 1f
|
||||
st1 {\o1\().16b}, [x0], #16
|
||||
mov v25.16b, v27.16b
|
||||
tbnz x6, #2, 1f
|
||||
st1 {\o2\().16b}, [x0], #16
|
||||
mov v25.16b, v28.16b
|
||||
tbnz x6, #3, 1f
|
||||
st1 {\o3\().16b}, [x0], #16
|
||||
mov v25.16b, v29.16b
|
||||
tbnz x6, #4, 1f
|
||||
|
||||
eor \o4\().16b, \o4\().16b, v16.16b
|
||||
eor \o5\().16b, \o5\().16b, v17.16b
|
||||
eor \o6\().16b, \o6\().16b, v18.16b
|
||||
eor \o7\().16b, \o7\().16b, v19.16b
|
||||
|
||||
st1 {\o4\().16b}, [x0], #16
|
||||
tbnz x6, #5, 1f
|
||||
st1 {\o5\().16b}, [x0], #16
|
||||
tbnz x6, #6, 1f
|
||||
st1 {\o6\().16b}, [x0], #16
|
||||
tbnz x6, #7, 1f
|
||||
st1 {\o7\().16b}, [x0], #16
|
||||
|
||||
cbnz x4, 99b
|
||||
|
||||
1: st1 {v25.16b}, [x5]
|
||||
ldp x29, x30, [sp], #80
|
||||
ret
|
||||
.endm
|
||||
|
||||
ENTRY(aesbs_xts_encrypt)
|
||||
__xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
|
||||
ENDPROC(aesbs_xts_encrypt)
|
||||
|
||||
ENTRY(aesbs_xts_decrypt)
|
||||
__xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
|
||||
ENDPROC(aesbs_xts_decrypt)
|
||||
|
||||
.macro next_ctr, v
|
||||
mov \v\().d[1], x8
|
||||
adds x8, x8, #1
|
||||
mov \v\().d[0], x7
|
||||
adc x7, x7, xzr
|
||||
rev64 \v\().16b, \v\().16b
|
||||
.endm
|
||||
|
||||
/*
|
||||
* aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
* int rounds, int blocks, u8 iv[], u8 final[])
|
||||
*/
|
||||
ENTRY(aesbs_ctr_encrypt)
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
cmp x6, #0
|
||||
cset x10, ne
|
||||
add x4, x4, x10 // do one extra block if final
|
||||
|
||||
ldp x7, x8, [x5]
|
||||
ld1 {v0.16b}, [x5]
|
||||
CPU_LE( rev x7, x7 )
|
||||
CPU_LE( rev x8, x8 )
|
||||
adds x8, x8, #1
|
||||
adc x7, x7, xzr
|
||||
|
||||
99: mov x9, #1
|
||||
lsl x9, x9, x4
|
||||
subs w4, w4, #8
|
||||
csel x4, x4, xzr, pl
|
||||
csel x9, x9, xzr, le
|
||||
|
||||
tbnz x9, #1, 0f
|
||||
next_ctr v1
|
||||
tbnz x9, #2, 0f
|
||||
next_ctr v2
|
||||
tbnz x9, #3, 0f
|
||||
next_ctr v3
|
||||
tbnz x9, #4, 0f
|
||||
next_ctr v4
|
||||
tbnz x9, #5, 0f
|
||||
next_ctr v5
|
||||
tbnz x9, #6, 0f
|
||||
next_ctr v6
|
||||
tbnz x9, #7, 0f
|
||||
next_ctr v7
|
||||
|
||||
0: mov bskey, x2
|
||||
mov rounds, x3
|
||||
bl aesbs_encrypt8
|
||||
|
||||
lsr x9, x9, x10 // disregard the extra block
|
||||
tbnz x9, #0, 0f
|
||||
|
||||
ld1 {v8.16b}, [x1], #16
|
||||
eor v0.16b, v0.16b, v8.16b
|
||||
st1 {v0.16b}, [x0], #16
|
||||
tbnz x9, #1, 1f
|
||||
|
||||
ld1 {v9.16b}, [x1], #16
|
||||
eor v1.16b, v1.16b, v9.16b
|
||||
st1 {v1.16b}, [x0], #16
|
||||
tbnz x9, #2, 2f
|
||||
|
||||
ld1 {v10.16b}, [x1], #16
|
||||
eor v4.16b, v4.16b, v10.16b
|
||||
st1 {v4.16b}, [x0], #16
|
||||
tbnz x9, #3, 3f
|
||||
|
||||
ld1 {v11.16b}, [x1], #16
|
||||
eor v6.16b, v6.16b, v11.16b
|
||||
st1 {v6.16b}, [x0], #16
|
||||
tbnz x9, #4, 4f
|
||||
|
||||
ld1 {v12.16b}, [x1], #16
|
||||
eor v3.16b, v3.16b, v12.16b
|
||||
st1 {v3.16b}, [x0], #16
|
||||
tbnz x9, #5, 5f
|
||||
|
||||
ld1 {v13.16b}, [x1], #16
|
||||
eor v7.16b, v7.16b, v13.16b
|
||||
st1 {v7.16b}, [x0], #16
|
||||
tbnz x9, #6, 6f
|
||||
|
||||
ld1 {v14.16b}, [x1], #16
|
||||
eor v2.16b, v2.16b, v14.16b
|
||||
st1 {v2.16b}, [x0], #16
|
||||
tbnz x9, #7, 7f
|
||||
|
||||
ld1 {v15.16b}, [x1], #16
|
||||
eor v5.16b, v5.16b, v15.16b
|
||||
st1 {v5.16b}, [x0], #16
|
||||
|
||||
8: next_ctr v0
|
||||
cbnz x4, 99b
|
||||
|
||||
0: st1 {v0.16b}, [x5]
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
|
||||
/*
|
||||
* If we are handling the tail of the input (x6 != NULL), return the
|
||||
* final keystream block back to the caller.
|
||||
*/
|
||||
1: cbz x6, 8b
|
||||
st1 {v1.16b}, [x6]
|
||||
b 8b
|
||||
2: cbz x6, 8b
|
||||
st1 {v4.16b}, [x6]
|
||||
b 8b
|
||||
3: cbz x6, 8b
|
||||
st1 {v6.16b}, [x6]
|
||||
b 8b
|
||||
4: cbz x6, 8b
|
||||
st1 {v3.16b}, [x6]
|
||||
b 8b
|
||||
5: cbz x6, 8b
|
||||
st1 {v7.16b}, [x6]
|
||||
b 8b
|
||||
6: cbz x6, 8b
|
||||
st1 {v2.16b}, [x6]
|
||||
b 8b
|
||||
7: cbz x6, 8b
|
||||
st1 {v5.16b}, [x6]
|
||||
b 8b
|
||||
ENDPROC(aesbs_ctr_encrypt)
|
439
arch/arm64/crypto/aes-neonbs-glue.c
Normal file
439
arch/arm64/crypto/aes-neonbs-glue.c
Normal file
@ -0,0 +1,439 @@
|
||||
/*
|
||||
* Bit sliced AES using NEON instructions
|
||||
*
|
||||
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
MODULE_ALIAS_CRYPTO("ecb(aes)");
|
||||
MODULE_ALIAS_CRYPTO("cbc(aes)");
|
||||
MODULE_ALIAS_CRYPTO("ctr(aes)");
|
||||
MODULE_ALIAS_CRYPTO("xts(aes)");
|
||||
|
||||
asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds);
|
||||
|
||||
asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks);
|
||||
asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks);
|
||||
|
||||
asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
|
||||
asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[], u8 final[]);
|
||||
|
||||
asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
|
||||
/* borrowed from aes-neon-blk.ko */
|
||||
asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks, int first);
|
||||
asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks, u8 iv[],
|
||||
int first);
|
||||
|
||||
struct aesbs_ctx {
|
||||
u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32];
|
||||
int rounds;
|
||||
} __aligned(AES_BLOCK_SIZE);
|
||||
|
||||
struct aesbs_cbc_ctx {
|
||||
struct aesbs_ctx key;
|
||||
u32 enc[AES_MAX_KEYLENGTH_U32];
|
||||
};
|
||||
|
||||
struct aesbs_xts_ctx {
|
||||
struct aesbs_ctx key;
|
||||
u32 twkey[AES_MAX_KEYLENGTH_U32];
|
||||
};
|
||||
|
||||
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_aes_ctx rk;
|
||||
int err;
|
||||
|
||||
err = crypto_aes_expand_key(&rk, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
ctx->rounds = 6 + key_len / 4;
|
||||
|
||||
kernel_neon_begin();
|
||||
aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
|
||||
kernel_neon_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __ecb_crypt(struct skcipher_request *req,
|
||||
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks))
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
if (walk.nbytes < walk.total)
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
|
||||
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
|
||||
ctx->rounds, blocks);
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ecb_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __ecb_crypt(req, aesbs_ecb_encrypt);
|
||||
}
|
||||
|
||||
static int ecb_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __ecb_crypt(req, aesbs_ecb_decrypt);
|
||||
}
|
||||
|
||||
static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_aes_ctx rk;
|
||||
int err;
|
||||
|
||||
err = crypto_aes_expand_key(&rk, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
ctx->key.rounds = 6 + key_len / 4;
|
||||
|
||||
memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
|
||||
|
||||
kernel_neon_begin();
|
||||
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
|
||||
kernel_neon_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cbc_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
int err, first = 1;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
/* fall back to the non-bitsliced NEON implementation */
|
||||
neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->enc, ctx->key.rounds, blocks, walk.iv,
|
||||
first);
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
first = 0;
|
||||
}
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
static int cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
if (walk.nbytes < walk.total)
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
|
||||
aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key.rk, ctx->key.rounds, blocks,
|
||||
walk.iv);
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ctr_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
u8 buf[AES_BLOCK_SIZE];
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
|
||||
|
||||
if (walk.nbytes < walk.total) {
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
final = NULL;
|
||||
}
|
||||
|
||||
aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->rk, ctx->rounds, blocks, walk.iv, final);
|
||||
|
||||
if (final) {
|
||||
u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
|
||||
if (dst != src)
|
||||
memcpy(dst, src, walk.total % AES_BLOCK_SIZE);
|
||||
crypto_xor(dst, final, walk.total % AES_BLOCK_SIZE);
|
||||
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
break;
|
||||
}
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_aes_ctx rk;
|
||||
int err;
|
||||
|
||||
err = xts_verify_key(tfm, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
key_len /= 2;
|
||||
err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey));
|
||||
|
||||
return aesbs_setkey(tfm, in_key, key_len);
|
||||
}
|
||||
|
||||
static int __xts_crypt(struct skcipher_request *req,
|
||||
void (*fn)(u8 out[], u8 const in[], u8 const rk[],
|
||||
int rounds, int blocks, u8 iv[]))
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
|
||||
neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey,
|
||||
ctx->key.rounds, 1, 1);
|
||||
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
if (walk.nbytes < walk.total)
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
|
||||
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
|
||||
ctx->key.rounds, blocks, walk.iv);
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xts_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __xts_crypt(req, aesbs_xts_encrypt);
|
||||
}
|
||||
|
||||
static int xts_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __xts_crypt(req, aesbs_xts_decrypt);
|
||||
}
|
||||
|
||||
static struct skcipher_alg aes_algs[] = { {
|
||||
.base.cra_name = "__ecb(aes)",
|
||||
.base.cra_driver_name = "__ecb-aes-neonbs",
|
||||
.base.cra_priority = 250,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.walksize = 8 * AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_setkey,
|
||||
.encrypt = ecb_encrypt,
|
||||
.decrypt = ecb_decrypt,
|
||||
}, {
|
||||
.base.cra_name = "__cbc(aes)",
|
||||
.base.cra_driver_name = "__cbc-aes-neonbs",
|
||||
.base.cra_priority = 250,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.walksize = 8 * AES_BLOCK_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_cbc_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
}, {
|
||||
.base.cra_name = "__ctr(aes)",
|
||||
.base.cra_driver_name = "__ctr-aes-neonbs",
|
||||
.base.cra_priority = 250,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.walksize = 8 * AES_BLOCK_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_setkey,
|
||||
.encrypt = ctr_encrypt,
|
||||
.decrypt = ctr_encrypt,
|
||||
}, {
|
||||
.base.cra_name = "ctr(aes)",
|
||||
.base.cra_driver_name = "ctr-aes-neonbs",
|
||||
.base.cra_priority = 250 - 1,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct aesbs_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.chunksize = AES_BLOCK_SIZE,
|
||||
.walksize = 8 * AES_BLOCK_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_setkey,
|
||||
.encrypt = ctr_encrypt,
|
||||
.decrypt = ctr_encrypt,
|
||||
}, {
|
||||
.base.cra_name = "__xts(aes)",
|
||||
.base.cra_driver_name = "__xts-aes-neonbs",
|
||||
.base.cra_priority = 250,
|
||||
.base.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.base.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
|
||||
.min_keysize = 2 * AES_MIN_KEY_SIZE,
|
||||
.max_keysize = 2 * AES_MAX_KEY_SIZE,
|
||||
.walksize = 8 * AES_BLOCK_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.setkey = aesbs_xts_setkey,
|
||||
.encrypt = xts_encrypt,
|
||||
.decrypt = xts_decrypt,
|
||||
} };
|
||||
|
||||
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
|
||||
|
||||
static void aes_exit(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
|
||||
if (aes_simd_algs[i])
|
||||
simd_skcipher_free(aes_simd_algs[i]);
|
||||
|
||||
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
|
||||
}
|
||||
|
||||
static int __init aes_init(void)
|
||||
{
|
||||
struct simd_skcipher_alg *simd;
|
||||
const char *basename;
|
||||
const char *algname;
|
||||
const char *drvname;
|
||||
int err;
|
||||
int i;
|
||||
|
||||
if (!(elf_hwcap & HWCAP_ASIMD))
|
||||
return -ENODEV;
|
||||
|
||||
err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
|
||||
if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
|
||||
continue;
|
||||
|
||||
algname = aes_algs[i].base.cra_name + 2;
|
||||
drvname = aes_algs[i].base.cra_driver_name + 2;
|
||||
basename = aes_algs[i].base.cra_driver_name;
|
||||
simd = simd_skcipher_create_compat(algname, drvname, basename);
|
||||
err = PTR_ERR(simd);
|
||||
if (IS_ERR(simd))
|
||||
goto unregister_simds;
|
||||
|
||||
aes_simd_algs[i] = simd;
|
||||
}
|
||||
return 0;
|
||||
|
||||
unregister_simds:
|
||||
aes_exit();
|
||||
return err;
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_exit);
|
450
arch/arm64/crypto/chacha20-neon-core.S
Normal file
450
arch/arm64/crypto/chacha20-neon-core.S
Normal file
@ -0,0 +1,450 @@
|
||||
/*
|
||||
* ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
|
||||
*
|
||||
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* Based on:
|
||||
* ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
|
||||
*
|
||||
* Copyright (C) 2015 Martin Willi
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
.align 6
|
||||
|
||||
ENTRY(chacha20_block_xor_neon)
|
||||
// x0: Input state matrix, s
|
||||
// x1: 1 data block output, o
|
||||
// x2: 1 data block input, i
|
||||
|
||||
//
|
||||
// This function encrypts one ChaCha20 block by loading the state matrix
|
||||
// in four NEON registers. It performs matrix operation on four words in
|
||||
// parallel, but requires shuffling to rearrange the words after each
|
||||
// round.
|
||||
//
|
||||
|
||||
// x0..3 = s0..3
|
||||
adr x3, ROT8
|
||||
ld1 {v0.4s-v3.4s}, [x0]
|
||||
ld1 {v8.4s-v11.4s}, [x0]
|
||||
ld1 {v12.4s}, [x3]
|
||||
|
||||
mov x3, #10
|
||||
|
||||
.Ldoubleround:
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
|
||||
add v0.4s, v0.4s, v1.4s
|
||||
eor v3.16b, v3.16b, v0.16b
|
||||
rev32 v3.8h, v3.8h
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
|
||||
add v2.4s, v2.4s, v3.4s
|
||||
eor v4.16b, v1.16b, v2.16b
|
||||
shl v1.4s, v4.4s, #12
|
||||
sri v1.4s, v4.4s, #20
|
||||
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
|
||||
add v0.4s, v0.4s, v1.4s
|
||||
eor v3.16b, v3.16b, v0.16b
|
||||
tbl v3.16b, {v3.16b}, v12.16b
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
|
||||
add v2.4s, v2.4s, v3.4s
|
||||
eor v4.16b, v1.16b, v2.16b
|
||||
shl v1.4s, v4.4s, #7
|
||||
sri v1.4s, v4.4s, #25
|
||||
|
||||
// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
|
||||
ext v1.16b, v1.16b, v1.16b, #4
|
||||
// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
|
||||
ext v2.16b, v2.16b, v2.16b, #8
|
||||
// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
|
||||
ext v3.16b, v3.16b, v3.16b, #12
|
||||
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
|
||||
add v0.4s, v0.4s, v1.4s
|
||||
eor v3.16b, v3.16b, v0.16b
|
||||
rev32 v3.8h, v3.8h
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
|
||||
add v2.4s, v2.4s, v3.4s
|
||||
eor v4.16b, v1.16b, v2.16b
|
||||
shl v1.4s, v4.4s, #12
|
||||
sri v1.4s, v4.4s, #20
|
||||
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
|
||||
add v0.4s, v0.4s, v1.4s
|
||||
eor v3.16b, v3.16b, v0.16b
|
||||
tbl v3.16b, {v3.16b}, v12.16b
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
|
||||
add v2.4s, v2.4s, v3.4s
|
||||
eor v4.16b, v1.16b, v2.16b
|
||||
shl v1.4s, v4.4s, #7
|
||||
sri v1.4s, v4.4s, #25
|
||||
|
||||
// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
|
||||
ext v1.16b, v1.16b, v1.16b, #12
|
||||
// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
|
||||
ext v2.16b, v2.16b, v2.16b, #8
|
||||
// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
|
||||
ext v3.16b, v3.16b, v3.16b, #4
|
||||
|
||||
subs x3, x3, #1
|
||||
b.ne .Ldoubleround
|
||||
|
||||
ld1 {v4.16b-v7.16b}, [x2]
|
||||
|
||||
// o0 = i0 ^ (x0 + s0)
|
||||
add v0.4s, v0.4s, v8.4s
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
|
||||
// o1 = i1 ^ (x1 + s1)
|
||||
add v1.4s, v1.4s, v9.4s
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
|
||||
// o2 = i2 ^ (x2 + s2)
|
||||
add v2.4s, v2.4s, v10.4s
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
|
||||
// o3 = i3 ^ (x3 + s3)
|
||||
add v3.4s, v3.4s, v11.4s
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
|
||||
st1 {v0.16b-v3.16b}, [x1]
|
||||
|
||||
ret
|
||||
ENDPROC(chacha20_block_xor_neon)
|
||||
|
||||
.align 6
|
||||
ENTRY(chacha20_4block_xor_neon)
|
||||
// x0: Input state matrix, s
|
||||
// x1: 4 data blocks output, o
|
||||
// x2: 4 data blocks input, i
|
||||
|
||||
//
|
||||
// This function encrypts four consecutive ChaCha20 blocks by loading
|
||||
// the state matrix in NEON registers four times. The algorithm performs
|
||||
// each operation on the corresponding word of each state matrix, hence
|
||||
// requires no word shuffling. For final XORing step we transpose the
|
||||
// matrix by interleaving 32- and then 64-bit words, which allows us to
|
||||
// do XOR in NEON registers.
|
||||
//
|
||||
adr x3, CTRINC // ... and ROT8
|
||||
ld1 {v30.4s-v31.4s}, [x3]
|
||||
|
||||
// x0..15[0-3] = s0..3[0..3]
|
||||
mov x4, x0
|
||||
ld4r { v0.4s- v3.4s}, [x4], #16
|
||||
ld4r { v4.4s- v7.4s}, [x4], #16
|
||||
ld4r { v8.4s-v11.4s}, [x4], #16
|
||||
ld4r {v12.4s-v15.4s}, [x4]
|
||||
|
||||
// x12 += counter values 0-3
|
||||
add v12.4s, v12.4s, v30.4s
|
||||
|
||||
mov x3, #10
|
||||
|
||||
.Ldoubleround4:
|
||||
// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
|
||||
// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
|
||||
// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
|
||||
// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
|
||||
add v0.4s, v0.4s, v4.4s
|
||||
add v1.4s, v1.4s, v5.4s
|
||||
add v2.4s, v2.4s, v6.4s
|
||||
add v3.4s, v3.4s, v7.4s
|
||||
|
||||
eor v12.16b, v12.16b, v0.16b
|
||||
eor v13.16b, v13.16b, v1.16b
|
||||
eor v14.16b, v14.16b, v2.16b
|
||||
eor v15.16b, v15.16b, v3.16b
|
||||
|
||||
rev32 v12.8h, v12.8h
|
||||
rev32 v13.8h, v13.8h
|
||||
rev32 v14.8h, v14.8h
|
||||
rev32 v15.8h, v15.8h
|
||||
|
||||
// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
|
||||
// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
|
||||
// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
|
||||
// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
|
||||
add v8.4s, v8.4s, v12.4s
|
||||
add v9.4s, v9.4s, v13.4s
|
||||
add v10.4s, v10.4s, v14.4s
|
||||
add v11.4s, v11.4s, v15.4s
|
||||
|
||||
eor v16.16b, v4.16b, v8.16b
|
||||
eor v17.16b, v5.16b, v9.16b
|
||||
eor v18.16b, v6.16b, v10.16b
|
||||
eor v19.16b, v7.16b, v11.16b
|
||||
|
||||
shl v4.4s, v16.4s, #12
|
||||
shl v5.4s, v17.4s, #12
|
||||
shl v6.4s, v18.4s, #12
|
||||
shl v7.4s, v19.4s, #12
|
||||
|
||||
sri v4.4s, v16.4s, #20
|
||||
sri v5.4s, v17.4s, #20
|
||||
sri v6.4s, v18.4s, #20
|
||||
sri v7.4s, v19.4s, #20
|
||||
|
||||
// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
|
||||
// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
|
||||
// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
|
||||
// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
|
||||
add v0.4s, v0.4s, v4.4s
|
||||
add v1.4s, v1.4s, v5.4s
|
||||
add v2.4s, v2.4s, v6.4s
|
||||
add v3.4s, v3.4s, v7.4s
|
||||
|
||||
eor v12.16b, v12.16b, v0.16b
|
||||
eor v13.16b, v13.16b, v1.16b
|
||||
eor v14.16b, v14.16b, v2.16b
|
||||
eor v15.16b, v15.16b, v3.16b
|
||||
|
||||
tbl v12.16b, {v12.16b}, v31.16b
|
||||
tbl v13.16b, {v13.16b}, v31.16b
|
||||
tbl v14.16b, {v14.16b}, v31.16b
|
||||
tbl v15.16b, {v15.16b}, v31.16b
|
||||
|
||||
// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
|
||||
// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
|
||||
// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
|
||||
// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
|
||||
add v8.4s, v8.4s, v12.4s
|
||||
add v9.4s, v9.4s, v13.4s
|
||||
add v10.4s, v10.4s, v14.4s
|
||||
add v11.4s, v11.4s, v15.4s
|
||||
|
||||
eor v16.16b, v4.16b, v8.16b
|
||||
eor v17.16b, v5.16b, v9.16b
|
||||
eor v18.16b, v6.16b, v10.16b
|
||||
eor v19.16b, v7.16b, v11.16b
|
||||
|
||||
shl v4.4s, v16.4s, #7
|
||||
shl v5.4s, v17.4s, #7
|
||||
shl v6.4s, v18.4s, #7
|
||||
shl v7.4s, v19.4s, #7
|
||||
|
||||
sri v4.4s, v16.4s, #25
|
||||
sri v5.4s, v17.4s, #25
|
||||
sri v6.4s, v18.4s, #25
|
||||
sri v7.4s, v19.4s, #25
|
||||
|
||||
// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
|
||||
// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
|
||||
// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
|
||||
// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
|
||||
add v0.4s, v0.4s, v5.4s
|
||||
add v1.4s, v1.4s, v6.4s
|
||||
add v2.4s, v2.4s, v7.4s
|
||||
add v3.4s, v3.4s, v4.4s
|
||||
|
||||
eor v15.16b, v15.16b, v0.16b
|
||||
eor v12.16b, v12.16b, v1.16b
|
||||
eor v13.16b, v13.16b, v2.16b
|
||||
eor v14.16b, v14.16b, v3.16b
|
||||
|
||||
rev32 v15.8h, v15.8h
|
||||
rev32 v12.8h, v12.8h
|
||||
rev32 v13.8h, v13.8h
|
||||
rev32 v14.8h, v14.8h
|
||||
|
||||
// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
|
||||
// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
|
||||
// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
|
||||
// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
|
||||
add v10.4s, v10.4s, v15.4s
|
||||
add v11.4s, v11.4s, v12.4s
|
||||
add v8.4s, v8.4s, v13.4s
|
||||
add v9.4s, v9.4s, v14.4s
|
||||
|
||||
eor v16.16b, v5.16b, v10.16b
|
||||
eor v17.16b, v6.16b, v11.16b
|
||||
eor v18.16b, v7.16b, v8.16b
|
||||
eor v19.16b, v4.16b, v9.16b
|
||||
|
||||
shl v5.4s, v16.4s, #12
|
||||
shl v6.4s, v17.4s, #12
|
||||
shl v7.4s, v18.4s, #12
|
||||
shl v4.4s, v19.4s, #12
|
||||
|
||||
sri v5.4s, v16.4s, #20
|
||||
sri v6.4s, v17.4s, #20
|
||||
sri v7.4s, v18.4s, #20
|
||||
sri v4.4s, v19.4s, #20
|
||||
|
||||
// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
|
||||
// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
|
||||
// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
|
||||
// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
|
||||
add v0.4s, v0.4s, v5.4s
|
||||
add v1.4s, v1.4s, v6.4s
|
||||
add v2.4s, v2.4s, v7.4s
|
||||
add v3.4s, v3.4s, v4.4s
|
||||
|
||||
eor v15.16b, v15.16b, v0.16b
|
||||
eor v12.16b, v12.16b, v1.16b
|
||||
eor v13.16b, v13.16b, v2.16b
|
||||
eor v14.16b, v14.16b, v3.16b
|
||||
|
||||
tbl v15.16b, {v15.16b}, v31.16b
|
||||
tbl v12.16b, {v12.16b}, v31.16b
|
||||
tbl v13.16b, {v13.16b}, v31.16b
|
||||
tbl v14.16b, {v14.16b}, v31.16b
|
||||
|
||||
// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
|
||||
// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
|
||||
// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
|
||||
// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
|
||||
add v10.4s, v10.4s, v15.4s
|
||||
add v11.4s, v11.4s, v12.4s
|
||||
add v8.4s, v8.4s, v13.4s
|
||||
add v9.4s, v9.4s, v14.4s
|
||||
|
||||
eor v16.16b, v5.16b, v10.16b
|
||||
eor v17.16b, v6.16b, v11.16b
|
||||
eor v18.16b, v7.16b, v8.16b
|
||||
eor v19.16b, v4.16b, v9.16b
|
||||
|
||||
shl v5.4s, v16.4s, #7
|
||||
shl v6.4s, v17.4s, #7
|
||||
shl v7.4s, v18.4s, #7
|
||||
shl v4.4s, v19.4s, #7
|
||||
|
||||
sri v5.4s, v16.4s, #25
|
||||
sri v6.4s, v17.4s, #25
|
||||
sri v7.4s, v18.4s, #25
|
||||
sri v4.4s, v19.4s, #25
|
||||
|
||||
subs x3, x3, #1
|
||||
b.ne .Ldoubleround4
|
||||
|
||||
ld4r {v16.4s-v19.4s}, [x0], #16
|
||||
ld4r {v20.4s-v23.4s}, [x0], #16
|
||||
|
||||
// x12 += counter values 0-3
|
||||
add v12.4s, v12.4s, v30.4s
|
||||
|
||||
// x0[0-3] += s0[0]
|
||||
// x1[0-3] += s0[1]
|
||||
// x2[0-3] += s0[2]
|
||||
// x3[0-3] += s0[3]
|
||||
add v0.4s, v0.4s, v16.4s
|
||||
add v1.4s, v1.4s, v17.4s
|
||||
add v2.4s, v2.4s, v18.4s
|
||||
add v3.4s, v3.4s, v19.4s
|
||||
|
||||
ld4r {v24.4s-v27.4s}, [x0], #16
|
||||
ld4r {v28.4s-v31.4s}, [x0]
|
||||
|
||||
// x4[0-3] += s1[0]
|
||||
// x5[0-3] += s1[1]
|
||||
// x6[0-3] += s1[2]
|
||||
// x7[0-3] += s1[3]
|
||||
add v4.4s, v4.4s, v20.4s
|
||||
add v5.4s, v5.4s, v21.4s
|
||||
add v6.4s, v6.4s, v22.4s
|
||||
add v7.4s, v7.4s, v23.4s
|
||||
|
||||
// x8[0-3] += s2[0]
|
||||
// x9[0-3] += s2[1]
|
||||
// x10[0-3] += s2[2]
|
||||
// x11[0-3] += s2[3]
|
||||
add v8.4s, v8.4s, v24.4s
|
||||
add v9.4s, v9.4s, v25.4s
|
||||
add v10.4s, v10.4s, v26.4s
|
||||
add v11.4s, v11.4s, v27.4s
|
||||
|
||||
// x12[0-3] += s3[0]
|
||||
// x13[0-3] += s3[1]
|
||||
// x14[0-3] += s3[2]
|
||||
// x15[0-3] += s3[3]
|
||||
add v12.4s, v12.4s, v28.4s
|
||||
add v13.4s, v13.4s, v29.4s
|
||||
add v14.4s, v14.4s, v30.4s
|
||||
add v15.4s, v15.4s, v31.4s
|
||||
|
||||
// interleave 32-bit words in state n, n+1
|
||||
zip1 v16.4s, v0.4s, v1.4s
|
||||
zip2 v17.4s, v0.4s, v1.4s
|
||||
zip1 v18.4s, v2.4s, v3.4s
|
||||
zip2 v19.4s, v2.4s, v3.4s
|
||||
zip1 v20.4s, v4.4s, v5.4s
|
||||
zip2 v21.4s, v4.4s, v5.4s
|
||||
zip1 v22.4s, v6.4s, v7.4s
|
||||
zip2 v23.4s, v6.4s, v7.4s
|
||||
zip1 v24.4s, v8.4s, v9.4s
|
||||
zip2 v25.4s, v8.4s, v9.4s
|
||||
zip1 v26.4s, v10.4s, v11.4s
|
||||
zip2 v27.4s, v10.4s, v11.4s
|
||||
zip1 v28.4s, v12.4s, v13.4s
|
||||
zip2 v29.4s, v12.4s, v13.4s
|
||||
zip1 v30.4s, v14.4s, v15.4s
|
||||
zip2 v31.4s, v14.4s, v15.4s
|
||||
|
||||
// interleave 64-bit words in state n, n+2
|
||||
zip1 v0.2d, v16.2d, v18.2d
|
||||
zip2 v4.2d, v16.2d, v18.2d
|
||||
zip1 v8.2d, v17.2d, v19.2d
|
||||
zip2 v12.2d, v17.2d, v19.2d
|
||||
ld1 {v16.16b-v19.16b}, [x2], #64
|
||||
|
||||
zip1 v1.2d, v20.2d, v22.2d
|
||||
zip2 v5.2d, v20.2d, v22.2d
|
||||
zip1 v9.2d, v21.2d, v23.2d
|
||||
zip2 v13.2d, v21.2d, v23.2d
|
||||
ld1 {v20.16b-v23.16b}, [x2], #64
|
||||
|
||||
zip1 v2.2d, v24.2d, v26.2d
|
||||
zip2 v6.2d, v24.2d, v26.2d
|
||||
zip1 v10.2d, v25.2d, v27.2d
|
||||
zip2 v14.2d, v25.2d, v27.2d
|
||||
ld1 {v24.16b-v27.16b}, [x2], #64
|
||||
|
||||
zip1 v3.2d, v28.2d, v30.2d
|
||||
zip2 v7.2d, v28.2d, v30.2d
|
||||
zip1 v11.2d, v29.2d, v31.2d
|
||||
zip2 v15.2d, v29.2d, v31.2d
|
||||
ld1 {v28.16b-v31.16b}, [x2]
|
||||
|
||||
// xor with corresponding input, write to output
|
||||
eor v16.16b, v16.16b, v0.16b
|
||||
eor v17.16b, v17.16b, v1.16b
|
||||
eor v18.16b, v18.16b, v2.16b
|
||||
eor v19.16b, v19.16b, v3.16b
|
||||
eor v20.16b, v20.16b, v4.16b
|
||||
eor v21.16b, v21.16b, v5.16b
|
||||
st1 {v16.16b-v19.16b}, [x1], #64
|
||||
eor v22.16b, v22.16b, v6.16b
|
||||
eor v23.16b, v23.16b, v7.16b
|
||||
eor v24.16b, v24.16b, v8.16b
|
||||
eor v25.16b, v25.16b, v9.16b
|
||||
st1 {v20.16b-v23.16b}, [x1], #64
|
||||
eor v26.16b, v26.16b, v10.16b
|
||||
eor v27.16b, v27.16b, v11.16b
|
||||
eor v28.16b, v28.16b, v12.16b
|
||||
st1 {v24.16b-v27.16b}, [x1], #64
|
||||
eor v29.16b, v29.16b, v13.16b
|
||||
eor v30.16b, v30.16b, v14.16b
|
||||
eor v31.16b, v31.16b, v15.16b
|
||||
st1 {v28.16b-v31.16b}, [x1]
|
||||
|
||||
ret
|
||||
ENDPROC(chacha20_4block_xor_neon)
|
||||
|
||||
CTRINC: .word 0, 1, 2, 3
|
||||
ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
|
126
arch/arm64/crypto/chacha20-neon-glue.c
Normal file
126
arch/arm64/crypto/chacha20-neon-glue.c
Normal file
@ -0,0 +1,126 @@
|
||||
/*
|
||||
* ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
|
||||
*
|
||||
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* Based on:
|
||||
* ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
|
||||
*
|
||||
* Copyright (C) 2015 Martin Willi
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/chacha20.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
|
||||
asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
|
||||
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
|
||||
|
||||
static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int bytes)
|
||||
{
|
||||
u8 buf[CHACHA20_BLOCK_SIZE];
|
||||
|
||||
while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
|
||||
chacha20_4block_xor_neon(state, dst, src);
|
||||
bytes -= CHACHA20_BLOCK_SIZE * 4;
|
||||
src += CHACHA20_BLOCK_SIZE * 4;
|
||||
dst += CHACHA20_BLOCK_SIZE * 4;
|
||||
state[12] += 4;
|
||||
}
|
||||
while (bytes >= CHACHA20_BLOCK_SIZE) {
|
||||
chacha20_block_xor_neon(state, dst, src);
|
||||
bytes -= CHACHA20_BLOCK_SIZE;
|
||||
src += CHACHA20_BLOCK_SIZE;
|
||||
dst += CHACHA20_BLOCK_SIZE;
|
||||
state[12]++;
|
||||
}
|
||||
if (bytes) {
|
||||
memcpy(buf, src, bytes);
|
||||
chacha20_block_xor_neon(state, buf, buf);
|
||||
memcpy(dst, buf, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
static int chacha20_neon(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
u32 state[16];
|
||||
int err;
|
||||
|
||||
if (req->cryptlen <= CHACHA20_BLOCK_SIZE)
|
||||
return crypto_chacha20_crypt(req);
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_chacha20_init(state, ctx, walk.iv);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
|
||||
if (nbytes < walk.total)
|
||||
nbytes = round_down(nbytes, walk.stride);
|
||||
|
||||
chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
nbytes);
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct skcipher_alg alg = {
|
||||
.base.cra_name = "chacha20",
|
||||
.base.cra_driver_name = "chacha20-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha20_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA20_KEY_SIZE,
|
||||
.max_keysize = CHACHA20_KEY_SIZE,
|
||||
.ivsize = CHACHA20_IV_SIZE,
|
||||
.chunksize = CHACHA20_BLOCK_SIZE,
|
||||
.walksize = 4 * CHACHA20_BLOCK_SIZE,
|
||||
.setkey = crypto_chacha20_setkey,
|
||||
.encrypt = chacha20_neon,
|
||||
.decrypt = chacha20_neon,
|
||||
};
|
||||
|
||||
static int __init chacha20_simd_mod_init(void)
|
||||
{
|
||||
if (!(elf_hwcap & HWCAP_ASIMD))
|
||||
return -ENODEV;
|
||||
|
||||
return crypto_register_skcipher(&alg);
|
||||
}
|
||||
|
||||
static void __exit chacha20_simd_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_skcipher(&alg);
|
||||
}
|
||||
|
||||
module_init(chacha20_simd_mod_init);
|
||||
module_exit(chacha20_simd_mod_fini);
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("chacha20");
|
@ -1,290 +0,0 @@
|
||||
/*
|
||||
* crc32-arm64.c - CRC32 and CRC32C using optional ARMv8 instructions
|
||||
*
|
||||
* Module based on crypto/crc32c_generic.c
|
||||
*
|
||||
* CRC32 loop taken from Ed Nevill's Hadoop CRC patch
|
||||
* http://mail-archives.apache.org/mod_mbox/hadoop-common-dev/201406.mbox/%3C1403687030.3355.19.camel%40localhost.localdomain%3E
|
||||
*
|
||||
* Using inline assembly instead of intrinsics in order to be backwards
|
||||
* compatible with older compilers.
|
||||
*
|
||||
* Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/unaligned/access_ok.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <crypto/internal/hash.h>
|
||||
|
||||
MODULE_AUTHOR("Yazen Ghannam <yazen.ghannam@linaro.org>");
|
||||
MODULE_DESCRIPTION("CRC32 and CRC32C using optional ARMv8 instructions");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
|
||||
#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
|
||||
#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
|
||||
#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
|
||||
#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
|
||||
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
|
||||
#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
|
||||
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
|
||||
|
||||
static u32 crc32_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
|
||||
{
|
||||
s64 length = len;
|
||||
|
||||
while ((length -= sizeof(u64)) >= 0) {
|
||||
CRC32X(crc, get_unaligned_le64(p));
|
||||
p += sizeof(u64);
|
||||
}
|
||||
|
||||
/* The following is more efficient than the straight loop */
|
||||
if (length & sizeof(u32)) {
|
||||
CRC32W(crc, get_unaligned_le32(p));
|
||||
p += sizeof(u32);
|
||||
}
|
||||
if (length & sizeof(u16)) {
|
||||
CRC32H(crc, get_unaligned_le16(p));
|
||||
p += sizeof(u16);
|
||||
}
|
||||
if (length & sizeof(u8))
|
||||
CRC32B(crc, *p);
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
static u32 crc32c_arm64_le_hw(u32 crc, const u8 *p, unsigned int len)
|
||||
{
|
||||
s64 length = len;
|
||||
|
||||
while ((length -= sizeof(u64)) >= 0) {
|
||||
CRC32CX(crc, get_unaligned_le64(p));
|
||||
p += sizeof(u64);
|
||||
}
|
||||
|
||||
/* The following is more efficient than the straight loop */
|
||||
if (length & sizeof(u32)) {
|
||||
CRC32CW(crc, get_unaligned_le32(p));
|
||||
p += sizeof(u32);
|
||||
}
|
||||
if (length & sizeof(u16)) {
|
||||
CRC32CH(crc, get_unaligned_le16(p));
|
||||
p += sizeof(u16);
|
||||
}
|
||||
if (length & sizeof(u8))
|
||||
CRC32CB(crc, *p);
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
#define CHKSUM_BLOCK_SIZE 1
|
||||
#define CHKSUM_DIGEST_SIZE 4
|
||||
|
||||
struct chksum_ctx {
|
||||
u32 key;
|
||||
};
|
||||
|
||||
struct chksum_desc_ctx {
|
||||
u32 crc;
|
||||
};
|
||||
|
||||
static int chksum_init(struct shash_desc *desc)
|
||||
{
|
||||
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = mctx->key;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setting the seed allows arbitrary accumulators and flexible XOR policy
|
||||
* If your algorithm starts with ~0, then XOR with ~0 before you set
|
||||
* the seed.
|
||||
*/
|
||||
static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
|
||||
|
||||
if (keylen != sizeof(mctx->key)) {
|
||||
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
mctx->key = get_unaligned_le32(key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = crc32_arm64_le_hw(ctx->crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksumc_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = crc32c_arm64_le_hw(ctx->crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
put_unaligned_le32(ctx->crc, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksumc_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
put_unaligned_le32(~ctx->crc, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
|
||||
{
|
||||
put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
|
||||
{
|
||||
put_unaligned_le32(~crc32c_arm64_le_hw(crc, data, len), out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_finup(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, u8 *out)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
return __chksum_finup(ctx->crc, data, len, out);
|
||||
}
|
||||
|
||||
static int chksumc_finup(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, u8 *out)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
return __chksumc_finup(ctx->crc, data, len, out);
|
||||
}
|
||||
|
||||
static int chksum_digest(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length, u8 *out)
|
||||
{
|
||||
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
|
||||
|
||||
return __chksum_finup(mctx->key, data, length, out);
|
||||
}
|
||||
|
||||
static int chksumc_digest(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length, u8 *out)
|
||||
{
|
||||
struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
|
||||
|
||||
return __chksumc_finup(mctx->key, data, length, out);
|
||||
}
|
||||
|
||||
static int crc32_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
mctx->key = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32c_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
mctx->key = ~0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg crc32_alg = {
|
||||
.digestsize = CHKSUM_DIGEST_SIZE,
|
||||
.setkey = chksum_setkey,
|
||||
.init = chksum_init,
|
||||
.update = chksum_update,
|
||||
.final = chksum_final,
|
||||
.finup = chksum_finup,
|
||||
.digest = chksum_digest,
|
||||
.descsize = sizeof(struct chksum_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "crc32",
|
||||
.cra_driver_name = "crc32-arm64-hw",
|
||||
.cra_priority = 300,
|
||||
.cra_blocksize = CHKSUM_BLOCK_SIZE,
|
||||
.cra_alignmask = 0,
|
||||
.cra_ctxsize = sizeof(struct chksum_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = crc32_cra_init,
|
||||
}
|
||||
};
|
||||
|
||||
static struct shash_alg crc32c_alg = {
|
||||
.digestsize = CHKSUM_DIGEST_SIZE,
|
||||
.setkey = chksum_setkey,
|
||||
.init = chksum_init,
|
||||
.update = chksumc_update,
|
||||
.final = chksumc_final,
|
||||
.finup = chksumc_finup,
|
||||
.digest = chksumc_digest,
|
||||
.descsize = sizeof(struct chksum_desc_ctx),
|
||||
.base = {
|
||||
.cra_name = "crc32c",
|
||||
.cra_driver_name = "crc32c-arm64-hw",
|
||||
.cra_priority = 300,
|
||||
.cra_blocksize = CHKSUM_BLOCK_SIZE,
|
||||
.cra_alignmask = 0,
|
||||
.cra_ctxsize = sizeof(struct chksum_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = crc32c_cra_init,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init crc32_mod_init(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = crypto_register_shash(&crc32_alg);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = crypto_register_shash(&crc32c_alg);
|
||||
|
||||
if (err) {
|
||||
crypto_unregister_shash(&crc32_alg);
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit crc32_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_shash(&crc32_alg);
|
||||
crypto_unregister_shash(&crc32c_alg);
|
||||
}
|
||||
|
||||
module_cpu_feature_match(CRC32, crc32_mod_init);
|
||||
module_exit(crc32_mod_exit);
|
@ -72,6 +72,24 @@ static int crc32_pmull_init(struct shash_desc *desc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
u32 *crc = shash_desc_ctx(desc);
|
||||
|
||||
*crc = crc32_armv8_le(*crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32c_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
u32 *crc = shash_desc_ctx(desc);
|
||||
|
||||
*crc = crc32c_armv8_le(*crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
@ -156,7 +174,7 @@ static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
|
||||
static struct shash_alg crc32_pmull_algs[] = { {
|
||||
.setkey = crc32_pmull_setkey,
|
||||
.init = crc32_pmull_init,
|
||||
.update = crc32_pmull_update,
|
||||
.update = crc32_update,
|
||||
.final = crc32_pmull_final,
|
||||
.descsize = sizeof(u32),
|
||||
.digestsize = sizeof(u32),
|
||||
@ -171,7 +189,7 @@ static struct shash_alg crc32_pmull_algs[] = { {
|
||||
}, {
|
||||
.setkey = crc32_pmull_setkey,
|
||||
.init = crc32_pmull_init,
|
||||
.update = crc32c_pmull_update,
|
||||
.update = crc32c_update,
|
||||
.final = crc32c_pmull_final,
|
||||
.descsize = sizeof(u32),
|
||||
.digestsize = sizeof(u32),
|
||||
@ -187,14 +205,20 @@ static struct shash_alg crc32_pmull_algs[] = { {
|
||||
|
||||
static int __init crc32_pmull_mod_init(void)
|
||||
{
|
||||
if (elf_hwcap & HWCAP_CRC32) {
|
||||
fallback_crc32 = crc32_armv8_le;
|
||||
fallback_crc32c = crc32c_armv8_le;
|
||||
} else {
|
||||
fallback_crc32 = crc32_le;
|
||||
fallback_crc32c = __crc32c_le;
|
||||
}
|
||||
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
|
||||
crc32_pmull_algs[0].update = crc32_pmull_update;
|
||||
crc32_pmull_algs[1].update = crc32c_pmull_update;
|
||||
|
||||
if (elf_hwcap & HWCAP_CRC32) {
|
||||
fallback_crc32 = crc32_armv8_le;
|
||||
fallback_crc32c = crc32c_armv8_le;
|
||||
} else {
|
||||
fallback_crc32 = crc32_le;
|
||||
fallback_crc32c = __crc32c_le;
|
||||
}
|
||||
} else if (!(elf_hwcap & HWCAP_CRC32)) {
|
||||
return -ENODEV;
|
||||
}
|
||||
return crypto_register_shashes(crc32_pmull_algs,
|
||||
ARRAY_SIZE(crc32_pmull_algs));
|
||||
}
|
||||
@ -205,7 +229,12 @@ static void __exit crc32_pmull_mod_exit(void)
|
||||
ARRAY_SIZE(crc32_pmull_algs));
|
||||
}
|
||||
|
||||
module_cpu_feature_match(PMULL, crc32_pmull_mod_init);
|
||||
static const struct cpu_feature crc32_cpu_feature[] = {
|
||||
{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
|
||||
};
|
||||
MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
|
||||
|
||||
module_init(crc32_pmull_mod_init);
|
||||
module_exit(crc32_pmull_mod_exit);
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
|
@ -46,27 +46,48 @@
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
.data
|
||||
# constants in mergeable sections, linker can reorder and merge
|
||||
.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lgf128mul_x_ble_mask:
|
||||
.octa 0x00000000000000010000000000000087
|
||||
.section .rodata.cst16.POLY, "aM", @progbits, 16
|
||||
.align 16
|
||||
POLY: .octa 0xC2000000000000000000000000000001
|
||||
.section .rodata.cst16.TWOONE, "aM", @progbits, 16
|
||||
.align 16
|
||||
TWOONE: .octa 0x00000001000000000000000000000001
|
||||
|
||||
.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
|
||||
.section .rodata.cst16.MASK1, "aM", @progbits, 16
|
||||
.align 16
|
||||
MASK1: .octa 0x0000000000000000ffffffffffffffff
|
||||
.section .rodata.cst16.MASK2, "aM", @progbits, 16
|
||||
.align 16
|
||||
MASK2: .octa 0xffffffffffffffff0000000000000000
|
||||
.section .rodata.cst16.ONE, "aM", @progbits, 16
|
||||
.align 16
|
||||
ONE: .octa 0x00000000000000000000000000000001
|
||||
.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
|
||||
.section .rodata.cst16.dec, "aM", @progbits, 16
|
||||
.align 16
|
||||
dec: .octa 0x1
|
||||
.section .rodata.cst16.enc, "aM", @progbits, 16
|
||||
.align 16
|
||||
enc: .octa 0x2
|
||||
|
||||
# order of these constants should not change.
|
||||
# more specifically, ALL_F should follow SHIFT_MASK,
|
||||
# and ZERO should follow ALL_F
|
||||
|
||||
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
|
||||
MASK1: .octa 0x0000000000000000ffffffffffffffff
|
||||
MASK2: .octa 0xffffffffffffffff0000000000000000
|
||||
# and zero should follow ALL_F
|
||||
.section .rodata, "a", @progbits
|
||||
.align 16
|
||||
SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
|
||||
ALL_F: .octa 0xffffffffffffffffffffffffffffffff
|
||||
ZERO: .octa 0x00000000000000000000000000000000
|
||||
ONE: .octa 0x00000000000000000000000000000001
|
||||
F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
|
||||
dec: .octa 0x1
|
||||
enc: .octa 0x2
|
||||
.octa 0x00000000000000000000000000000000
|
||||
|
||||
|
||||
.text
|
||||
|
@ -122,22 +122,38 @@
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/inst.h>
|
||||
|
||||
.data
|
||||
# constants in mergeable sections, linker can reorder and merge
|
||||
.section .rodata.cst16.POLY, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
POLY: .octa 0xC2000000000000000000000000000001
|
||||
|
||||
.section .rodata.cst16.POLY2, "aM", @progbits, 16
|
||||
.align 16
|
||||
POLY2: .octa 0xC20000000000000000000001C2000000
|
||||
|
||||
.section .rodata.cst16.TWOONE, "aM", @progbits, 16
|
||||
.align 16
|
||||
TWOONE: .octa 0x00000001000000000000000000000001
|
||||
|
||||
# order of these constants should not change.
|
||||
# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F
|
||||
|
||||
.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F
|
||||
|
||||
.section .rodata.cst16.ONE, "aM", @progbits, 16
|
||||
.align 16
|
||||
ONE: .octa 0x00000000000000000000000000000001
|
||||
|
||||
.section .rodata.cst16.ONEf, "aM", @progbits, 16
|
||||
.align 16
|
||||
ONEf: .octa 0x01000000000000000000000000000000
|
||||
|
||||
# order of these constants should not change.
|
||||
# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F
|
||||
.section .rodata, "a", @progbits
|
||||
.align 16
|
||||
SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
|
||||
ALL_F: .octa 0xffffffffffffffffffffffffffffffff
|
||||
ZERO: .octa 0x00000000000000000000000000000000
|
||||
ONE: .octa 0x00000000000000000000000000000001
|
||||
ONEf: .octa 0x01000000000000000000000000000000
|
||||
.octa 0x00000000000000000000000000000000
|
||||
|
||||
.text
|
||||
|
||||
|
@ -740,9 +740,11 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
|
||||
*((__be32 *)(iv+12)) = counter;
|
||||
|
||||
if (sg_is_last(req->src) &&
|
||||
req->src->offset + req->src->length <= PAGE_SIZE &&
|
||||
(!PageHighMem(sg_page(req->src)) ||
|
||||
req->src->offset + req->src->length <= PAGE_SIZE) &&
|
||||
sg_is_last(req->dst) &&
|
||||
req->dst->offset + req->dst->length <= PAGE_SIZE) {
|
||||
(!PageHighMem(sg_page(req->dst)) ||
|
||||
req->dst->offset + req->dst->length <= PAGE_SIZE)) {
|
||||
one_entry_in_sg = 1;
|
||||
scatterwalk_start(&src_sg_walk, req->src);
|
||||
assoc = scatterwalk_map(&src_sg_walk);
|
||||
@ -822,9 +824,11 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
|
||||
*((__be32 *)(iv+12)) = counter;
|
||||
|
||||
if (sg_is_last(req->src) &&
|
||||
req->src->offset + req->src->length <= PAGE_SIZE &&
|
||||
(!PageHighMem(sg_page(req->src)) ||
|
||||
req->src->offset + req->src->length <= PAGE_SIZE) &&
|
||||
sg_is_last(req->dst) &&
|
||||
req->dst->offset + req->dst->length <= PAGE_SIZE) {
|
||||
(!PageHighMem(sg_page(req->dst)) ||
|
||||
req->dst->offset + req->dst->length <= PAGE_SIZE)) {
|
||||
one_entry_in_sg = 1;
|
||||
scatterwalk_start(&src_sg_walk, req->src);
|
||||
assoc = scatterwalk_map(&src_sg_walk);
|
||||
|
@ -571,7 +571,9 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
vmovdqu y6, 14 * 16(rio); \
|
||||
vmovdqu y7, 15 * 16(rio);
|
||||
|
||||
.data
|
||||
|
||||
/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
|
||||
.section .rodata.cst16, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
#define SHUFB_BYTES(idx) \
|
||||
@ -711,6 +713,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
|
||||
|
||||
/* 4-bit mask */
|
||||
.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
|
||||
.align 4
|
||||
.L0f0f0f0f:
|
||||
.long 0x0f0f0f0f
|
||||
|
@ -610,20 +610,25 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
vmovdqu y6, 14 * 32(rio); \
|
||||
vmovdqu y7, 15 * 32(rio);
|
||||
|
||||
.data
|
||||
.align 32
|
||||
|
||||
.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32
|
||||
.align 32
|
||||
#define SHUFB_BYTES(idx) \
|
||||
0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
|
||||
|
||||
.Lshufb_16x16b:
|
||||
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
|
||||
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
|
||||
|
||||
.section .rodata.cst32.pack_bswap, "aM", @progbits, 32
|
||||
.align 32
|
||||
.Lpack_bswap:
|
||||
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
|
||||
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
|
||||
|
||||
/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
|
||||
.section .rodata.cst16, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
/* For CTR-mode IV byteswap */
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
@ -750,6 +755,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
||||
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
|
||||
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
|
||||
|
||||
.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
|
||||
.align 4
|
||||
/* 4-bit mask */
|
||||
.L0f0f0f0f:
|
||||
|
@ -195,19 +195,29 @@
|
||||
vpshufb rmask, x0, x0; \
|
||||
vpshufb rmask, x1, x1;
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lbswap_mask:
|
||||
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
|
||||
.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
.section .rodata.cst16.bswap_iv_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lbswap_iv_mask:
|
||||
.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
.section .rodata.cst4.16_mask, "aM", @progbits, 4
|
||||
.align 4
|
||||
.L16_mask:
|
||||
.byte 16, 16, 16, 16
|
||||
.section .rodata.cst4.32_mask, "aM", @progbits, 4
|
||||
.align 4
|
||||
.L32_mask:
|
||||
.byte 32, 0, 0, 0
|
||||
.section .rodata.cst4.first_mask, "aM", @progbits, 4
|
||||
.align 4
|
||||
.Lfirst_mask:
|
||||
.byte 0x1f, 0, 0, 0
|
||||
|
||||
|
@ -225,8 +225,7 @@
|
||||
vpshufb rmask, x2, x2; \
|
||||
vpshufb rmask, x3, x3;
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst16, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lxts_gf128mul_and_shl1_mask:
|
||||
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
|
||||
@ -244,10 +243,19 @@
|
||||
.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
.section .rodata.cst4.L16_mask, "aM", @progbits, 4
|
||||
.align 4
|
||||
.L16_mask:
|
||||
.byte 16, 16, 16, 16
|
||||
|
||||
.section .rodata.cst4.L32_mask, "aM", @progbits, 4
|
||||
.align 4
|
||||
.L32_mask:
|
||||
.byte 32, 0, 0, 0
|
||||
|
||||
.section .rodata.cst4.first_mask, "aM", @progbits, 4
|
||||
.align 4
|
||||
.Lfirst_mask:
|
||||
.byte 0x1f, 0, 0, 0
|
||||
|
||||
|
@ -11,13 +11,18 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.data
|
||||
.section .rodata.cst32.ROT8, "aM", @progbits, 32
|
||||
.align 32
|
||||
|
||||
ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
|
||||
.octa 0x0e0d0c0f0a09080b0605040702010003
|
||||
|
||||
.section .rodata.cst32.ROT16, "aM", @progbits, 32
|
||||
.align 32
|
||||
ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
|
||||
.octa 0x0d0c0f0e09080b0a0504070601000302
|
||||
|
||||
.section .rodata.cst32.CTRINC, "aM", @progbits, 32
|
||||
.align 32
|
||||
CTRINC: .octa 0x00000003000000020000000100000000
|
||||
.octa 0x00000007000000060000000500000004
|
||||
|
||||
|
@ -11,11 +11,14 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.data
|
||||
.section .rodata.cst16.ROT8, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
|
||||
.section .rodata.cst16.ROT16, "aM", @progbits, 16
|
||||
.align 16
|
||||
ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
|
||||
.section .rodata.cst16.CTRINC, "aM", @progbits, 16
|
||||
.align 16
|
||||
CTRINC: .octa 0x00000003000000020000000100000000
|
||||
|
||||
.text
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/chacha20.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/fpu/api.h>
|
||||
@ -63,36 +63,37 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
|
||||
}
|
||||
}
|
||||
|
||||
static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
static int chacha20_simd(struct skcipher_request *req)
|
||||
{
|
||||
u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1];
|
||||
struct blkcipher_walk walk;
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
u32 *state, state_buf[16 + 2] __aligned(8);
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd())
|
||||
return crypto_chacha20_crypt(desc, dst, src, nbytes);
|
||||
BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
|
||||
state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
|
||||
|
||||
state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);
|
||||
if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
|
||||
return crypto_chacha20_crypt(req);
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
|
||||
crypto_chacha20_init(state, ctx, walk.iv);
|
||||
|
||||
kernel_fpu_begin();
|
||||
|
||||
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
|
||||
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
|
||||
err = blkcipher_walk_done(desc, &walk,
|
||||
walk.nbytes % CHACHA20_BLOCK_SIZE);
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes % CHACHA20_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
if (walk.nbytes) {
|
||||
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
walk.nbytes);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
kernel_fpu_end();
|
||||
@ -100,27 +101,22 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct crypto_alg alg = {
|
||||
.cra_name = "chacha20",
|
||||
.cra_driver_name = "chacha20-simd",
|
||||
.cra_priority = 300,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_ctxsize = sizeof(struct chacha20_ctx),
|
||||
.cra_alignmask = sizeof(u32) - 1,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CHACHA20_KEY_SIZE,
|
||||
.max_keysize = CHACHA20_KEY_SIZE,
|
||||
.ivsize = CHACHA20_IV_SIZE,
|
||||
.geniv = "seqiv",
|
||||
.setkey = crypto_chacha20_setkey,
|
||||
.encrypt = chacha20_simd,
|
||||
.decrypt = chacha20_simd,
|
||||
},
|
||||
},
|
||||
static struct skcipher_alg alg = {
|
||||
.base.cra_name = "chacha20",
|
||||
.base.cra_driver_name = "chacha20-simd",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha20_ctx),
|
||||
.base.cra_alignmask = sizeof(u32) - 1,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA20_KEY_SIZE,
|
||||
.max_keysize = CHACHA20_KEY_SIZE,
|
||||
.ivsize = CHACHA20_IV_SIZE,
|
||||
.chunksize = CHACHA20_BLOCK_SIZE,
|
||||
.setkey = crypto_chacha20_setkey,
|
||||
.encrypt = chacha20_simd,
|
||||
.decrypt = chacha20_simd,
|
||||
};
|
||||
|
||||
static int __init chacha20_simd_mod_init(void)
|
||||
@ -133,12 +129,12 @@ static int __init chacha20_simd_mod_init(void)
|
||||
boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
||||
#endif
|
||||
return crypto_register_alg(&alg);
|
||||
return crypto_register_skcipher(&alg);
|
||||
}
|
||||
|
||||
static void __exit chacha20_simd_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&alg);
|
||||
crypto_unregister_skcipher(&alg);
|
||||
}
|
||||
|
||||
module_init(chacha20_simd_mod_init);
|
||||
|
@ -312,7 +312,7 @@ do_return:
|
||||
ret
|
||||
ENDPROC(crc_pcl)
|
||||
|
||||
.section .rodata, "a", %progbits
|
||||
.section .rodata, "a", @progbits
|
||||
################################################################
|
||||
## jump table Table is 129 entries x 2 bytes each
|
||||
################################################################
|
||||
|
@ -554,12 +554,11 @@ _only_less_than_2:
|
||||
|
||||
ENDPROC(crc_t10dif_pcl)
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata, "a", @progbits
|
||||
.align 16
|
||||
# precomputed constants
|
||||
# these constants are precomputed from the poly:
|
||||
# 0x8bb70000 (0x8bb7 scaled to 32 bits)
|
||||
.align 16
|
||||
# Q = 0x18BB70000
|
||||
# rk1 = 2^(32*3) mod Q << 32
|
||||
# rk2 = 2^(32*5) mod Q << 32
|
||||
@ -613,14 +612,23 @@ rk20:
|
||||
|
||||
|
||||
|
||||
.section .rodata.cst16.mask1, "aM", @progbits, 16
|
||||
.align 16
|
||||
mask1:
|
||||
.octa 0x80808080808080808080808080808080
|
||||
|
||||
.section .rodata.cst16.mask2, "aM", @progbits, 16
|
||||
.align 16
|
||||
mask2:
|
||||
.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
|
||||
|
||||
.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
SHUF_MASK:
|
||||
.octa 0x000102030405060708090A0B0C0D0E0F
|
||||
|
||||
.section .rodata.cst32.pshufb_shf_table, "aM", @progbits, 32
|
||||
.align 32
|
||||
pshufb_shf_table:
|
||||
# use these values for shift constants for the pshufb instruction
|
||||
# different alignments result in values as shown:
|
||||
|
@ -537,7 +537,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
|
||||
ret;
|
||||
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
|
||||
|
||||
.data
|
||||
.section .rodata, "a", @progbits
|
||||
.align 16
|
||||
.L_s1:
|
||||
.quad 0x0010100001010400, 0x0000000000000000
|
||||
|
@ -20,8 +20,7 @@
|
||||
#include <asm/inst.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lbswap_mask:
|
||||
.octa 0x000102030405060708090a0b0c0d0e0f
|
||||
|
@ -11,11 +11,13 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.data
|
||||
.section .rodata.cst32.ANMASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
|
||||
ANMASK: .octa 0x0000000003ffffff0000000003ffffff
|
||||
.octa 0x0000000003ffffff0000000003ffffff
|
||||
|
||||
.section .rodata.cst32.ORMASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
ORMASK: .octa 0x00000000010000000000000001000000
|
||||
.octa 0x00000000010000000000000001000000
|
||||
|
||||
|
@ -11,10 +11,12 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.data
|
||||
.section .rodata.cst16.ANMASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
ANMASK: .octa 0x0000000003ffffff0000000003ffffff
|
||||
|
||||
.section .rodata.cst16.ORMASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
ORMASK: .octa 0x00000000010000000000000001000000
|
||||
|
||||
.text
|
||||
|
@ -29,11 +29,12 @@
|
||||
|
||||
.file "serpent-avx-x86_64-asm_64.S"
|
||||
|
||||
.data
|
||||
.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lxts_gf128mul_and_shl1_mask:
|
||||
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
|
@ -20,13 +20,18 @@
|
||||
|
||||
.file "serpent-avx2-asm_64.S"
|
||||
|
||||
.data
|
||||
.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lxts_gf128mul_and_shl1_mask_0:
|
||||
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lxts_gf128mul_and_shl1_mask_1:
|
||||
.byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
|
@ -281,11 +281,13 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2)
|
||||
ret
|
||||
ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
||||
|
||||
.section .rodata.cst8, "aM", @progbits, 8
|
||||
.align 8
|
||||
one:
|
||||
.quad 1
|
||||
two:
|
||||
|
@ -203,8 +203,7 @@ return_null:
|
||||
|
||||
ENDPROC(sha1_mb_mgr_submit_avx2)
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
||||
|
@ -461,21 +461,32 @@ lloop:
|
||||
ENDPROC(sha1_x8_avx2)
|
||||
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst32.K00_19, "aM", @progbits, 32
|
||||
.align 32
|
||||
K00_19:
|
||||
.octa 0x5A8279995A8279995A8279995A827999
|
||||
.octa 0x5A8279995A8279995A8279995A827999
|
||||
|
||||
.section .rodata.cst32.K20_39, "aM", @progbits, 32
|
||||
.align 32
|
||||
K20_39:
|
||||
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
|
||||
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
|
||||
|
||||
.section .rodata.cst32.K40_59, "aM", @progbits, 32
|
||||
.align 32
|
||||
K40_59:
|
||||
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
|
||||
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
|
||||
|
||||
.section .rodata.cst32.K60_79, "aM", @progbits, 32
|
||||
.align 32
|
||||
K60_79:
|
||||
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
|
||||
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
|
@ -293,10 +293,12 @@ ENTRY(sha1_ni_transform)
|
||||
ret
|
||||
ENDPROC(sha1_ni_transform)
|
||||
|
||||
.data
|
||||
|
||||
.align 64
|
||||
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x000102030405060708090a0b0c0d0e0f
|
||||
|
||||
.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
UPPER_WORD_MASK:
|
||||
.octa 0xFFFFFFFF000000000000000000000000
|
||||
|
@ -463,7 +463,7 @@ done_hash:
|
||||
ret
|
||||
ENDPROC(sha256_transform_avx)
|
||||
|
||||
.data
|
||||
.section .rodata.cst256.K256, "aM", @progbits, 256
|
||||
.align 64
|
||||
K256:
|
||||
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
@ -483,14 +483,21 @@ K256:
|
||||
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
|
||||
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
|
||||
.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16
|
||||
.align 16
|
||||
# shuffle xBxA -> 00BA
|
||||
_SHUF_00BA:
|
||||
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
|
||||
|
||||
.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16
|
||||
.align 16
|
||||
# shuffle xDxC -> DC00
|
||||
_SHUF_DC00:
|
||||
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
|
||||
|
||||
#endif
|
||||
|
@ -723,7 +723,7 @@ done_hash:
|
||||
ret
|
||||
ENDPROC(sha256_transform_rorx)
|
||||
|
||||
.data
|
||||
.section .rodata.cst512.K256, "aM", @progbits, 512
|
||||
.align 64
|
||||
K256:
|
||||
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
@ -759,14 +759,21 @@ K256:
|
||||
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203
|
||||
|
||||
# shuffle xBxA -> 00BA
|
||||
.section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32
|
||||
.align 32
|
||||
_SHUF_00BA:
|
||||
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100
|
||||
|
||||
# shuffle xDxC -> DC00
|
||||
.section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32
|
||||
.align 32
|
||||
_SHUF_DC00:
|
||||
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF
|
||||
|
||||
#endif
|
||||
|
@ -284,11 +284,13 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2)
|
||||
ret
|
||||
ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
||||
|
||||
.section .rodata.cst8, "aM", @progbits, 8
|
||||
.align 8
|
||||
one:
|
||||
.quad 1
|
||||
two:
|
||||
|
@ -208,8 +208,7 @@ return_null:
|
||||
|
||||
ENDPROC(sha256_mb_mgr_submit_avx2)
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
||||
|
@ -437,7 +437,8 @@ Lrounds_16_xx:
|
||||
|
||||
ret
|
||||
ENDPROC(sha256_x8_avx2)
|
||||
.data
|
||||
|
||||
.section .rodata.K256_8, "a", @progbits
|
||||
.align 64
|
||||
K256_8:
|
||||
.octa 0x428a2f98428a2f98428a2f98428a2f98
|
||||
@ -568,10 +569,14 @@ K256_8:
|
||||
.octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
|
||||
.octa 0xc67178f2c67178f2c67178f2c67178f2
|
||||
.octa 0xc67178f2c67178f2c67178f2c67178f2
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
|
||||
.section .rodata.cst256.K256, "aM", @progbits, 256
|
||||
.align 64
|
||||
.global K256
|
||||
K256:
|
||||
|
@ -474,7 +474,7 @@ done_hash:
|
||||
ret
|
||||
ENDPROC(sha256_transform_ssse3)
|
||||
|
||||
.data
|
||||
.section .rodata.cst256.K256, "aM", @progbits, 256
|
||||
.align 64
|
||||
K256:
|
||||
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
@ -494,13 +494,19 @@ K256:
|
||||
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
|
||||
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
|
||||
.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16
|
||||
.align 16
|
||||
# shuffle xBxA -> 00BA
|
||||
_SHUF_00BA:
|
||||
.octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
|
||||
|
||||
.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16
|
||||
.align 16
|
||||
# shuffle xDxC -> DC00
|
||||
_SHUF_DC00:
|
||||
.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
|
||||
|
@ -329,7 +329,7 @@ ENTRY(sha256_ni_transform)
|
||||
ret
|
||||
ENDPROC(sha256_ni_transform)
|
||||
|
||||
.data
|
||||
.section .rodata.cst256.K256, "aM", @progbits, 256
|
||||
.align 64
|
||||
K256:
|
||||
.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
@ -349,5 +349,7 @@ K256:
|
||||
.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
||||
|
||||
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
|
||||
.align 16
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
|
@ -370,14 +370,17 @@ ENDPROC(sha512_transform_avx)
|
||||
########################################################################
|
||||
### Binary Data
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
|
||||
XMM_QWORD_BSWAP:
|
||||
.octa 0x08090a0b0c0d0e0f0001020304050607
|
||||
|
||||
# Mergeable 640-byte rodata section. This allows linker to merge the table
|
||||
# with other, exactly the same 640-byte fragment of another rodata section
|
||||
# (if such section exists).
|
||||
.section .rodata.cst640.K512, "aM", @progbits, 640
|
||||
.align 64
|
||||
# K[t] used in SHA512 hashing
|
||||
K512:
|
||||
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
|
||||
|
@ -684,8 +684,11 @@ ENDPROC(sha512_transform_rorx)
|
||||
########################################################################
|
||||
### Binary Data
|
||||
|
||||
.data
|
||||
|
||||
# Mergeable 640-byte rodata section. This allows linker to merge the table
|
||||
# with other, exactly the same 640-byte fragment of another rodata section
|
||||
# (if such section exists).
|
||||
.section .rodata.cst640.K512, "aM", @progbits, 640
|
||||
.align 64
|
||||
# K[t] used in SHA512 hashing
|
||||
K512:
|
||||
@ -730,14 +733,17 @@ K512:
|
||||
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
|
||||
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
|
||||
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x08090a0b0c0d0e0f0001020304050607
|
||||
.octa 0x18191a1b1c1d1e1f1011121314151617
|
||||
|
||||
.section .rodata.cst32.MASK_YMM_LO, "aM", @progbits, 32
|
||||
.align 32
|
||||
MASK_YMM_LO:
|
||||
.octa 0x00000000000000000000000000000000
|
||||
.octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
|
||||
|
||||
#endif
|
||||
|
@ -221,7 +221,7 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
|
||||
}
|
||||
|
||||
static struct sha512_hash_ctx
|
||||
*sha512_ctx_mgr_get_comp_ctx(struct sha512_ctx_mgr *mgr)
|
||||
*sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
|
||||
{
|
||||
/*
|
||||
* If get_comp_job returns NULL, there are no jobs complete.
|
||||
@ -233,11 +233,17 @@ static struct sha512_hash_ctx
|
||||
* Otherwise, all jobs currently being managed by the hash_ctx_mgr
|
||||
* still need processing.
|
||||
*/
|
||||
struct sha512_ctx_mgr *mgr;
|
||||
struct sha512_hash_ctx *ctx;
|
||||
unsigned long flags;
|
||||
|
||||
mgr = cstate->mgr;
|
||||
spin_lock_irqsave(&cstate->work_lock, flags);
|
||||
ctx = (struct sha512_hash_ctx *)
|
||||
sha512_job_mgr_get_comp_job(&mgr->mgr);
|
||||
return sha512_ctx_mgr_resubmit(mgr, ctx);
|
||||
ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
|
||||
spin_unlock_irqrestore(&cstate->work_lock, flags);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
|
||||
@ -246,12 +252,17 @@ static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
|
||||
}
|
||||
|
||||
static struct sha512_hash_ctx
|
||||
*sha512_ctx_mgr_submit(struct sha512_ctx_mgr *mgr,
|
||||
*sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
|
||||
struct sha512_hash_ctx *ctx,
|
||||
const void *buffer,
|
||||
uint32_t len,
|
||||
int flags)
|
||||
{
|
||||
struct sha512_ctx_mgr *mgr;
|
||||
unsigned long irqflags;
|
||||
|
||||
mgr = cstate->mgr;
|
||||
spin_lock_irqsave(&cstate->work_lock, irqflags);
|
||||
if (flags & (~HASH_ENTIRE)) {
|
||||
/*
|
||||
* User should not pass anything other than FIRST, UPDATE, or
|
||||
@ -351,20 +362,26 @@ static struct sha512_hash_ctx
|
||||
}
|
||||
}
|
||||
|
||||
return sha512_ctx_mgr_resubmit(mgr, ctx);
|
||||
ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
|
||||
spin_unlock_irqrestore(&cstate->work_lock, irqflags);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr)
|
||||
static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
|
||||
{
|
||||
struct sha512_ctx_mgr *mgr;
|
||||
struct sha512_hash_ctx *ctx;
|
||||
unsigned long flags;
|
||||
|
||||
mgr = cstate->mgr;
|
||||
spin_lock_irqsave(&cstate->work_lock, flags);
|
||||
while (1) {
|
||||
ctx = (struct sha512_hash_ctx *)
|
||||
sha512_job_mgr_flush(&mgr->mgr);
|
||||
|
||||
/* If flush returned 0, there are no more jobs in flight. */
|
||||
if (!ctx)
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
/*
|
||||
* If flush returned a job, resubmit the job to finish
|
||||
@ -378,8 +395,10 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr)
|
||||
* the sha512_ctx_mgr still need processing. Loop.
|
||||
*/
|
||||
if (ctx)
|
||||
return ctx;
|
||||
break;
|
||||
}
|
||||
spin_unlock_irqrestore(&cstate->work_lock, flags);
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static int sha512_mb_init(struct ahash_request *areq)
|
||||
@ -439,11 +458,11 @@ static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
|
||||
sha_ctx = (struct sha512_hash_ctx *)
|
||||
ahash_request_ctx(&rctx->areq);
|
||||
kernel_fpu_begin();
|
||||
sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx,
|
||||
sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
|
||||
rctx->walk.data, nbytes, flag);
|
||||
if (!sha_ctx) {
|
||||
if (flush)
|
||||
sha_ctx = sha512_ctx_mgr_flush(cstate->mgr);
|
||||
sha_ctx = sha512_ctx_mgr_flush(cstate);
|
||||
}
|
||||
kernel_fpu_end();
|
||||
if (sha_ctx)
|
||||
@ -471,11 +490,12 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
|
||||
struct sha512_hash_ctx *sha_ctx;
|
||||
struct mcryptd_hash_request_ctx *req_ctx;
|
||||
int ret;
|
||||
unsigned long flags;
|
||||
|
||||
/* remove from work list */
|
||||
spin_lock(&cstate->work_lock);
|
||||
spin_lock_irqsave(&cstate->work_lock, flags);
|
||||
list_del(&rctx->waiter);
|
||||
spin_unlock(&cstate->work_lock);
|
||||
spin_unlock_irqrestore(&cstate->work_lock, flags);
|
||||
|
||||
if (irqs_disabled())
|
||||
rctx->complete(&req->base, err);
|
||||
@ -486,14 +506,14 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
|
||||
}
|
||||
|
||||
/* check to see if there are other jobs that are done */
|
||||
sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr);
|
||||
sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
|
||||
while (sha_ctx) {
|
||||
req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
|
||||
ret = sha_finish_walk(&req_ctx, cstate, false);
|
||||
if (req_ctx) {
|
||||
spin_lock(&cstate->work_lock);
|
||||
spin_lock_irqsave(&cstate->work_lock, flags);
|
||||
list_del(&req_ctx->waiter);
|
||||
spin_unlock(&cstate->work_lock);
|
||||
spin_unlock_irqrestore(&cstate->work_lock, flags);
|
||||
|
||||
req = cast_mcryptd_ctx_to_req(req_ctx);
|
||||
if (irqs_disabled())
|
||||
@ -504,7 +524,7 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
|
||||
local_bh_enable();
|
||||
}
|
||||
}
|
||||
sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr);
|
||||
sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -515,6 +535,7 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
|
||||
{
|
||||
unsigned long next_flush;
|
||||
unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
|
||||
unsigned long flags;
|
||||
|
||||
/* initialize tag */
|
||||
rctx->tag.arrival = jiffies; /* tag the arrival time */
|
||||
@ -522,9 +543,9 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
|
||||
next_flush = rctx->tag.arrival + delay;
|
||||
rctx->tag.expire = next_flush;
|
||||
|
||||
spin_lock(&cstate->work_lock);
|
||||
spin_lock_irqsave(&cstate->work_lock, flags);
|
||||
list_add_tail(&rctx->waiter, &cstate->work_list);
|
||||
spin_unlock(&cstate->work_lock);
|
||||
spin_unlock_irqrestore(&cstate->work_lock, flags);
|
||||
|
||||
mcryptd_arm_flusher(cstate, delay);
|
||||
}
|
||||
@ -565,7 +586,7 @@ static int sha512_mb_update(struct ahash_request *areq)
|
||||
sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
|
||||
sha512_mb_add_list(rctx, cstate);
|
||||
kernel_fpu_begin();
|
||||
sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
|
||||
sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
|
||||
nbytes, HASH_UPDATE);
|
||||
kernel_fpu_end();
|
||||
|
||||
@ -628,7 +649,7 @@ static int sha512_mb_finup(struct ahash_request *areq)
|
||||
sha512_mb_add_list(rctx, cstate);
|
||||
|
||||
kernel_fpu_begin();
|
||||
sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data,
|
||||
sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
|
||||
nbytes, flag);
|
||||
kernel_fpu_end();
|
||||
|
||||
@ -677,8 +698,7 @@ static int sha512_mb_final(struct ahash_request *areq)
|
||||
/* flag HASH_FINAL and 0 data size */
|
||||
sha512_mb_add_list(rctx, cstate);
|
||||
kernel_fpu_begin();
|
||||
sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0,
|
||||
HASH_LAST);
|
||||
sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
|
||||
kernel_fpu_end();
|
||||
|
||||
/* check if anything is returned */
|
||||
@ -940,7 +960,7 @@ static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
|
||||
break;
|
||||
kernel_fpu_begin();
|
||||
sha_ctx = (struct sha512_hash_ctx *)
|
||||
sha512_ctx_mgr_flush(cstate->mgr);
|
||||
sha512_ctx_mgr_flush(cstate);
|
||||
kernel_fpu_end();
|
||||
if (!sha_ctx) {
|
||||
pr_err("sha512_mb error: nothing got flushed for"
|
||||
|
@ -280,12 +280,18 @@ ENTRY(sha512_mb_mgr_get_comp_job_avx2)
|
||||
pop %rbx
|
||||
ret
|
||||
ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
|
||||
.data
|
||||
|
||||
.align 16
|
||||
.section .rodata.cst8.one, "aM", @progbits, 8
|
||||
.align 8
|
||||
one:
|
||||
.quad 1
|
||||
|
||||
.section .rodata.cst8.two, "aM", @progbits, 8
|
||||
.align 8
|
||||
two:
|
||||
.quad 2
|
||||
|
||||
.section .rodata.cst8.three, "aM", @progbits, 8
|
||||
.align 8
|
||||
three:
|
||||
.quad 3
|
||||
|
@ -209,8 +209,9 @@ return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
ENDPROC(sha512_mb_mgr_submit_avx2)
|
||||
.data
|
||||
|
||||
/* UNUSED?
|
||||
.section .rodata.cst16, "aM", @progbits, 16
|
||||
.align 16
|
||||
H0: .int 0x6a09e667
|
||||
H1: .int 0xbb67ae85
|
||||
@ -220,3 +221,4 @@ H4: .int 0x510e527f
|
||||
H5: .int 0x9b05688c
|
||||
H6: .int 0x1f83d9ab
|
||||
H7: .int 0x5be0cd19
|
||||
*/
|
||||
|
@ -361,7 +361,7 @@ Lrounds_16_xx:
|
||||
ret
|
||||
ENDPROC(sha512_x4_avx2)
|
||||
|
||||
.data
|
||||
.section .rodata.K512_4, "a", @progbits
|
||||
.align 64
|
||||
K512_4:
|
||||
.octa 0x428a2f98d728ae22428a2f98d728ae22,\
|
||||
@ -525,5 +525,7 @@ K512_4:
|
||||
.octa 0x6c44198c4a4758176c44198c4a475817,\
|
||||
0x6c44198c4a4758176c44198c4a475817
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
|
||||
.octa 0x18191a1b1c1d1e1f1011121314151617
|
||||
|
@ -369,14 +369,17 @@ ENDPROC(sha512_transform_ssse3)
|
||||
########################################################################
|
||||
### Binary Data
|
||||
|
||||
.data
|
||||
|
||||
.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
# Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb.
|
||||
XMM_QWORD_BSWAP:
|
||||
.octa 0x08090a0b0c0d0e0f0001020304050607
|
||||
|
||||
# Mergeable 640-byte rodata section. This allows linker to merge the table
|
||||
# with other, exactly the same 640-byte fragment of another rodata section
|
||||
# (if such section exists).
|
||||
.section .rodata.cst640.K512, "aM", @progbits, 640
|
||||
.align 64
|
||||
# K[t] used in SHA512 hashing
|
||||
K512:
|
||||
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
|
||||
|
@ -29,11 +29,13 @@
|
||||
|
||||
.file "twofish-avx-x86_64-asm_64.S"
|
||||
|
||||
.data
|
||||
.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
|
||||
.Lbswap128_mask:
|
||||
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
|
||||
.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
|
||||
.align 16
|
||||
.Lxts_gf128mul_and_shl1_mask:
|
||||
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
|
@ -263,6 +263,7 @@ comment "Authenticated Encryption with Associated Data"
|
||||
config CRYPTO_CCM
|
||||
tristate "CCM support"
|
||||
select CRYPTO_CTR
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_AEAD
|
||||
help
|
||||
Support for Counter with CBC MAC. Required for IPsec.
|
||||
@ -374,6 +375,7 @@ config CRYPTO_XTS
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_MANAGER
|
||||
select CRYPTO_GF128MUL
|
||||
select CRYPTO_ECB
|
||||
help
|
||||
XTS: IEEE1619/D16 narrow block cipher use with aes-xts-plain,
|
||||
key size 256, 384 or 512 bits. This implementation currently
|
||||
@ -895,6 +897,23 @@ config CRYPTO_AES
|
||||
|
||||
See <http://csrc.nist.gov/CryptoToolkit/aes/> for more information.
|
||||
|
||||
config CRYPTO_AES_TI
|
||||
tristate "Fixed time AES cipher"
|
||||
select CRYPTO_ALGAPI
|
||||
help
|
||||
This is a generic implementation of AES that attempts to eliminate
|
||||
data dependent latencies as much as possible without affecting
|
||||
performance too much. It is intended for use by the generic CCM
|
||||
and GCM drivers, and other CTR or CMAC/XCBC based modes that rely
|
||||
solely on encryption (although decryption is supported as well, but
|
||||
with a more dramatic performance hit)
|
||||
|
||||
Instead of using 16 lookup tables of 1 KB each, (8 for encryption and
|
||||
8 for decryption), this implementation only uses just two S-boxes of
|
||||
256 bytes each, and attempts to eliminate data dependent latencies by
|
||||
prefetching the entire table into the cache at the start of each
|
||||
block.
|
||||
|
||||
config CRYPTO_AES_586
|
||||
tristate "AES cipher algorithms (i586)"
|
||||
depends on (X86 || UML_X86) && !64BIT
|
||||
|
@ -75,6 +75,7 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
|
||||
obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
|
||||
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
|
||||
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
|
||||
obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
|
||||
obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
|
||||
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
|
||||
@ -98,7 +99,9 @@ obj-$(CONFIG_CRYPTO_BLOWFISH_COMMON) += blowfish_common.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH) += twofish_generic.o
|
||||
obj-$(CONFIG_CRYPTO_TWOFISH_COMMON) += twofish_common.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
|
||||
CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
|
||||
obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
|
||||
obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
|
||||
obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
|
||||
obj-$(CONFIG_CRYPTO_CAST5) += cast5_generic.o
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <net/netlink.h>
|
||||
|
||||
#include <crypto/scatterwalk.h>
|
||||
@ -394,7 +395,7 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;
|
||||
@ -468,7 +469,7 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <net/netlink.h>
|
||||
#include <crypto/internal/acompress.h>
|
||||
#include <crypto/internal/scompress.h>
|
||||
@ -50,7 +51,7 @@ static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
|
||||
static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <net/netlink.h>
|
||||
|
||||
#include "internal.h"
|
||||
@ -132,7 +133,7 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
struct aead_alg *aead = container_of(alg, struct aead_alg, base);
|
||||
|
@ -54,6 +54,7 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
static inline u8 byte(const u32 x, const unsigned n)
|
||||
{
|
||||
@ -1216,7 +1217,6 @@ EXPORT_SYMBOL_GPL(crypto_il_tab);
|
||||
int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
const __le32 *key = (const __le32 *)in_key;
|
||||
u32 i, t, u, v, w, j;
|
||||
|
||||
if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
|
||||
@ -1225,10 +1225,15 @@ int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
|
||||
|
||||
ctx->key_length = key_len;
|
||||
|
||||
ctx->key_dec[key_len + 24] = ctx->key_enc[0] = le32_to_cpu(key[0]);
|
||||
ctx->key_dec[key_len + 25] = ctx->key_enc[1] = le32_to_cpu(key[1]);
|
||||
ctx->key_dec[key_len + 26] = ctx->key_enc[2] = le32_to_cpu(key[2]);
|
||||
ctx->key_dec[key_len + 27] = ctx->key_enc[3] = le32_to_cpu(key[3]);
|
||||
ctx->key_enc[0] = get_unaligned_le32(in_key);
|
||||
ctx->key_enc[1] = get_unaligned_le32(in_key + 4);
|
||||
ctx->key_enc[2] = get_unaligned_le32(in_key + 8);
|
||||
ctx->key_enc[3] = get_unaligned_le32(in_key + 12);
|
||||
|
||||
ctx->key_dec[key_len + 24] = ctx->key_enc[0];
|
||||
ctx->key_dec[key_len + 25] = ctx->key_enc[1];
|
||||
ctx->key_dec[key_len + 26] = ctx->key_enc[2];
|
||||
ctx->key_dec[key_len + 27] = ctx->key_enc[3];
|
||||
|
||||
switch (key_len) {
|
||||
case AES_KEYSIZE_128:
|
||||
@ -1238,17 +1243,17 @@ int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
|
||||
break;
|
||||
|
||||
case AES_KEYSIZE_192:
|
||||
ctx->key_enc[4] = le32_to_cpu(key[4]);
|
||||
t = ctx->key_enc[5] = le32_to_cpu(key[5]);
|
||||
ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
|
||||
t = ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
|
||||
for (i = 0; i < 8; ++i)
|
||||
loop6(i);
|
||||
break;
|
||||
|
||||
case AES_KEYSIZE_256:
|
||||
ctx->key_enc[4] = le32_to_cpu(key[4]);
|
||||
ctx->key_enc[5] = le32_to_cpu(key[5]);
|
||||
ctx->key_enc[6] = le32_to_cpu(key[6]);
|
||||
t = ctx->key_enc[7] = le32_to_cpu(key[7]);
|
||||
ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
|
||||
ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
|
||||
ctx->key_enc[6] = get_unaligned_le32(in_key + 24);
|
||||
t = ctx->key_enc[7] = get_unaligned_le32(in_key + 28);
|
||||
for (i = 0; i < 6; ++i)
|
||||
loop8(i);
|
||||
loop8tophalf(i);
|
||||
@ -1329,16 +1334,14 @@ EXPORT_SYMBOL_GPL(crypto_aes_set_key);
|
||||
static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const __le32 *src = (const __le32 *)in;
|
||||
__le32 *dst = (__le32 *)out;
|
||||
u32 b0[4], b1[4];
|
||||
const u32 *kp = ctx->key_enc + 4;
|
||||
const int key_len = ctx->key_length;
|
||||
|
||||
b0[0] = le32_to_cpu(src[0]) ^ ctx->key_enc[0];
|
||||
b0[1] = le32_to_cpu(src[1]) ^ ctx->key_enc[1];
|
||||
b0[2] = le32_to_cpu(src[2]) ^ ctx->key_enc[2];
|
||||
b0[3] = le32_to_cpu(src[3]) ^ ctx->key_enc[3];
|
||||
b0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
|
||||
b0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
|
||||
b0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
|
||||
b0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
|
||||
|
||||
if (key_len > 24) {
|
||||
f_nround(b1, b0, kp);
|
||||
@ -1361,10 +1364,10 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
f_nround(b1, b0, kp);
|
||||
f_lround(b0, b1, kp);
|
||||
|
||||
dst[0] = cpu_to_le32(b0[0]);
|
||||
dst[1] = cpu_to_le32(b0[1]);
|
||||
dst[2] = cpu_to_le32(b0[2]);
|
||||
dst[3] = cpu_to_le32(b0[3]);
|
||||
put_unaligned_le32(b0[0], out);
|
||||
put_unaligned_le32(b0[1], out + 4);
|
||||
put_unaligned_le32(b0[2], out + 8);
|
||||
put_unaligned_le32(b0[3], out + 12);
|
||||
}
|
||||
|
||||
/* decrypt a block of text */
|
||||
@ -1401,16 +1404,14 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const __le32 *src = (const __le32 *)in;
|
||||
__le32 *dst = (__le32 *)out;
|
||||
u32 b0[4], b1[4];
|
||||
const int key_len = ctx->key_length;
|
||||
const u32 *kp = ctx->key_dec + 4;
|
||||
|
||||
b0[0] = le32_to_cpu(src[0]) ^ ctx->key_dec[0];
|
||||
b0[1] = le32_to_cpu(src[1]) ^ ctx->key_dec[1];
|
||||
b0[2] = le32_to_cpu(src[2]) ^ ctx->key_dec[2];
|
||||
b0[3] = le32_to_cpu(src[3]) ^ ctx->key_dec[3];
|
||||
b0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
|
||||
b0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
|
||||
b0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
|
||||
b0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
|
||||
|
||||
if (key_len > 24) {
|
||||
i_nround(b1, b0, kp);
|
||||
@ -1433,10 +1434,10 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
i_nround(b1, b0, kp);
|
||||
i_lround(b0, b1, kp);
|
||||
|
||||
dst[0] = cpu_to_le32(b0[0]);
|
||||
dst[1] = cpu_to_le32(b0[1]);
|
||||
dst[2] = cpu_to_le32(b0[2]);
|
||||
dst[3] = cpu_to_le32(b0[3]);
|
||||
put_unaligned_le32(b0[0], out);
|
||||
put_unaligned_le32(b0[1], out + 4);
|
||||
put_unaligned_le32(b0[2], out + 8);
|
||||
put_unaligned_le32(b0[3], out + 12);
|
||||
}
|
||||
|
||||
static struct crypto_alg aes_alg = {
|
||||
@ -1446,7 +1447,6 @@ static struct crypto_alg aes_alg = {
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_alignmask = 3,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
|
375
crypto/aes_ti.c
Normal file
375
crypto/aes_ti.c
Normal file
@ -0,0 +1,375 @@
|
||||
/*
|
||||
* Scalar fixed time AES core transform
|
||||
*
|
||||
* Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <crypto/aes.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
/*
|
||||
* Emit the sbox as volatile const to prevent the compiler from doing
|
||||
* constant folding on sbox references involving fixed indexes.
|
||||
*/
|
||||
static volatile const u8 __cacheline_aligned __aesti_sbox[] = {
|
||||
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
|
||||
0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
|
||||
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
|
||||
0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
|
||||
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
|
||||
0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
|
||||
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
|
||||
0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
|
||||
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
|
||||
0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
|
||||
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
|
||||
0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
|
||||
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
|
||||
0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
|
||||
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
|
||||
0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
|
||||
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
|
||||
0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
|
||||
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
|
||||
0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
|
||||
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
|
||||
0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
|
||||
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
|
||||
0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
|
||||
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
|
||||
0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
|
||||
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
|
||||
0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
|
||||
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
|
||||
0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
|
||||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
|
||||
0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
|
||||
};
|
||||
|
||||
static volatile const u8 __cacheline_aligned __aesti_inv_sbox[] = {
|
||||
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
|
||||
0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
|
||||
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
|
||||
0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
|
||||
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
|
||||
0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
|
||||
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
|
||||
0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
|
||||
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
|
||||
0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
|
||||
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
|
||||
0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
|
||||
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
|
||||
0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
|
||||
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
|
||||
0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
|
||||
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
|
||||
0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
|
||||
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
|
||||
0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
|
||||
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
|
||||
0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
|
||||
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
|
||||
0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
|
||||
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
|
||||
0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
|
||||
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
|
||||
0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
|
||||
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
|
||||
0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
|
||||
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
|
||||
0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
|
||||
};
|
||||
|
||||
static u32 mul_by_x(u32 w)
|
||||
{
|
||||
u32 x = w & 0x7f7f7f7f;
|
||||
u32 y = w & 0x80808080;
|
||||
|
||||
/* multiply by polynomial 'x' (0b10) in GF(2^8) */
|
||||
return (x << 1) ^ (y >> 7) * 0x1b;
|
||||
}
|
||||
|
||||
static u32 mul_by_x2(u32 w)
|
||||
{
|
||||
u32 x = w & 0x3f3f3f3f;
|
||||
u32 y = w & 0x80808080;
|
||||
u32 z = w & 0x40404040;
|
||||
|
||||
/* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
|
||||
return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
|
||||
}
|
||||
|
||||
static u32 mix_columns(u32 x)
|
||||
{
|
||||
/*
|
||||
* Perform the following matrix multiplication in GF(2^8)
|
||||
*
|
||||
* | 0x2 0x3 0x1 0x1 | | x[0] |
|
||||
* | 0x1 0x2 0x3 0x1 | | x[1] |
|
||||
* | 0x1 0x1 0x2 0x3 | x | x[2] |
|
||||
* | 0x3 0x1 0x1 0x3 | | x[3] |
|
||||
*/
|
||||
u32 y = mul_by_x(x) ^ ror32(x, 16);
|
||||
|
||||
return y ^ ror32(x ^ y, 8);
|
||||
}
|
||||
|
||||
static u32 inv_mix_columns(u32 x)
|
||||
{
|
||||
/*
|
||||
* Perform the following matrix multiplication in GF(2^8)
|
||||
*
|
||||
* | 0xe 0xb 0xd 0x9 | | x[0] |
|
||||
* | 0x9 0xe 0xb 0xd | | x[1] |
|
||||
* | 0xd 0x9 0xe 0xb | x | x[2] |
|
||||
* | 0xb 0xd 0x9 0xe | | x[3] |
|
||||
*
|
||||
* which can conveniently be reduced to
|
||||
*
|
||||
* | 0x2 0x3 0x1 0x1 | | 0x5 0x0 0x4 0x0 | | x[0] |
|
||||
* | 0x1 0x2 0x3 0x1 | | 0x0 0x5 0x0 0x4 | | x[1] |
|
||||
* | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
|
||||
* | 0x3 0x1 0x1 0x2 | | 0x0 0x4 0x0 0x5 | | x[3] |
|
||||
*/
|
||||
u32 y = mul_by_x2(x);
|
||||
|
||||
return mix_columns(x ^ y ^ ror32(y, 16));
|
||||
}
|
||||
|
||||
static __always_inline u32 subshift(u32 in[], int pos)
|
||||
{
|
||||
return (__aesti_sbox[in[pos] & 0xff]) ^
|
||||
(__aesti_sbox[(in[(pos + 1) % 4] >> 8) & 0xff] << 8) ^
|
||||
(__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
|
||||
(__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
|
||||
}
|
||||
|
||||
static __always_inline u32 inv_subshift(u32 in[], int pos)
|
||||
{
|
||||
return (__aesti_inv_sbox[in[pos] & 0xff]) ^
|
||||
(__aesti_inv_sbox[(in[(pos + 3) % 4] >> 8) & 0xff] << 8) ^
|
||||
(__aesti_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
|
||||
(__aesti_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
|
||||
}
|
||||
|
||||
static u32 subw(u32 in)
|
||||
{
|
||||
return (__aesti_sbox[in & 0xff]) ^
|
||||
(__aesti_sbox[(in >> 8) & 0xff] << 8) ^
|
||||
(__aesti_sbox[(in >> 16) & 0xff] << 16) ^
|
||||
(__aesti_sbox[(in >> 24) & 0xff] << 24);
|
||||
}
|
||||
|
||||
static int aesti_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
u32 kwords = key_len / sizeof(u32);
|
||||
u32 rc, i, j;
|
||||
|
||||
if (key_len != AES_KEYSIZE_128 &&
|
||||
key_len != AES_KEYSIZE_192 &&
|
||||
key_len != AES_KEYSIZE_256)
|
||||
return -EINVAL;
|
||||
|
||||
ctx->key_length = key_len;
|
||||
|
||||
for (i = 0; i < kwords; i++)
|
||||
ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
|
||||
|
||||
for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
|
||||
u32 *rki = ctx->key_enc + (i * kwords);
|
||||
u32 *rko = rki + kwords;
|
||||
|
||||
rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
|
||||
rko[1] = rko[0] ^ rki[1];
|
||||
rko[2] = rko[1] ^ rki[2];
|
||||
rko[3] = rko[2] ^ rki[3];
|
||||
|
||||
if (key_len == 24) {
|
||||
if (i >= 7)
|
||||
break;
|
||||
rko[4] = rko[3] ^ rki[4];
|
||||
rko[5] = rko[4] ^ rki[5];
|
||||
} else if (key_len == 32) {
|
||||
if (i >= 6)
|
||||
break;
|
||||
rko[4] = subw(rko[3]) ^ rki[4];
|
||||
rko[5] = rko[4] ^ rki[5];
|
||||
rko[6] = rko[5] ^ rki[6];
|
||||
rko[7] = rko[6] ^ rki[7];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate the decryption keys for the Equivalent Inverse Cipher.
|
||||
* This involves reversing the order of the round keys, and applying
|
||||
* the Inverse Mix Columns transformation to all but the first and
|
||||
* the last one.
|
||||
*/
|
||||
ctx->key_dec[0] = ctx->key_enc[key_len + 24];
|
||||
ctx->key_dec[1] = ctx->key_enc[key_len + 25];
|
||||
ctx->key_dec[2] = ctx->key_enc[key_len + 26];
|
||||
ctx->key_dec[3] = ctx->key_enc[key_len + 27];
|
||||
|
||||
for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
|
||||
ctx->key_dec[i] = inv_mix_columns(ctx->key_enc[j]);
|
||||
ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
|
||||
ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
|
||||
ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
|
||||
}
|
||||
|
||||
ctx->key_dec[i] = ctx->key_enc[0];
|
||||
ctx->key_dec[i + 1] = ctx->key_enc[1];
|
||||
ctx->key_dec[i + 2] = ctx->key_enc[2];
|
||||
ctx->key_dec[i + 3] = ctx->key_enc[3];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
int err;
|
||||
|
||||
err = aesti_expand_key(ctx, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* In order to force the compiler to emit data independent Sbox lookups
|
||||
* at the start of each block, xor the first round key with values at
|
||||
* fixed indexes in the Sbox. This will need to be repeated each time
|
||||
* the key is used, which will pull the entire Sbox into the D-cache
|
||||
* before any data dependent Sbox lookups are performed.
|
||||
*/
|
||||
ctx->key_enc[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
|
||||
ctx->key_enc[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
|
||||
ctx->key_enc[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
|
||||
ctx->key_enc[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
|
||||
|
||||
ctx->key_dec[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
|
||||
ctx->key_dec[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
|
||||
ctx->key_dec[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
|
||||
ctx->key_dec[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const u32 *rkp = ctx->key_enc + 4;
|
||||
int rounds = 6 + ctx->key_length / 4;
|
||||
u32 st0[4], st1[4];
|
||||
int round;
|
||||
|
||||
st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
|
||||
st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
|
||||
st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
|
||||
st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
|
||||
|
||||
st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
|
||||
st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
|
||||
st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
|
||||
st0[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
|
||||
|
||||
for (round = 0;; round += 2, rkp += 8) {
|
||||
st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
|
||||
st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
|
||||
st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
|
||||
st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];
|
||||
|
||||
if (round == rounds - 2)
|
||||
break;
|
||||
|
||||
st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
|
||||
st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
|
||||
st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
|
||||
st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
|
||||
}
|
||||
|
||||
put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
|
||||
put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
|
||||
put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
|
||||
put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
|
||||
}
|
||||
|
||||
static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
const u32 *rkp = ctx->key_dec + 4;
|
||||
int rounds = 6 + ctx->key_length / 4;
|
||||
u32 st0[4], st1[4];
|
||||
int round;
|
||||
|
||||
st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
|
||||
st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
|
||||
st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
|
||||
st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
|
||||
|
||||
st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
|
||||
st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
|
||||
st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
|
||||
st0[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];
|
||||
|
||||
for (round = 0;; round += 2, rkp += 8) {
|
||||
st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
|
||||
st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
|
||||
st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
|
||||
st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];
|
||||
|
||||
if (round == rounds - 2)
|
||||
break;
|
||||
|
||||
st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
|
||||
st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
|
||||
st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
|
||||
st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
|
||||
}
|
||||
|
||||
put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
|
||||
put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
|
||||
put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
|
||||
put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
|
||||
}
|
||||
|
||||
static struct crypto_alg aes_alg = {
|
||||
.cra_name = "aes",
|
||||
.cra_driver_name = "aes-fixed-time",
|
||||
.cra_priority = 100 + 1,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
|
||||
.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
|
||||
.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
|
||||
.cra_cipher.cia_setkey = aesti_set_key,
|
||||
.cra_cipher.cia_encrypt = aesti_encrypt,
|
||||
.cra_cipher.cia_decrypt = aesti_decrypt
|
||||
};
|
||||
|
||||
static int __init aes_init(void)
|
||||
{
|
||||
return crypto_register_alg(&aes_alg);
|
||||
}
|
||||
|
||||
static void __exit aes_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&aes_alg);
|
||||
}
|
||||
|
||||
module_init(aes_init);
|
||||
module_exit(aes_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Generic fixed time AES");
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
@ -23,6 +23,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <net/netlink.h>
|
||||
|
||||
#include "internal.h"
|
||||
@ -493,7 +494,7 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
seq_printf(m, "type : ahash\n");
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <net/netlink.h>
|
||||
@ -47,7 +48,7 @@ static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
|
||||
static void crypto_akcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
|
@ -962,34 +962,66 @@ void crypto_inc(u8 *a, unsigned int size)
|
||||
__be32 *b = (__be32 *)(a + size);
|
||||
u32 c;
|
||||
|
||||
for (; size >= 4; size -= 4) {
|
||||
c = be32_to_cpu(*--b) + 1;
|
||||
*b = cpu_to_be32(c);
|
||||
if (c)
|
||||
return;
|
||||
}
|
||||
if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
|
||||
!((unsigned long)b & (__alignof__(*b) - 1)))
|
||||
for (; size >= 4; size -= 4) {
|
||||
c = be32_to_cpu(*--b) + 1;
|
||||
*b = cpu_to_be32(c);
|
||||
if (c)
|
||||
return;
|
||||
}
|
||||
|
||||
crypto_inc_byte(a, size);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_inc);
|
||||
|
||||
static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
|
||||
void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
|
||||
{
|
||||
for (; size; size--)
|
||||
*a++ ^= *b++;
|
||||
int relalign = 0;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
|
||||
int size = sizeof(unsigned long);
|
||||
int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
|
||||
|
||||
relalign = d ? 1 << __ffs(d) : size;
|
||||
|
||||
/*
|
||||
* If we care about alignment, process as many bytes as
|
||||
* needed to advance dst and src to values whose alignments
|
||||
* equal their relative alignment. This will allow us to
|
||||
* process the remainder of the input using optimal strides.
|
||||
*/
|
||||
while (((unsigned long)dst & (relalign - 1)) && len > 0) {
|
||||
*dst++ ^= *src++;
|
||||
len--;
|
||||
}
|
||||
}
|
||||
|
||||
while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
|
||||
*(u64 *)dst ^= *(u64 *)src;
|
||||
dst += 8;
|
||||
src += 8;
|
||||
len -= 8;
|
||||
}
|
||||
|
||||
while (len >= 4 && !(relalign & 3)) {
|
||||
*(u32 *)dst ^= *(u32 *)src;
|
||||
dst += 4;
|
||||
src += 4;
|
||||
len -= 4;
|
||||
}
|
||||
|
||||
while (len >= 2 && !(relalign & 1)) {
|
||||
*(u16 *)dst ^= *(u16 *)src;
|
||||
dst += 2;
|
||||
src += 2;
|
||||
len -= 2;
|
||||
}
|
||||
|
||||
while (len--)
|
||||
*dst++ ^= *src++;
|
||||
}
|
||||
|
||||
void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
|
||||
{
|
||||
u32 *a = (u32 *)dst;
|
||||
u32 *b = (u32 *)src;
|
||||
|
||||
for (; size >= 4; size -= 4)
|
||||
*a++ ^= *b++;
|
||||
|
||||
crypto_xor_byte((u8 *)a, (u8 *)b, size);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_xor);
|
||||
EXPORT_SYMBOL_GPL(__crypto_xor);
|
||||
|
||||
unsigned int crypto_alg_extsize(struct crypto_alg *alg)
|
||||
{
|
||||
|
@ -245,7 +245,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags)
|
||||
struct alg_sock *ask = alg_sk(sk);
|
||||
struct hash_ctx *ctx = ask->private;
|
||||
struct ahash_request *req = &ctx->req;
|
||||
char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))];
|
||||
char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1];
|
||||
struct sock *sk2;
|
||||
struct alg_sock *ask2;
|
||||
struct hash_ctx *ctx2;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Block chaining cipher operations.
|
||||
*
|
||||
*
|
||||
* Generic encrypt/decrypt wrapper for ciphers, handles operations across
|
||||
* multiple page boundaries by using temporary blocks. In user context,
|
||||
* the kernel is given a chance to schedule us once per page.
|
||||
@ -9,7 +9,7 @@
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
@ -25,6 +25,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <net/netlink.h>
|
||||
|
||||
#include "internal.h"
|
||||
@ -534,7 +535,7 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
seq_printf(m, "type : blkcipher\n");
|
||||
|
@ -145,9 +145,6 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
inst->alg.base.cra_blocksize = alg->cra_blocksize;
|
||||
inst->alg.base.cra_alignmask = alg->cra_alignmask;
|
||||
|
||||
/* We access the data as u32s when xoring. */
|
||||
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
|
||||
|
||||
inst->alg.ivsize = alg->cra_blocksize;
|
||||
inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
|
||||
inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
|
||||
|
386
crypto/ccm.c
386
crypto/ccm.c
@ -11,6 +11,7 @@
|
||||
*/
|
||||
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/err.h>
|
||||
@ -23,11 +24,11 @@
|
||||
|
||||
struct ccm_instance_ctx {
|
||||
struct crypto_skcipher_spawn ctr;
|
||||
struct crypto_spawn cipher;
|
||||
struct crypto_ahash_spawn mac;
|
||||
};
|
||||
|
||||
struct crypto_ccm_ctx {
|
||||
struct crypto_cipher *cipher;
|
||||
struct crypto_ahash *mac;
|
||||
struct crypto_skcipher *ctr;
|
||||
};
|
||||
|
||||
@ -44,15 +45,21 @@ struct crypto_rfc4309_req_ctx {
|
||||
|
||||
struct crypto_ccm_req_priv_ctx {
|
||||
u8 odata[16];
|
||||
u8 idata[16];
|
||||
u8 auth_tag[16];
|
||||
u32 ilen;
|
||||
u32 flags;
|
||||
struct scatterlist src[3];
|
||||
struct scatterlist dst[3];
|
||||
struct skcipher_request skreq;
|
||||
};
|
||||
|
||||
struct cbcmac_tfm_ctx {
|
||||
struct crypto_cipher *child;
|
||||
};
|
||||
|
||||
struct cbcmac_desc_ctx {
|
||||
unsigned int len;
|
||||
};
|
||||
|
||||
static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx(
|
||||
struct aead_request *req)
|
||||
{
|
||||
@ -84,7 +91,7 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
{
|
||||
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
|
||||
struct crypto_skcipher *ctr = ctx->ctr;
|
||||
struct crypto_cipher *tfm = ctx->cipher;
|
||||
struct crypto_ahash *mac = ctx->mac;
|
||||
int err = 0;
|
||||
|
||||
crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK);
|
||||
@ -96,11 +103,11 @@ static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key,
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
crypto_cipher_clear_flags(tfm, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_cipher_set_flags(tfm, crypto_aead_get_flags(aead) &
|
||||
crypto_ahash_clear_flags(mac, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_ahash_set_flags(mac, crypto_aead_get_flags(aead) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_cipher_setkey(tfm, key, keylen);
|
||||
crypto_aead_set_flags(aead, crypto_cipher_get_flags(tfm) &
|
||||
err = crypto_ahash_setkey(mac, key, keylen);
|
||||
crypto_aead_set_flags(aead, crypto_ahash_get_flags(mac) &
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
|
||||
out:
|
||||
@ -167,119 +174,61 @@ static int format_adata(u8 *adata, unsigned int a)
|
||||
return len;
|
||||
}
|
||||
|
||||
static void compute_mac(struct crypto_cipher *tfm, u8 *data, int n,
|
||||
struct crypto_ccm_req_priv_ctx *pctx)
|
||||
{
|
||||
unsigned int bs = 16;
|
||||
u8 *odata = pctx->odata;
|
||||
u8 *idata = pctx->idata;
|
||||
int datalen, getlen;
|
||||
|
||||
datalen = n;
|
||||
|
||||
/* first time in here, block may be partially filled. */
|
||||
getlen = bs - pctx->ilen;
|
||||
if (datalen >= getlen) {
|
||||
memcpy(idata + pctx->ilen, data, getlen);
|
||||
crypto_xor(odata, idata, bs);
|
||||
crypto_cipher_encrypt_one(tfm, odata, odata);
|
||||
datalen -= getlen;
|
||||
data += getlen;
|
||||
pctx->ilen = 0;
|
||||
}
|
||||
|
||||
/* now encrypt rest of data */
|
||||
while (datalen >= bs) {
|
||||
crypto_xor(odata, data, bs);
|
||||
crypto_cipher_encrypt_one(tfm, odata, odata);
|
||||
|
||||
datalen -= bs;
|
||||
data += bs;
|
||||
}
|
||||
|
||||
/* check and see if there's leftover data that wasn't
|
||||
* enough to fill a block.
|
||||
*/
|
||||
if (datalen) {
|
||||
memcpy(idata + pctx->ilen, data, datalen);
|
||||
pctx->ilen += datalen;
|
||||
}
|
||||
}
|
||||
|
||||
static void get_data_to_compute(struct crypto_cipher *tfm,
|
||||
struct crypto_ccm_req_priv_ctx *pctx,
|
||||
struct scatterlist *sg, unsigned int len)
|
||||
{
|
||||
struct scatter_walk walk;
|
||||
u8 *data_src;
|
||||
int n;
|
||||
|
||||
scatterwalk_start(&walk, sg);
|
||||
|
||||
while (len) {
|
||||
n = scatterwalk_clamp(&walk, len);
|
||||
if (!n) {
|
||||
scatterwalk_start(&walk, sg_next(walk.sg));
|
||||
n = scatterwalk_clamp(&walk, len);
|
||||
}
|
||||
data_src = scatterwalk_map(&walk);
|
||||
|
||||
compute_mac(tfm, data_src, n, pctx);
|
||||
len -= n;
|
||||
|
||||
scatterwalk_unmap(data_src);
|
||||
scatterwalk_advance(&walk, n);
|
||||
scatterwalk_done(&walk, 0, len);
|
||||
if (len)
|
||||
crypto_yield(pctx->flags);
|
||||
}
|
||||
|
||||
/* any leftover needs padding and then encrypted */
|
||||
if (pctx->ilen) {
|
||||
int padlen;
|
||||
u8 *odata = pctx->odata;
|
||||
u8 *idata = pctx->idata;
|
||||
|
||||
padlen = 16 - pctx->ilen;
|
||||
memset(idata + pctx->ilen, 0, padlen);
|
||||
crypto_xor(odata, idata, 16);
|
||||
crypto_cipher_encrypt_one(tfm, odata, odata);
|
||||
pctx->ilen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
|
||||
unsigned int cryptlen)
|
||||
{
|
||||
struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
|
||||
struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
|
||||
struct crypto_cipher *cipher = ctx->cipher;
|
||||
AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
|
||||
unsigned int assoclen = req->assoclen;
|
||||
u8 *odata = pctx->odata;
|
||||
u8 *idata = pctx->idata;
|
||||
int err;
|
||||
struct scatterlist sg[3];
|
||||
u8 odata[16];
|
||||
u8 idata[16];
|
||||
int ilen, err;
|
||||
|
||||
/* format control data for input */
|
||||
err = format_input(odata, req, cryptlen);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* encrypt first block to use as start in computing mac */
|
||||
crypto_cipher_encrypt_one(cipher, odata, odata);
|
||||
sg_init_table(sg, 3);
|
||||
sg_set_buf(&sg[0], odata, 16);
|
||||
|
||||
/* format associated data and compute into mac */
|
||||
if (assoclen) {
|
||||
pctx->ilen = format_adata(idata, assoclen);
|
||||
get_data_to_compute(cipher, pctx, req->src, req->assoclen);
|
||||
ilen = format_adata(idata, assoclen);
|
||||
sg_set_buf(&sg[1], idata, ilen);
|
||||
sg_chain(sg, 3, req->src);
|
||||
} else {
|
||||
pctx->ilen = 0;
|
||||
ilen = 0;
|
||||
sg_chain(sg, 2, req->src);
|
||||
}
|
||||
|
||||
/* compute plaintext into mac */
|
||||
if (cryptlen)
|
||||
get_data_to_compute(cipher, pctx, plain, cryptlen);
|
||||
ahash_request_set_tfm(ahreq, ctx->mac);
|
||||
ahash_request_set_callback(ahreq, pctx->flags, NULL, NULL);
|
||||
ahash_request_set_crypt(ahreq, sg, NULL, assoclen + ilen + 16);
|
||||
err = crypto_ahash_init(ahreq);
|
||||
if (err)
|
||||
goto out;
|
||||
err = crypto_ahash_update(ahreq);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* we need to pad the MAC input to a round multiple of the block size */
|
||||
ilen = 16 - (assoclen + ilen) % 16;
|
||||
if (ilen < 16) {
|
||||
memset(idata, 0, ilen);
|
||||
sg_init_table(sg, 2);
|
||||
sg_set_buf(&sg[0], idata, ilen);
|
||||
if (plain)
|
||||
sg_chain(sg, 2, plain);
|
||||
plain = sg;
|
||||
cryptlen += ilen;
|
||||
}
|
||||
|
||||
ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen);
|
||||
err = crypto_ahash_finup(ahreq);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
@ -453,21 +402,21 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
|
||||
struct aead_instance *inst = aead_alg_instance(tfm);
|
||||
struct ccm_instance_ctx *ictx = aead_instance_ctx(inst);
|
||||
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct crypto_cipher *cipher;
|
||||
struct crypto_ahash *mac;
|
||||
struct crypto_skcipher *ctr;
|
||||
unsigned long align;
|
||||
int err;
|
||||
|
||||
cipher = crypto_spawn_cipher(&ictx->cipher);
|
||||
if (IS_ERR(cipher))
|
||||
return PTR_ERR(cipher);
|
||||
mac = crypto_spawn_ahash(&ictx->mac);
|
||||
if (IS_ERR(mac))
|
||||
return PTR_ERR(mac);
|
||||
|
||||
ctr = crypto_spawn_skcipher(&ictx->ctr);
|
||||
err = PTR_ERR(ctr);
|
||||
if (IS_ERR(ctr))
|
||||
goto err_free_cipher;
|
||||
goto err_free_mac;
|
||||
|
||||
ctx->cipher = cipher;
|
||||
ctx->mac = mac;
|
||||
ctx->ctr = ctr;
|
||||
|
||||
align = crypto_aead_alignmask(tfm);
|
||||
@ -479,8 +428,8 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
|
||||
|
||||
return 0;
|
||||
|
||||
err_free_cipher:
|
||||
crypto_free_cipher(cipher);
|
||||
err_free_mac:
|
||||
crypto_free_ahash(mac);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -488,7 +437,7 @@ static void crypto_ccm_exit_tfm(struct crypto_aead *tfm)
|
||||
{
|
||||
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
|
||||
crypto_free_cipher(ctx->cipher);
|
||||
crypto_free_ahash(ctx->mac);
|
||||
crypto_free_skcipher(ctx->ctr);
|
||||
}
|
||||
|
||||
@ -496,7 +445,7 @@ static void crypto_ccm_free(struct aead_instance *inst)
|
||||
{
|
||||
struct ccm_instance_ctx *ctx = aead_instance_ctx(inst);
|
||||
|
||||
crypto_drop_spawn(&ctx->cipher);
|
||||
crypto_drop_ahash(&ctx->mac);
|
||||
crypto_drop_skcipher(&ctx->ctr);
|
||||
kfree(inst);
|
||||
}
|
||||
@ -505,12 +454,13 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
|
||||
struct rtattr **tb,
|
||||
const char *full_name,
|
||||
const char *ctr_name,
|
||||
const char *cipher_name)
|
||||
const char *mac_name)
|
||||
{
|
||||
struct crypto_attr_type *algt;
|
||||
struct aead_instance *inst;
|
||||
struct skcipher_alg *ctr;
|
||||
struct crypto_alg *cipher;
|
||||
struct crypto_alg *mac_alg;
|
||||
struct hash_alg_common *mac;
|
||||
struct ccm_instance_ctx *ictx;
|
||||
int err;
|
||||
|
||||
@ -521,25 +471,26 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
|
||||
if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
|
||||
return -EINVAL;
|
||||
|
||||
cipher = crypto_alg_mod_lookup(cipher_name, CRYPTO_ALG_TYPE_CIPHER,
|
||||
CRYPTO_ALG_TYPE_MASK);
|
||||
if (IS_ERR(cipher))
|
||||
return PTR_ERR(cipher);
|
||||
mac_alg = crypto_find_alg(mac_name, &crypto_ahash_type,
|
||||
CRYPTO_ALG_TYPE_HASH,
|
||||
CRYPTO_ALG_TYPE_AHASH_MASK |
|
||||
CRYPTO_ALG_ASYNC);
|
||||
if (IS_ERR(mac_alg))
|
||||
return PTR_ERR(mac_alg);
|
||||
|
||||
mac = __crypto_hash_alg_common(mac_alg);
|
||||
err = -EINVAL;
|
||||
if (cipher->cra_blocksize != 16)
|
||||
goto out_put_cipher;
|
||||
if (mac->digestsize != 16)
|
||||
goto out_put_mac;
|
||||
|
||||
inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
|
||||
err = -ENOMEM;
|
||||
if (!inst)
|
||||
goto out_put_cipher;
|
||||
goto out_put_mac;
|
||||
|
||||
ictx = aead_instance_ctx(inst);
|
||||
|
||||
err = crypto_init_spawn(&ictx->cipher, cipher,
|
||||
aead_crypto_instance(inst),
|
||||
CRYPTO_ALG_TYPE_MASK);
|
||||
err = crypto_init_ahash_spawn(&ictx->mac, mac,
|
||||
aead_crypto_instance(inst));
|
||||
if (err)
|
||||
goto err_free_inst;
|
||||
|
||||
@ -548,7 +499,7 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
|
||||
crypto_requires_sync(algt->type,
|
||||
algt->mask));
|
||||
if (err)
|
||||
goto err_drop_cipher;
|
||||
goto err_drop_mac;
|
||||
|
||||
ctr = crypto_spawn_skcipher_alg(&ictx->ctr);
|
||||
|
||||
@ -564,18 +515,17 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
|
||||
err = -ENAMETOOLONG;
|
||||
if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
|
||||
"ccm_base(%s,%s)", ctr->base.cra_driver_name,
|
||||
cipher->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
goto err_drop_ctr;
|
||||
|
||||
memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME);
|
||||
|
||||
inst->alg.base.cra_flags = ctr->base.cra_flags & CRYPTO_ALG_ASYNC;
|
||||
inst->alg.base.cra_priority = (cipher->cra_priority +
|
||||
inst->alg.base.cra_priority = (mac->base.cra_priority +
|
||||
ctr->base.cra_priority) / 2;
|
||||
inst->alg.base.cra_blocksize = 1;
|
||||
inst->alg.base.cra_alignmask = cipher->cra_alignmask |
|
||||
ctr->base.cra_alignmask |
|
||||
(__alignof__(u32) - 1);
|
||||
inst->alg.base.cra_alignmask = mac->base.cra_alignmask |
|
||||
ctr->base.cra_alignmask;
|
||||
inst->alg.ivsize = 16;
|
||||
inst->alg.chunksize = crypto_skcipher_alg_chunksize(ctr);
|
||||
inst->alg.maxauthsize = 16;
|
||||
@ -593,23 +543,24 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
|
||||
if (err)
|
||||
goto err_drop_ctr;
|
||||
|
||||
out_put_cipher:
|
||||
crypto_mod_put(cipher);
|
||||
out_put_mac:
|
||||
crypto_mod_put(mac_alg);
|
||||
return err;
|
||||
|
||||
err_drop_ctr:
|
||||
crypto_drop_skcipher(&ictx->ctr);
|
||||
err_drop_cipher:
|
||||
crypto_drop_spawn(&ictx->cipher);
|
||||
err_drop_mac:
|
||||
crypto_drop_ahash(&ictx->mac);
|
||||
err_free_inst:
|
||||
kfree(inst);
|
||||
goto out_put_cipher;
|
||||
goto out_put_mac;
|
||||
}
|
||||
|
||||
static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
const char *cipher_name;
|
||||
char ctr_name[CRYPTO_MAX_ALG_NAME];
|
||||
char mac_name[CRYPTO_MAX_ALG_NAME];
|
||||
char full_name[CRYPTO_MAX_ALG_NAME];
|
||||
|
||||
cipher_name = crypto_attr_alg_name(tb[1]);
|
||||
@ -620,12 +571,16 @@ static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
cipher_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
if (snprintf(mac_name, CRYPTO_MAX_ALG_NAME, "cbcmac(%s)",
|
||||
cipher_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm(%s)", cipher_name) >=
|
||||
CRYPTO_MAX_ALG_NAME)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name,
|
||||
cipher_name);
|
||||
mac_name);
|
||||
}
|
||||
|
||||
static struct crypto_template crypto_ccm_tmpl = {
|
||||
@ -899,14 +854,164 @@ static struct crypto_template crypto_rfc4309_tmpl = {
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent,
|
||||
const u8 *inkey, unsigned int keylen)
|
||||
{
|
||||
struct cbcmac_tfm_ctx *ctx = crypto_shash_ctx(parent);
|
||||
|
||||
return crypto_cipher_setkey(ctx->child, inkey, keylen);
|
||||
}
|
||||
|
||||
static int crypto_cbcmac_digest_init(struct shash_desc *pdesc)
|
||||
{
|
||||
struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
|
||||
int bs = crypto_shash_digestsize(pdesc->tfm);
|
||||
u8 *dg = (u8 *)ctx + crypto_shash_descsize(pdesc->tfm) - bs;
|
||||
|
||||
ctx->len = 0;
|
||||
memset(dg, 0, bs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_cbcmac_digest_update(struct shash_desc *pdesc, const u8 *p,
|
||||
unsigned int len)
|
||||
{
|
||||
struct crypto_shash *parent = pdesc->tfm;
|
||||
struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
|
||||
struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
|
||||
struct crypto_cipher *tfm = tctx->child;
|
||||
int bs = crypto_shash_digestsize(parent);
|
||||
u8 *dg = (u8 *)ctx + crypto_shash_descsize(parent) - bs;
|
||||
|
||||
while (len > 0) {
|
||||
unsigned int l = min(len, bs - ctx->len);
|
||||
|
||||
crypto_xor(dg + ctx->len, p, l);
|
||||
ctx->len +=l;
|
||||
len -= l;
|
||||
p += l;
|
||||
|
||||
if (ctx->len == bs) {
|
||||
crypto_cipher_encrypt_one(tfm, dg, dg);
|
||||
ctx->len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crypto_cbcmac_digest_final(struct shash_desc *pdesc, u8 *out)
|
||||
{
|
||||
struct crypto_shash *parent = pdesc->tfm;
|
||||
struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
|
||||
struct cbcmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
|
||||
struct crypto_cipher *tfm = tctx->child;
|
||||
int bs = crypto_shash_digestsize(parent);
|
||||
u8 *dg = (u8 *)ctx + crypto_shash_descsize(parent) - bs;
|
||||
|
||||
if (ctx->len)
|
||||
crypto_cipher_encrypt_one(tfm, dg, dg);
|
||||
|
||||
memcpy(out, dg, bs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cbcmac_init_tfm(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct crypto_cipher *cipher;
|
||||
struct crypto_instance *inst = (void *)tfm->__crt_alg;
|
||||
struct crypto_spawn *spawn = crypto_instance_ctx(inst);
|
||||
struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
cipher = crypto_spawn_cipher(spawn);
|
||||
if (IS_ERR(cipher))
|
||||
return PTR_ERR(cipher);
|
||||
|
||||
ctx->child = cipher;
|
||||
|
||||
return 0;
|
||||
};
|
||||
|
||||
static void cbcmac_exit_tfm(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
crypto_free_cipher(ctx->child);
|
||||
}
|
||||
|
||||
static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
struct shash_instance *inst;
|
||||
struct crypto_alg *alg;
|
||||
int err;
|
||||
|
||||
err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
|
||||
CRYPTO_ALG_TYPE_MASK);
|
||||
if (IS_ERR(alg))
|
||||
return PTR_ERR(alg);
|
||||
|
||||
inst = shash_alloc_instance("cbcmac", alg);
|
||||
err = PTR_ERR(inst);
|
||||
if (IS_ERR(inst))
|
||||
goto out_put_alg;
|
||||
|
||||
err = crypto_init_spawn(shash_instance_ctx(inst), alg,
|
||||
shash_crypto_instance(inst),
|
||||
CRYPTO_ALG_TYPE_MASK);
|
||||
if (err)
|
||||
goto out_free_inst;
|
||||
|
||||
inst->alg.base.cra_priority = alg->cra_priority;
|
||||
inst->alg.base.cra_blocksize = 1;
|
||||
|
||||
inst->alg.digestsize = alg->cra_blocksize;
|
||||
inst->alg.descsize = ALIGN(sizeof(struct cbcmac_desc_ctx),
|
||||
alg->cra_alignmask + 1) +
|
||||
alg->cra_blocksize;
|
||||
|
||||
inst->alg.base.cra_ctxsize = sizeof(struct cbcmac_tfm_ctx);
|
||||
inst->alg.base.cra_init = cbcmac_init_tfm;
|
||||
inst->alg.base.cra_exit = cbcmac_exit_tfm;
|
||||
|
||||
inst->alg.init = crypto_cbcmac_digest_init;
|
||||
inst->alg.update = crypto_cbcmac_digest_update;
|
||||
inst->alg.final = crypto_cbcmac_digest_final;
|
||||
inst->alg.setkey = crypto_cbcmac_digest_setkey;
|
||||
|
||||
err = shash_register_instance(tmpl, inst);
|
||||
|
||||
out_free_inst:
|
||||
if (err)
|
||||
shash_free_instance(shash_crypto_instance(inst));
|
||||
|
||||
out_put_alg:
|
||||
crypto_mod_put(alg);
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct crypto_template crypto_cbcmac_tmpl = {
|
||||
.name = "cbcmac",
|
||||
.create = cbcmac_create,
|
||||
.free = shash_free_instance,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
static int __init crypto_ccm_module_init(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = crypto_register_template(&crypto_ccm_base_tmpl);
|
||||
err = crypto_register_template(&crypto_cbcmac_tmpl);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = crypto_register_template(&crypto_ccm_base_tmpl);
|
||||
if (err)
|
||||
goto out_undo_cbcmac;
|
||||
|
||||
err = crypto_register_template(&crypto_ccm_tmpl);
|
||||
if (err)
|
||||
goto out_undo_base;
|
||||
@ -922,6 +1027,8 @@ out_undo_ccm:
|
||||
crypto_unregister_template(&crypto_ccm_tmpl);
|
||||
out_undo_base:
|
||||
crypto_unregister_template(&crypto_ccm_base_tmpl);
|
||||
out_undo_cbcmac:
|
||||
crypto_register_template(&crypto_cbcmac_tmpl);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -930,6 +1037,7 @@ static void __exit crypto_ccm_module_exit(void)
|
||||
crypto_unregister_template(&crypto_rfc4309_tmpl);
|
||||
crypto_unregister_template(&crypto_ccm_tmpl);
|
||||
crypto_unregister_template(&crypto_ccm_base_tmpl);
|
||||
crypto_unregister_template(&crypto_cbcmac_tmpl);
|
||||
}
|
||||
|
||||
module_init(crypto_ccm_module_init);
|
||||
|
@ -10,10 +10,9 @@
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <crypto/chacha20.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
static inline u32 le32_to_cpuvp(const void *p)
|
||||
{
|
||||
@ -63,10 +62,10 @@ void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_chacha20_init);
|
||||
|
||||
int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keysize)
|
||||
{
|
||||
struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int i;
|
||||
|
||||
if (keysize != CHACHA20_KEY_SIZE)
|
||||
@ -79,66 +78,54 @@ int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
|
||||
|
||||
int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
|
||||
struct scatterlist *src, unsigned int nbytes)
|
||||
int crypto_chacha20_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct blkcipher_walk walk;
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
u32 state[16];
|
||||
int err;
|
||||
|
||||
blkcipher_walk_init(&walk, dst, src, nbytes);
|
||||
err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
|
||||
crypto_chacha20_init(state, ctx, walk.iv);
|
||||
|
||||
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
|
||||
chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
|
||||
err = blkcipher_walk_done(desc, &walk,
|
||||
walk.nbytes % CHACHA20_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
if (walk.nbytes) {
|
||||
while (walk.nbytes > 0) {
|
||||
chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
|
||||
walk.nbytes);
|
||||
err = blkcipher_walk_done(desc, &walk, 0);
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
|
||||
|
||||
static struct crypto_alg alg = {
|
||||
.cra_name = "chacha20",
|
||||
.cra_driver_name = "chacha20-generic",
|
||||
.cra_priority = 100,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
|
||||
.cra_blocksize = 1,
|
||||
.cra_type = &crypto_blkcipher_type,
|
||||
.cra_ctxsize = sizeof(struct chacha20_ctx),
|
||||
.cra_alignmask = sizeof(u32) - 1,
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.blkcipher = {
|
||||
.min_keysize = CHACHA20_KEY_SIZE,
|
||||
.max_keysize = CHACHA20_KEY_SIZE,
|
||||
.ivsize = CHACHA20_IV_SIZE,
|
||||
.geniv = "seqiv",
|
||||
.setkey = crypto_chacha20_setkey,
|
||||
.encrypt = crypto_chacha20_crypt,
|
||||
.decrypt = crypto_chacha20_crypt,
|
||||
},
|
||||
},
|
||||
static struct skcipher_alg alg = {
|
||||
.base.cra_name = "chacha20",
|
||||
.base.cra_driver_name = "chacha20-generic",
|
||||
.base.cra_priority = 100,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct chacha20_ctx),
|
||||
.base.cra_alignmask = sizeof(u32) - 1,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
|
||||
.min_keysize = CHACHA20_KEY_SIZE,
|
||||
.max_keysize = CHACHA20_KEY_SIZE,
|
||||
.ivsize = CHACHA20_IV_SIZE,
|
||||
.chunksize = CHACHA20_BLOCK_SIZE,
|
||||
.setkey = crypto_chacha20_setkey,
|
||||
.encrypt = crypto_chacha20_crypt,
|
||||
.decrypt = crypto_chacha20_crypt,
|
||||
};
|
||||
|
||||
static int __init chacha20_generic_mod_init(void)
|
||||
{
|
||||
return crypto_register_alg(&alg);
|
||||
return crypto_register_skcipher(&alg);
|
||||
}
|
||||
|
||||
static void __exit chacha20_generic_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&alg);
|
||||
crypto_unregister_skcipher(&alg);
|
||||
}
|
||||
|
||||
module_init(chacha20_generic_mod_init);
|
||||
|
@ -260,8 +260,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
if (err)
|
||||
goto out_free_inst;
|
||||
|
||||
/* We access the data as u32s when xoring. */
|
||||
alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
|
||||
alignmask = alg->cra_alignmask;
|
||||
inst->alg.base.cra_alignmask = alignmask;
|
||||
inst->alg.base.cra_priority = alg->cra_priority;
|
||||
inst->alg.base.cra_blocksize = alg->cra_blocksize;
|
||||
|
@ -209,7 +209,7 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb)
|
||||
inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
|
||||
inst->alg.cra_priority = alg->cra_priority;
|
||||
inst->alg.cra_blocksize = 1;
|
||||
inst->alg.cra_alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
|
||||
inst->alg.cra_alignmask = alg->cra_alignmask;
|
||||
inst->alg.cra_type = &crypto_blkcipher_type;
|
||||
|
||||
inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
|
||||
|
@ -49,6 +49,7 @@
|
||||
#include <linux/scatterlist.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
struct crypto_cts_ctx {
|
||||
struct crypto_skcipher *child;
|
||||
@ -103,7 +104,7 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct skcipher_request *subreq = &rctx->subreq;
|
||||
int bsize = crypto_skcipher_blocksize(tfm);
|
||||
u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32))));
|
||||
u8 d[bsize * 2] __aligned(__alignof__(u32));
|
||||
struct scatterlist *sg;
|
||||
unsigned int offset;
|
||||
int lastn;
|
||||
@ -183,7 +184,7 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct skcipher_request *subreq = &rctx->subreq;
|
||||
int bsize = crypto_skcipher_blocksize(tfm);
|
||||
u8 d[bsize * 2] __attribute__ ((aligned(__alignof__(u32))));
|
||||
u8 d[bsize * 2] __aligned(__alignof__(u32));
|
||||
struct scatterlist *sg;
|
||||
unsigned int offset;
|
||||
u8 *space;
|
||||
@ -373,9 +374,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
|
||||
inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
|
||||
|
||||
/* We access the data as u32s when xoring. */
|
||||
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
|
||||
|
||||
inst->alg.ivsize = alg->base.cra_blocksize;
|
||||
inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
|
||||
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <net/netlink.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include <crypto/internal/kpp.h>
|
||||
@ -47,7 +48,7 @@ static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
|
||||
static void crypto_kpp_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
struct crypto_pcbc_ctx {
|
||||
struct crypto_cipher *child;
|
||||
@ -146,7 +147,7 @@ static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
|
||||
unsigned int nbytes = walk->nbytes;
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *iv = walk->iv;
|
||||
u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32))));
|
||||
u8 tmpbuf[bsize] __aligned(__alignof__(u32));
|
||||
|
||||
do {
|
||||
memcpy(tmpbuf, src, bsize);
|
||||
@ -259,9 +260,6 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
inst->alg.base.cra_blocksize = alg->cra_blocksize;
|
||||
inst->alg.base.cra_alignmask = alg->cra_alignmask;
|
||||
|
||||
/* We access the data as u32s when xoring. */
|
||||
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
|
||||
|
||||
inst->alg.ivsize = alg->cra_blocksize;
|
||||
inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
|
||||
inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <net/netlink.h>
|
||||
|
||||
#include "internal.h"
|
||||
@ -95,7 +96,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
seq_printf(m, "type : rng\n");
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <linux/cryptouser.h>
|
||||
@ -57,7 +58,7 @@ static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
|
||||
static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
|
@ -153,8 +153,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
if (IS_ERR(inst))
|
||||
return PTR_ERR(inst);
|
||||
|
||||
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
|
||||
|
||||
spawn = aead_instance_ctx(inst);
|
||||
alg = crypto_spawn_aead_alg(spawn);
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <net/netlink.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@ -67,7 +68,7 @@ EXPORT_SYMBOL_GPL(crypto_shash_setkey);
|
||||
static inline unsigned int shash_align_buffer_size(unsigned len,
|
||||
unsigned long mask)
|
||||
{
|
||||
typedef u8 __attribute__ ((aligned)) u8_aligned;
|
||||
typedef u8 __aligned_largest u8_aligned;
|
||||
return len + (mask & ~(__alignof__(u8_aligned) - 1));
|
||||
}
|
||||
|
||||
@ -80,7 +81,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int unaligned_len = alignmask + 1 -
|
||||
((unsigned long)data & alignmask);
|
||||
u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
|
||||
__attribute__ ((aligned));
|
||||
__aligned_largest;
|
||||
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
|
||||
int err;
|
||||
|
||||
@ -116,7 +117,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
|
||||
struct shash_alg *shash = crypto_shash_alg(tfm);
|
||||
unsigned int ds = crypto_shash_digestsize(tfm);
|
||||
u8 ubuf[shash_align_buffer_size(ds, alignmask)]
|
||||
__attribute__ ((aligned));
|
||||
__aligned_largest;
|
||||
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
|
||||
int err;
|
||||
|
||||
@ -403,7 +404,7 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
#endif
|
||||
|
||||
static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
struct shash_alg *salg = __crypto_shash_alg(alg);
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
@ -185,12 +186,12 @@ void skcipher_walk_complete(struct skcipher_walk *walk, int err)
|
||||
data = p->data;
|
||||
if (!data) {
|
||||
data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1);
|
||||
data = skcipher_get_spot(data, walk->chunksize);
|
||||
data = skcipher_get_spot(data, walk->stride);
|
||||
}
|
||||
|
||||
scatterwalk_copychunks(data, &p->dst, p->len, 1);
|
||||
|
||||
if (offset_in_page(p->data) + p->len + walk->chunksize >
|
||||
if (offset_in_page(p->data) + p->len + walk->stride >
|
||||
PAGE_SIZE)
|
||||
free_page((unsigned long)p->data);
|
||||
|
||||
@ -299,7 +300,7 @@ static int skcipher_next_copy(struct skcipher_walk *walk)
|
||||
p->len = walk->nbytes;
|
||||
skcipher_queue_write(walk, p);
|
||||
|
||||
if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize >
|
||||
if (offset_in_page(walk->page) + walk->nbytes + walk->stride >
|
||||
PAGE_SIZE)
|
||||
walk->page = NULL;
|
||||
else
|
||||
@ -344,7 +345,7 @@ static int skcipher_walk_next(struct skcipher_walk *walk)
|
||||
SKCIPHER_WALK_DIFF);
|
||||
|
||||
n = walk->total;
|
||||
bsize = min(walk->chunksize, max(n, walk->blocksize));
|
||||
bsize = min(walk->stride, max(n, walk->blocksize));
|
||||
n = scatterwalk_clamp(&walk->in, n);
|
||||
n = scatterwalk_clamp(&walk->out, n);
|
||||
|
||||
@ -393,7 +394,7 @@ static int skcipher_copy_iv(struct skcipher_walk *walk)
|
||||
unsigned a = crypto_tfm_ctx_alignment() - 1;
|
||||
unsigned alignmask = walk->alignmask;
|
||||
unsigned ivsize = walk->ivsize;
|
||||
unsigned bs = walk->chunksize;
|
||||
unsigned bs = walk->stride;
|
||||
unsigned aligned_bs;
|
||||
unsigned size;
|
||||
u8 *iv;
|
||||
@ -463,7 +464,7 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
|
||||
SKCIPHER_WALK_SLEEP : 0;
|
||||
|
||||
walk->blocksize = crypto_skcipher_blocksize(tfm);
|
||||
walk->chunksize = crypto_skcipher_chunksize(tfm);
|
||||
walk->stride = crypto_skcipher_walksize(tfm);
|
||||
walk->ivsize = crypto_skcipher_ivsize(tfm);
|
||||
walk->alignmask = crypto_skcipher_alignmask(tfm);
|
||||
|
||||
@ -525,7 +526,7 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
|
||||
walk->flags &= ~SKCIPHER_WALK_SLEEP;
|
||||
|
||||
walk->blocksize = crypto_aead_blocksize(tfm);
|
||||
walk->chunksize = crypto_aead_chunksize(tfm);
|
||||
walk->stride = crypto_aead_chunksize(tfm);
|
||||
walk->ivsize = crypto_aead_ivsize(tfm);
|
||||
walk->alignmask = crypto_aead_alignmask(tfm);
|
||||
|
||||
@ -807,7 +808,7 @@ static void crypto_skcipher_free_instance(struct crypto_instance *inst)
|
||||
}
|
||||
|
||||
static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
__attribute__ ((unused));
|
||||
__maybe_unused;
|
||||
static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
{
|
||||
struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg,
|
||||
@ -821,6 +822,7 @@ static void crypto_skcipher_show(struct seq_file *m, struct crypto_alg *alg)
|
||||
seq_printf(m, "max keysize : %u\n", skcipher->max_keysize);
|
||||
seq_printf(m, "ivsize : %u\n", skcipher->ivsize);
|
||||
seq_printf(m, "chunksize : %u\n", skcipher->chunksize);
|
||||
seq_printf(m, "walksize : %u\n", skcipher->walksize);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NET
|
||||
@ -893,11 +895,14 @@ static int skcipher_prepare_alg(struct skcipher_alg *alg)
|
||||
{
|
||||
struct crypto_alg *base = &alg->base;
|
||||
|
||||
if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8)
|
||||
if (alg->ivsize > PAGE_SIZE / 8 || alg->chunksize > PAGE_SIZE / 8 ||
|
||||
alg->walksize > PAGE_SIZE / 8)
|
||||
return -EINVAL;
|
||||
|
||||
if (!alg->chunksize)
|
||||
alg->chunksize = base->cra_blocksize;
|
||||
if (!alg->walksize)
|
||||
alg->walksize = alg->chunksize;
|
||||
|
||||
base->cra_type = &crypto_skcipher_type2;
|
||||
base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
|
||||
|
@ -22,6 +22,8 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
|
||||
#include <crypto/aead.h>
|
||||
#include <crypto/hash.h>
|
||||
#include <crypto/skcipher.h>
|
||||
@ -1010,6 +1012,8 @@ static inline int tcrypt_test(const char *alg)
|
||||
{
|
||||
int ret;
|
||||
|
||||
pr_debug("testing %s\n", alg);
|
||||
|
||||
ret = alg_test(alg, alg, 0, 0);
|
||||
/* non-fips algs return -EINVAL in fips mode */
|
||||
if (fips_enabled && ret == -EINVAL)
|
||||
@ -2059,6 +2063,8 @@ static int __init tcrypt_mod_init(void)
|
||||
if (err) {
|
||||
printk(KERN_ERR "tcrypt: one or more tests failed!\n");
|
||||
goto err_free_tv;
|
||||
} else {
|
||||
pr_debug("all tests passed\n");
|
||||
}
|
||||
|
||||
/* We intentionaly return -EAGAIN to prevent keeping the module,
|
||||
|
1055
crypto/testmgr.c
1055
crypto/testmgr.c
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user