########################################################################
# Copyright (c) 2013, Intel Corporation
#
# This software is available to you under a choice of one of two
# licenses.  You may choose to be licensed under the terms of the GNU
# General Public License (GPL) Version 2, available from the file
# COPYING in the main directory of this source tree, or the
# OpenIB.org BSD license below:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the
#   distribution.
#
# * Neither the name of the Intel Corporation nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
##
## Authors:
##       Erdinc Ozturk <erdinc.ozturk@intel.com>
##       Vinodh Gopal <vinodh.gopal@intel.com>
##       James Guilford <james.guilford@intel.com>
##       Tim Chen <tim.c.chen@linux.intel.com>
##
## References:
##       This code was derived and highly optimized from the code described in paper:
##               Vinodh Gopal et al. Optimized Galois-Counter-Mode Implementation
##               on Intel Architecture Processors. August, 2010
##       The details of the implementation are explained in:
##               Erdinc Ozturk et al. Enabling High-Performance Galois-Counter-Mode
##               on Intel Architecture Processors. October, 2012.
##
## Assumptions:
##
##
##
## iv:
##       0                   1                   2                   3
##       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##       |                             Salt  (From the SA)               |
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##       |                     Initialization Vector                     |
##       |         (This is the sequence number from IPSec header)       |
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##       |                              0x1                              |
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
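##
##       As a hedged C sketch of the layout above (the helper name
##       build_gcm_iv is hypothetical, for illustration only):
##
##           #include <stdint.h>
##           #include <string.h>
##
##           static void build_gcm_iv(uint8_t j0[16], const uint8_t salt[4],
##                                    const uint8_t seq[8])
##           {
##               memcpy(j0, salt, 4);       /* salt from the SA            */
##               memcpy(j0 + 4, seq, 8);    /* IPSec sequence number as IV */
##               j0[12] = 0; j0[13] = 0;    /* trailing 32-bit counter     */
##               j0[14] = 0; j0[15] = 1;    /* field starts at 0x1         */
##           }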
##
##
##
## AAD:
##       AAD padded to 128 bits with 0
##       for example, assume AAD is a u32 vector
##
##       if AAD is 8 bytes:
##       AAD[3] = {A0, A1}
##       padded AAD in xmm register = {A1 A0 0 0}
##
##       0                   1                   2                   3
##       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##       |                               SPI (A1)                        |
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##       |                     32-bit Sequence Number (A0)               |
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##       |                              0x0                              |
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##
##       AAD Format with 32-bit Sequence Number
##
##       if AAD is 12 bytes:
##       AAD[3] = {A0, A1, A2}
##       padded AAD in xmm register = {A2 A1 A0 0}
##
##       0                   1                   2                   3
##       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##       |                               SPI (A2)                        |
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##       |                 64-bit Extended Sequence Number {A1,A0}       |
##       |                                                               |
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##       |                              0x0                              |
##       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
##
##       AAD Format with 64-bit Extended Sequence Number
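##
##       A minimal C sketch of the zero-padding described above
##       (pad_aad_block is a hypothetical helper, not part of this file):
##
##           #include <stdint.h>
##           #include <string.h>
##
##           static void pad_aad_block(uint8_t out[16], const uint8_t *aad,
##                                     size_t aad_len)  /* 8, 12, or 16 */
##           {
##               memset(out, 0, 16);         /* pad to 128 bits with 0 */
##               memcpy(out, aad, aad_len);  /* A0..An, rest stays 0   */
##           }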
##
##
## aadLen:
##       from the definition of the spec, aadLen can only be 8 or 12 bytes.
##       The code additionally supports aadLen of length 16 bytes.
##
## TLen:
##       from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
##
## poly = x^128 + x^127 + x^126 + x^121 + 1
## throughout the code, one tab and two tab indentations are used. one tab is
## for GHASH part, two tabs is for AES part.
##

#include <linux/linkage.h>
#include <asm/inst.h>
# constants in mergeable sections, linker can reorder and merge
.section .rodata.cst16.POLY, "aM", @progbits, 16
.align 16
POLY:            .octa     0xC2000000000000000000000000000001
.section .rodata.cst16.POLY2, "aM", @progbits, 16
.align 16
POLY2:           .octa     0xC20000000000000000000001C2000000
.section .rodata.cst16.TWOONE, "aM", @progbits, 16
.align 16
TWOONE:          .octa     0x00000001000000000000000000000001
.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
.align 16
SHUF_MASK:       .octa     0x000102030405060708090A0B0C0D0E0F
.section .rodata.cst16.ONE, "aM", @progbits, 16
.align 16
ONE:             .octa     0x00000000000000000000000000000001
.section .rodata.cst16.ONEf, "aM", @progbits, 16
.align 16
ONEf:            .octa     0x01000000000000000000000000000000
# order of these constants should not change.
# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F
.section .rodata, "a", @progbits
.align 16
SHIFT_MASK:      .octa     0x0f0e0d0c0b0a09080706050403020100
ALL_F:           .octa     0xffffffffffffffffffffffffffffffff
.octa 0x00000000000000000000000000000000
.section .rodata
.align 16
.type aad_shift_arr, @object
.size aad_shift_arr, 272
aad_shift_arr:
.octa 0xffffffffffffffffffffffffffffffff
.octa 0xffffffffffffffffffffffffffffff0C
.octa 0xffffffffffffffffffffffffffff0D0C
.octa 0xffffffffffffffffffffffffff0E0D0C
.octa 0xffffffffffffffffffffffff0F0E0D0C
.octa 0xffffffffffffffffffffff0C0B0A0908
.octa 0xffffffffffffffffffff0D0C0B0A0908
.octa 0xffffffffffffffffff0E0D0C0B0A0908
.octa 0xffffffffffffffff0F0E0D0C0B0A0908
.octa 0xffffffffffffff0C0B0A090807060504
.octa 0xffffffffffff0D0C0B0A090807060504
.octa 0xffffffffff0E0D0C0B0A090807060504
.octa 0xffffffff0F0E0D0C0B0A090807060504
.octa 0xffffff0C0B0A09080706050403020100
.octa 0xffff0D0C0B0A09080706050403020100
.octa 0xff0E0D0C0B0A09080706050403020100
.octa 0x0F0E0D0C0B0A09080706050403020100
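##
## Usage sketch (illustrative C intrinsics; the table is indexed by
## 16 * (number of leftover AAD bytes), and 0xFF control bytes make
## PSHUFB produce zeros, discarding the over-read garbage):
##
##     #include <tmmintrin.h>
##
##     static __m128i compact_aad(__m128i raw, int n_leftover)
##     {
##         const __m128i *tbl = (const __m128i *)aad_shift_arr;
##         return _mm_shuffle_epi8(raw, tbl[n_leftover]);
##     }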
.text
HashKey        = 16*6   # store HashKey <<1 mod poly here
HashKey_2      = 16*7   # store HashKey^2 <<1 mod poly here
HashKey_3      = 16*8   # store HashKey^3 <<1 mod poly here
HashKey_4      = 16*9   # store HashKey^4 <<1 mod poly here
HashKey_5      = 16*10  # store HashKey^5 <<1 mod poly here
HashKey_6      = 16*11  # store HashKey^6 <<1 mod poly here
HashKey_7      = 16*12  # store HashKey^7 <<1 mod poly here
HashKey_8      = 16*13  # store HashKey^8 <<1 mod poly here
HashKey_k      = 16*14  # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes)
HashKey_2_k    = 16*15  # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
HashKey_3_k    = 16*16  # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
HashKey_4_k    = 16*17  # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
HashKey_5_k    = 16*18  # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
HashKey_6_k    = 16*19  # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
HashKey_7_k    = 16*20  # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
HashKey_8_k    = 16*21  # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
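##
## Read as a C sketch, these are 16-byte slots in the precomputed-key
## area that arg2 points into (the struct name is illustrative only;
## the real layout is defined on the C side of the glue code):
##
##     struct hash_slots {           /* starting at byte offset 16*6     */
##         uint8_t hashkey[8][16];   /* HashKey^1..^8 <<1 mod poly       */
##         uint8_t hashkey_k[8][16]; /* XOR of halves, for Karatsuba     */
##     };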
#define arg1 %rdi
#define arg2 %rsi
#define arg3 %rdx
#define arg4 %rcx
#define arg5 %r8
#define arg6 %r9
#define arg7 STACK_OFFSET+8*1(%r14)
#define arg8 STACK_OFFSET+8*2(%r14)
#define arg9 STACK_OFFSET+8*3(%r14)
#define arg10 STACK_OFFSET+8*4(%r14)
#define keysize 2*15*16(arg1)
i = 0
j = 0

out_order = 0
in_order = 1
DEC = 0
ENC = 1

.macro define_reg r n
reg_\r = %xmm\n
.endm

.macro setreg
.altmacro
define_reg i %i
define_reg j %j
.noaltmacro
.endm
# need to push 4 registers into stack to maintain
STACK_OFFSET = 8*4

TMP1 =   16*0    # Temporary storage for AAD
TMP2 =   16*1    # Temporary storage for AES State 2 (State 1 is stored in an XMM register)
TMP3 =   16*2    # Temporary storage for AES State 3
TMP4 =   16*3    # Temporary storage for AES State 4
TMP5 =   16*4    # Temporary storage for AES State 5
TMP6 =   16*5    # Temporary storage for AES State 6
TMP7 =   16*6    # Temporary storage for AES State 7
TMP8 =   16*7    # Temporary storage for AES State 8

VARIABLE_OFFSET = 16*8

################################
# Utility Macros
################################
.macro FUNC_SAVE
        # the number of pushes must equal STACK_OFFSET
        push    %r12
        push    %r13
        push    %r14
        push    %r15

        mov     %rsp, %r14

        sub     $VARIABLE_OFFSET, %rsp
        and     $~63, %rsp                    # align rsp to 64 bytes
.endm
.macro FUNC_RESTORE
        mov     %r14, %rsp
        pop     %r15
        pop     %r14
        pop     %r13
        pop     %r12
.endm
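##
## In C-like pseudo-arithmetic (a sketch of the save/align dance above;
## the original %rsp survives in %r14 for FUNC_RESTORE):
##
##     saved_rsp = rsp;                        /* mov %rsp, %r14          */
##     rsp = (rsp - VARIABLE_OFFSET) & ~63UL;  /* 64-byte aligned scratch */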
# Encryption of a single block
.macro ENCRYPT_SINGLE_BLOCK REP XMM0
        vpxor    (arg1), \XMM0, \XMM0
        i = 1
        setreg
.rep \REP
        vaesenc  16*i(arg1), \XMM0, \XMM0
        i = (i+1)
        setreg
.endr
        vaesenclast 16*i(arg1), \XMM0, \XMM0
.endm
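##
## Equivalent C intrinsics sketch, with nr = \REP + 1 total rounds and rk
## the expanded key schedule at arg1 (an assumption for illustration, not
## the kernel API):
##
##     #include <wmmintrin.h>
##
##     static __m128i aes_enc_block(__m128i blk, const __m128i *rk, int nr)
##     {
##         blk = _mm_xor_si128(blk, rk[0]);          /* vpxor whitening */
##         for (int i = 1; i < nr; i++)
##             blk = _mm_aesenc_si128(blk, rk[i]);   /* vaesenc rounds  */
##         return _mm_aesenclast_si128(blk, rk[nr]); /* vaesenclast     */
##     }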
# combined for GCM encrypt and decrypt functions
# clobbering all xmm registers
# clobbering r10, r11, r12, r13, r14, r15
.macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP
        vmovdqu HashKey(arg2), %xmm13           # xmm13 = HashKey

        mov     arg5, %r13                      # save the number of bytes of plaintext/ciphertext
        and     $-16, %r13                      # r13 = r13 - (r13 mod 16)
        mov     %r13, %r12
        shr     $4, %r12
        and     $7, %r12
        jz      _initial_num_blocks_is_0\@

        cmp     $7, %r12
        je      _initial_num_blocks_is_7\@
        cmp     $6, %r12
        je      _initial_num_blocks_is_6\@
        cmp     $5, %r12
        je      _initial_num_blocks_is_5\@
        cmp     $4, %r12
        je      _initial_num_blocks_is_4\@
        cmp     $3, %r12
        je      _initial_num_blocks_is_3\@
        cmp     $2, %r12
        je      _initial_num_blocks_is_2\@

        jmp     _initial_num_blocks_is_1\@
_initial_num_blocks_is_7\@:
        \INITIAL_BLOCKS  \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*7, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_6\@:
        \INITIAL_BLOCKS  \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*6, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_5\@:
        \INITIAL_BLOCKS  \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*5, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_4\@:
        \INITIAL_BLOCKS  \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*4, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_3\@:
        \INITIAL_BLOCKS  \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*3, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_2\@:
        \INITIAL_BLOCKS  \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*2, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_1\@:
        \INITIAL_BLOCKS  \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
        sub     $16*1, %r13
        jmp     _initial_blocks_encrypted\@

_initial_num_blocks_is_0\@:
        \INITIAL_BLOCKS  \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC

_initial_blocks_encrypted\@:
        cmp     $0, %r13
        je      _zero_cipher_left\@
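##
## The dispatch above computes how many whole 16-byte blocks are handled
## before the 8-blocks-at-a-time main loop; as a C sketch:
##
##     size_t full_blocks = text_len / 16;       /* shr $4, %r12 */
##     size_t num_initial = full_blocks % 8;     /* and $7, %r12 */
##     /* branch to _initial_num_blocks_is_<num_initial> */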
        sub     $128, %r13
        je      _eight_cipher_left\@

        vmovd   %xmm9, %r15d
        and     $255, %r15d
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9

_encrypt_by_8_new\@:
        cmp     $(255-8), %r15d
        jg      _encrypt_by_8\@

        add     $8, %r15b
        \GHASH_8_ENCRYPT_8_PARALLEL      \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
        add     $128, %r11
        sub     $128, %r13
        jne     _encrypt_by_8_new\@

        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        jmp     _eight_cipher_left\@

_encrypt_by_8\@:
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        add     $8, %r15b
        \GHASH_8_ENCRYPT_8_PARALLEL      \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        add     $128, %r11
        sub     $128, %r13
        jne     _encrypt_by_8_new\@

        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
_eight_cipher_left\@:
        \GHASH_LAST_8    %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8

_zero_cipher_left\@:
        cmp     $16, arg5
        jl      _only_less_than_16\@

        mov     arg5, %r13
        and     $15, %r13                       # r13 = (arg5 mod 16)

        je      _multiple_of_16_bytes\@

        # handle the last <16 Byte block separately
        vpaddd  ONE(%rip), %xmm9, %xmm9         # INCR CNT to get Yn
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        ENCRYPT_SINGLE_BLOCK    \REP, %xmm9     # E(K, Yn)

        sub     $16, %r11
        add     %r13, %r11
        vmovdqu (arg4, %r11), %xmm1             # receive the last <16 Byte block

        lea     SHIFT_MASK+16(%rip), %r12
        sub     %r13, %r12                      # adjust the shuffle mask pointer to be
                                                # able to shift 16-r13 bytes (r13 is the
                                                # number of bytes in plaintext mod 16)
        vmovdqu (%r12), %xmm2                   # get the appropriate shuffle mask
        vpshufb %xmm2, %xmm1, %xmm1             # shift right 16-r13 bytes
        jmp     _final_ghash_mul\@
_only_less_than_16\@:
        # check for 0 length
        mov     arg5, %r13
        and     $15, %r13                       # r13 = (arg5 mod 16)

        je      _multiple_of_16_bytes\@

        # handle the last <16 Byte block separately

        vpaddd  ONE(%rip), %xmm9, %xmm9         # INCR CNT to get Yn
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        ENCRYPT_SINGLE_BLOCK    \REP, %xmm9     # E(K, Yn)

        lea     SHIFT_MASK+16(%rip), %r12
        sub     %r13, %r12                      # adjust the shuffle mask pointer to be
                                                # able to shift 16-r13 bytes (r13 is the
                                                # number of bytes in plaintext mod 16)

_get_last_16_byte_loop\@:
        movb    (arg4, %r11), %al
        movb    %al, TMP1(%rsp, %r11)
        add     $1, %r11
        cmp     %r13, %r11
        jne     _get_last_16_byte_loop\@

        vmovdqu TMP1(%rsp), %xmm1

        sub     $16, %r11
_final_ghash_mul\@:
        .if \ENC_DEC == DEC
        vmovdqa %xmm1, %xmm2
        vpxor   %xmm1, %xmm9, %xmm9             # Plaintext XOR E(K, Yn)
        vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1   # get the appropriate mask to
                                                # mask out top 16-r13 bytes of xmm9
        vpand   %xmm1, %xmm9, %xmm9             # mask out top 16-r13 bytes of xmm9
        vpand   %xmm1, %xmm2, %xmm2
        vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
        vpxor   %xmm2, %xmm14, %xmm14
        # GHASH computation for the last <16 Byte block
        \GHASH_MUL       %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
        sub     %r13, %r11
        add     $16, %r11
        .else
        vpxor   %xmm1, %xmm9, %xmm9             # Plaintext XOR E(K, Yn)
        vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1   # get the appropriate mask to
                                                # mask out top 16-r13 bytes of xmm9
        vpand   %xmm1, %xmm9, %xmm9             # mask out top 16-r13 bytes of xmm9
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        vpxor   %xmm9, %xmm14, %xmm14
        # GHASH computation for the last <16 Byte block
        \GHASH_MUL       %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
        sub     %r13, %r11
        add     $16, %r11
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9   # shuffle xmm9 back to output as ciphertext
        .endif
        #############################
        # output r13 Bytes
        vmovq   %xmm9, %rax
        cmp     $8, %r13
        jle     _less_than_8_bytes_left\@

        mov     %rax, (arg3, %r11)
        add     $8, %r11
        vpsrldq $8, %xmm9, %xmm9
        vmovq   %xmm9, %rax
        sub     $8, %r13

_less_than_8_bytes_left\@:
        movb    %al, (arg3, %r11)
        add     $1, %r11
        shr     $8, %rax
        sub     $1, %r13
        jne     _less_than_8_bytes_left\@
        #############################
_multiple_of_16_bytes\@:
        mov     arg8, %r12                      # r12 = aadLen (number of bytes)
        shl     $3, %r12                        # convert into number of bits
        vmovd   %r12d, %xmm15                   # len(A) in xmm15

        shl     $3, arg5                        # len(C) in bits (*128)
        vmovq   arg5, %xmm1
        vpslldq $8, %xmm15, %xmm15              # xmm15 = len(A)||0x0000000000000000
        vpxor   %xmm1, %xmm15, %xmm15           # xmm15 = len(A)||len(C)

        vpxor   %xmm15, %xmm14, %xmm14
        \GHASH_MUL       %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6    # final GHASH computation
        vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap

        mov     arg6, %rax                      # rax = *Y0
        vmovdqu (%rax), %xmm9                   # xmm9 = Y0

        ENCRYPT_SINGLE_BLOCK    \REP, %xmm9     # E(K, Y0)

        vpxor   %xmm14, %xmm9, %xmm9
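##
## The block fed into the final GHASH is the standard GCM length block:
## the bit lengths of AAD and ciphertext packed into 128 bits. A hedged
## sketch (put_unaligned_be64 used illustratively):
##
##     /* uint8_t len_block[16];                                          */
##     /* put_unaligned_be64((u64)aad_len  * 8, len_block);      len(A)   */
##     /* put_unaligned_be64((u64)text_len * 8, len_block + 8);  len(C)   */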
_return_T\@:
        mov     arg9, %r10                      # r10 = authTag
        mov     arg10, %r11                     # r11 = auth_tag_len

        cmp     $16, %r11
        je      _T_16\@

        cmp     $8, %r11
        jl      _T_4\@

_T_8\@:
        vmovq   %xmm9, %rax
        mov     %rax, (%r10)
        add     $8, %r10
        sub     $8, %r11
        vpsrldq $8, %xmm9, %xmm9
        cmp     $0, %r11
        je      _return_T_done\@
_T_4\@:
        vmovd   %xmm9, %eax
        mov     %eax, (%r10)
        add     $4, %r10
        sub     $4, %r11
        vpsrldq $4, %xmm9, %xmm9
        cmp     $0, %r11
        je      _return_T_done\@
_T_123\@:
        vmovd   %xmm9, %eax
        cmp     $2, %r11
        jl      _T_1\@
        mov     %ax, (%r10)
        cmp     $2, %r11
        je      _return_T_done\@
        add     $2, %r10
        sar     $16, %eax
_T_1\@:
        mov     %al, (%r10)
        jmp     _return_T_done\@

_T_16\@:
        vmovdqu %xmm9, (%r10)

_return_T_done\@:
.endm
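##
## The _T_* tail is simply a truncated-tag store; a hedged C equivalent
## (auth_tag_len is 8, 12, or 16 per the spec note in the header):
##
##     #include <string.h>
##
##     static void write_tag(uint8_t *auth_tag, const uint8_t tag[16],
##                           size_t auth_tag_len)
##     {
##         memcpy(auth_tag, tag, auth_tag_len);  /* done in 8/4/2/1-byte pieces */
##     }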
#ifdef CONFIG_AS_AVX
###############################################################################
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
# Input: A and B (128-bits each, bit-reflected)
# Output: C = A*B*x mod poly, (i.e. >>1)
# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
###############################################################################
.macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5

        vpshufd         $0b01001110, \GH, \T2
        vpshufd         $0b01001110, \HK, \T3
        vpxor           \GH, \T2, \T2           # T2 = (a1+a0)
        vpxor           \HK, \T3, \T3           # T3 = (b1+b0)

        vpclmulqdq      $0x11, \HK, \GH, \T1    # T1 = a1*b1
        vpclmulqdq      $0x00, \HK, \GH, \GH    # GH = a0*b0
        vpclmulqdq      $0x00, \T3, \T2, \T2    # T2 = (a1+a0)*(b1+b0)
        vpxor           \GH, \T2, \T2
        vpxor           \T1, \T2, \T2           # T2 = a0*b1+a1*b0

        vpslldq         $8, \T2, \T3            # shift-L T3 2 DWs
        vpsrldq         $8, \T2, \T2            # shift-R T2 2 DWs
        vpxor           \T3, \GH, \GH
        vpxor           \T2, \T1, \T1           # <T1:GH> = GH x HK

        # first phase of the reduction
        vpslld  $31, \GH, \T2                   # packed left shift << 31
        vpslld  $30, \GH, \T3                   # packed left shift << 30
        vpslld  $25, \GH, \T4                   # packed left shift << 25

        vpxor   \T3, \T2, \T2                   # xor the shifted versions
        vpxor   \T4, \T2, \T2

        vpsrldq $4, \T2, \T5                    # shift-R T5 1 DW

        vpslldq $12, \T2, \T2                   # shift-L T2 3 DWs
        vpxor   \T2, \GH, \GH                   # first phase of the reduction complete

        # second phase of the reduction

        vpsrld  $1, \GH, \T2                    # packed right shift >> 1
        vpsrld  $2, \GH, \T3                    # packed right shift >> 2
        vpsrld  $7, \GH, \T4                    # packed right shift >> 7
        vpxor   \T3, \T2, \T2                   # xor the shifted versions
        vpxor   \T4, \T2, \T2

        vpxor   \T5, \T2, \T2
        vpxor   \T2, \GH, \GH
        vpxor   \T1, \GH, \GH                   # the result is in GH

.endm
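##
## A rough C intrinsics sketch of the Karatsuba carry-less multiply above
## (reduction omitted for brevity; operands are bit-reflected as described;
## the helper name is hypothetical):
##
##     #include <emmintrin.h>
##     #include <wmmintrin.h>      /* PCLMULQDQ intrinsics */
##
##     static __m128i clmul_karatsuba(__m128i a, __m128i b, __m128i *hi)
##     {
##         __m128i t1  = _mm_clmulepi64_si128(a, b, 0x11);   /* a1*b1 */
##         __m128i lo  = _mm_clmulepi64_si128(a, b, 0x00);   /* a0*b0 */
##         __m128i a10 = _mm_xor_si128(a, _mm_shuffle_epi32(a, 0x4E));
##         __m128i b10 = _mm_xor_si128(b, _mm_shuffle_epi32(b, 0x4E));
##         __m128i mid = _mm_clmulepi64_si128(a10, b10, 0x00);
##         mid = _mm_xor_si128(mid, _mm_xor_si128(t1, lo));  /* a0b1+a1b0 */
##         lo  = _mm_xor_si128(lo, _mm_slli_si128(mid, 8));
##         *hi = _mm_xor_si128(t1, _mm_srli_si128(mid, 8));
##         return lo;              /* <*hi:lo> = 256-bit product, pre-reduction */
##     }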
.macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6

        # HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
        vmovdqa  \HK, \T5

        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2        #  T5 = HashKey^2<<1 mod poly
        vmovdqu  \T5, HashKey_2(arg2)                          #  [HashKey_2] = HashKey^2<<1 mod poly
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_2_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2        #  T5 = HashKey^3<<1 mod poly
        vmovdqu  \T5, HashKey_3(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_3_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2        #  T5 = HashKey^4<<1 mod poly
        vmovdqu  \T5, HashKey_4(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_4_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2        #  T5 = HashKey^5<<1 mod poly
        vmovdqu  \T5, HashKey_5(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_5_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2        #  T5 = HashKey^6<<1 mod poly
        vmovdqu  \T5, HashKey_6(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_6_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2        #  T5 = HashKey^7<<1 mod poly
        vmovdqu  \T5, HashKey_7(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_7_k(arg2)

        GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2        #  T5 = HashKey^8<<1 mod poly
        vmovdqu  \T5, HashKey_8(arg2)
        vpshufd  $0b01001110, \T5, \T1
        vpxor    \T5, \T1, \T1
        vmovdqu  \T1, HashKey_8_k(arg2)

.endm
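##
## In outline (a sketch only, not the kernel API): PRECOMPUTE_AVX iterates
## H^(i+1) = H^i * H in GF(2^128) via GHASH_MUL_AVX, storing each power
## plus the XOR of its 64-bit halves for the Karatsuba middle term:
##
##     /* p = h;                        h = HashKey<<1 mod poly        */
##     /* for (i = 2; i <= 8; i++) {                                   */
##     /*     p = ghash_mul(p, h);      GHASH_MUL_AVX                  */
##     /*     store(HashKey_<i>,   p);                                 */
##     /*     store(HashKey_<i>_k, lo64(p) ^ hi64(p));                 */
##     /* }                                                            */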
## if a = number of total plaintext bytes
## b = floor(a/16)
## num_initial_blocks = b mod 8
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
## arg1, arg3, arg4, r14 are used as a pointer only, not modified

.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
        i = (8-\num_initial_blocks)
        j = 0
        setreg

        mov     arg7, %r10                      # r10 = AAD
        mov     arg8, %r12                      # r12 = aadLen

        mov     %r12, %r11

        vpxor   reg_j, reg_j, reg_j
        vpxor   reg_i, reg_i, reg_i
        cmp     $16, %r11
        jl      _get_AAD_rest8\@
_get_AAD_blocks\@:
        vmovdqu (%r10), reg_i
        vpshufb SHUF_MASK(%rip), reg_i, reg_i
        vpxor   reg_i, reg_j, reg_j
        GHASH_MUL_AVX       reg_j, \T2, \T1, \T3, \T4, \T5, \T6
        add     $16, %r10
        sub     $16, %r12
        sub     $16, %r11
        cmp     $16, %r11
        jge     _get_AAD_blocks\@
        vmovdqu reg_j, reg_i
        cmp     $0, %r11
        je      _get_AAD_done\@
        vpxor   reg_i, reg_i, reg_i

        /* read the last <16B of AAD. since we have at least 4B of
        data right after the AAD (the ICV, and maybe some CT), we can
        read 4B/8B blocks safely, and then get rid of the extra stuff */
_get_AAD_rest8\@:
        cmp     $4, %r11
        jle     _get_AAD_rest4\@
        movq    (%r10), \T1
        add     $8, %r10
        sub     $8, %r11
        vpslldq $8, \T1, \T1
        vpsrldq $8, reg_i, reg_i
        vpxor   \T1, reg_i, reg_i
        jmp     _get_AAD_rest8\@
_get_AAD_rest4\@:
        cmp     $0, %r11
        jle     _get_AAD_rest0\@
        mov     (%r10), %eax
        movq    %rax, \T1
        add     $4, %r10
        sub     $4, %r11
        vpslldq $12, \T1, \T1
        vpsrldq $4, reg_i, reg_i
        vpxor   \T1, reg_i, reg_i
_get_AAD_rest0\@:
        /* finalize: shift out the extra bytes we read, and align
        left. since pslldq can only shift by an immediate, we use
        vpshufb and an array of shuffle masks */
        movq    %r12, %r11
        salq    $4, %r11
        movdqu  aad_shift_arr(%r11), \T1
        vpshufb \T1, reg_i, reg_i
_get_AAD_rest_final\@:
        vpshufb SHUF_MASK(%rip), reg_i, reg_i
        vpxor   reg_j, reg_i, reg_i
        GHASH_MUL_AVX       reg_i, \T2, \T1, \T3, \T4, \T5, \T6

_get_AAD_done\@:
        # initialize the data pointer offset as zero
        xor     %r11d, %r11d
        # start AES for num_initial_blocks blocks
        mov     arg6, %rax                      # rax = *Y0
        vmovdqu (%rax), \CTR                    # CTR = Y0
        vpshufb SHUF_MASK(%rip), \CTR, \CTR

        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, reg_i
        vpshufb SHUF_MASK(%rip), reg_i, reg_i   # perform a 16Byte swap
        i = (i+1)
        setreg
.endr

        vmovdqa (arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vpxor   \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr
        j = 1
        setreg
.rep \REP
        vmovdqa 16*j(arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenc \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        j = (j+1)
        setreg
.endr

        vmovdqa 16*j(arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenclast      \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr
2013-12-11 14:28:41 -08:00
2018-12-10 19:56:45 +00:00
i = ( 9 - \ n u m _ i n i t i a l _ b l o c k s )
setreg
.rep \ num_ i n i t i a l _ b l o c k s
2018-12-10 19:57:00 +00:00
vmovdqu ( a r g 4 , % r11 ) , \ T 1
2018-12-10 19:56:45 +00:00
vpxor \ T 1 , r e g _ i , r e g _ i
2018-12-10 19:57:00 +00:00
vmovdqu r e g _ i , ( a r g 3 , % r11 ) # w r i t e b a c k c i p h e r t e x t f o r n u m _ i n i t i a l _ b l o c k s b l o c k s
2018-12-10 19:56:45 +00:00
add $ 1 6 , % r11
.if \ ENC_ D E C = = D E C
vmovdqa \ T 1 , r e g _ i
.endif
vpshufb S H U F _ M A S K ( % r i p ) , r e g _ i , r e g _ i # p r e p a r e c i p h e r t e x t f o r G H A S H c o m p u t a t i o n s
i = ( i + 1 )
setreg
.endr
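        # For decryption the loop above restores reg_i from \T1 so that
        # GHASH runs over the ciphertext that was read, not the recovered
        # plaintext; GCM authenticates the ciphertext in both directions.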
        i = (8-\num_initial_blocks)
        j = (9-\num_initial_blocks)
        setreg

.rep \num_initial_blocks
        vpxor   reg_i, reg_j, reg_j
        GHASH_MUL_AVX   reg_j, \T2, \T1, \T3, \T4, \T5, \T6     # apply GHASH on num_initial_blocks blocks
        i = (i+1)
        j = (j+1)
        setreg
.endr
        # XMM8 has the combined result here

        vmovdqa \XMM8, TMP1(%rsp)
        vmovdqa \XMM8, \T3

        cmp     $128, %r13
        jl      _initial_blocks_done\@          # no need for precomputed constants
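        # The 8-block parallel path below consumes a full 128 bytes per
        # iteration; with less than that remaining we skip its counter-block
        # precomputation and fall through to _initial_blocks_done.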
###############################################################################
# HashKey_i_k holds XORed values of the low and high parts of HashKey_i
        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, \XMM1
        vpshufb SHUF_MASK(%rip), \XMM1, \XMM1   # perform a 16Byte swap

        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, \XMM2
        vpshufb SHUF_MASK(%rip), \XMM2, \XMM2   # perform a 16Byte swap

        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, \XMM3
        vpshufb SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap

        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, \XMM4
        vpshufb SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap

        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, \XMM5
        vpshufb SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap

        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, \XMM6
        vpshufb SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap

        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, \XMM7
        vpshufb SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap

        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, \XMM8
        vpshufb SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap

        vmovdqa (arg1), \T_key
        vpxor   \T_key, \XMM1, \XMM1
        vpxor   \T_key, \XMM2, \XMM2
        vpxor   \T_key, \XMM3, \XMM3
        vpxor   \T_key, \XMM4, \XMM4
        vpxor   \T_key, \XMM5, \XMM5
        vpxor   \T_key, \XMM6, \XMM6
        vpxor   \T_key, \XMM7, \XMM7
        vpxor   \T_key, \XMM8, \XMM8
        i = 1
        setreg
.rep \REP       # do REP rounds
        vmovdqa 16*i(arg1), \T_key
        vaesenc \T_key, \XMM1, \XMM1
        vaesenc \T_key, \XMM2, \XMM2
        vaesenc \T_key, \XMM3, \XMM3
        vaesenc \T_key, \XMM4, \XMM4
        vaesenc \T_key, \XMM5, \XMM5
        vaesenc \T_key, \XMM6, \XMM6
        vaesenc \T_key, \XMM7, \XMM7
        vaesenc \T_key, \XMM8, \XMM8
        i = (i+1)
        setreg
.endr

        vmovdqa 16*i(arg1), \T_key
        vaesenclast     \T_key, \XMM1, \XMM1
        vaesenclast     \T_key, \XMM2, \XMM2
        vaesenclast     \T_key, \XMM3, \XMM3
        vaesenclast     \T_key, \XMM4, \XMM4
        vaesenclast     \T_key, \XMM5, \XMM5
        vaesenclast     \T_key, \XMM6, \XMM6
        vaesenclast     \T_key, \XMM7, \XMM7
        vaesenclast     \T_key, \XMM8, \XMM8
        vmovdqu (arg4, %r11), \T1
        vpxor   \T1, \XMM1, \XMM1
        vmovdqu \XMM1, (arg3, %r11)
.if \ENC_DEC == DEC
        vmovdqa \T1, \XMM1
.endif

        vmovdqu 16*1(arg4, %r11), \T1
        vpxor   \T1, \XMM2, \XMM2
        vmovdqu \XMM2, 16*1(arg3, %r11)
.if \ENC_DEC == DEC
        vmovdqa \T1, \XMM2
.endif

        vmovdqu 16*2(arg4, %r11), \T1
        vpxor   \T1, \XMM3, \XMM3
        vmovdqu \XMM3, 16*2(arg3, %r11)
.if \ENC_DEC == DEC
        vmovdqa \T1, \XMM3
.endif

        vmovdqu 16*3(arg4, %r11), \T1
        vpxor   \T1, \XMM4, \XMM4
        vmovdqu \XMM4, 16*3(arg3, %r11)
.if \ENC_DEC == DEC
        vmovdqa \T1, \XMM4
.endif

        vmovdqu 16*4(arg4, %r11), \T1
        vpxor   \T1, \XMM5, \XMM5
        vmovdqu \XMM5, 16*4(arg3, %r11)
.if \ENC_DEC == DEC
        vmovdqa \T1, \XMM5
.endif

        vmovdqu 16*5(arg4, %r11), \T1
        vpxor   \T1, \XMM6, \XMM6
        vmovdqu \XMM6, 16*5(arg3, %r11)
.if \ENC_DEC == DEC
        vmovdqa \T1, \XMM6
.endif

        vmovdqu 16*6(arg4, %r11), \T1
        vpxor   \T1, \XMM7, \XMM7
        vmovdqu \XMM7, 16*6(arg3, %r11)
.if \ENC_DEC == DEC
        vmovdqa \T1, \XMM7
.endif

        vmovdqu 16*7(arg4, %r11), \T1
        vpxor   \T1, \XMM8, \XMM8
        vmovdqu \XMM8, 16*7(arg3, %r11)
.if \ENC_DEC == DEC
        vmovdqa \T1, \XMM8
.endif

        add     $128, %r11
        vpshufb SHUF_MASK(%rip), \XMM1, \XMM1   # perform a 16Byte swap
        vpxor   TMP1(%rsp), \XMM1, \XMM1        # combine GHASHed value with the corresponding ciphertext
        vpshufb SHUF_MASK(%rip), \XMM2, \XMM2   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap

###############################################################################

_initial_blocks_done\@:

.endm
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
# arg1, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC

        vmovdqa \XMM1, \T2
        vmovdqa \XMM2, TMP2(%rsp)
        vmovdqa \XMM3, TMP3(%rsp)
        vmovdqa \XMM4, TMP4(%rsp)
        vmovdqa \XMM5, TMP5(%rsp)
        vmovdqa \XMM6, TMP6(%rsp)
        vmovdqa \XMM7, TMP7(%rsp)
        vmovdqa \XMM8, TMP8(%rsp)
.if \loop_idx == in_order
        vpaddd  ONE(%rip), \CTR, \XMM1          # INCR CNT
        vpaddd  ONE(%rip), \XMM1, \XMM2
        vpaddd  ONE(%rip), \XMM2, \XMM3
        vpaddd  ONE(%rip), \XMM3, \XMM4
        vpaddd  ONE(%rip), \XMM4, \XMM5
        vpaddd  ONE(%rip), \XMM5, \XMM6
        vpaddd  ONE(%rip), \XMM6, \XMM7
        vpaddd  ONE(%rip), \XMM7, \XMM8
        vmovdqa \XMM8, \CTR

        vpshufb SHUF_MASK(%rip), \XMM1, \XMM1   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM2, \XMM2   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap
.else
        vpaddd  ONEf(%rip), \CTR, \XMM1         # INCR CNT
        vpaddd  ONEf(%rip), \XMM1, \XMM2
        vpaddd  ONEf(%rip), \XMM2, \XMM3
        vpaddd  ONEf(%rip), \XMM3, \XMM4
        vpaddd  ONEf(%rip), \XMM4, \XMM5
        vpaddd  ONEf(%rip), \XMM5, \XMM6
        vpaddd  ONEf(%rip), \XMM6, \XMM7
        vpaddd  ONEf(%rip), \XMM7, \XMM8
        vmovdqa \XMM8, \CTR
.endif
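        # Two counter layouts: with in_order the counters are incremented in
        # their swapped form (ONE) and then byte swapped for AES; on the
        # other path they are already held in the byte order AES consumes
        # and are incremented with the ONEf constant, which avoids the
        # eight vpshufb above.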
###############################################################################

        vmovdqu (arg1), \T1
        vpxor   \T1, \XMM1, \XMM1
        vpxor   \T1, \XMM2, \XMM2
        vpxor   \T1, \XMM3, \XMM3
        vpxor   \T1, \XMM4, \XMM4
        vpxor   \T1, \XMM5, \XMM5
        vpxor   \T1, \XMM6, \XMM6
        vpxor   \T1, \XMM7, \XMM7
        vpxor   \T1, \XMM8, \XMM8

###############################################################################

        vmovdqu 16*1(arg1), \T1
        vaesenc \T1, \XMM1, \XMM1
        vaesenc \T1, \XMM2, \XMM2
        vaesenc \T1, \XMM3, \XMM3
        vaesenc \T1, \XMM4, \XMM4
        vaesenc \T1, \XMM5, \XMM5
        vaesenc \T1, \XMM6, \XMM6
        vaesenc \T1, \XMM7, \XMM7
        vaesenc \T1, \XMM8, \XMM8

        vmovdqu 16*2(arg1), \T1
        vaesenc \T1, \XMM1, \XMM1
        vaesenc \T1, \XMM2, \XMM2
        vaesenc \T1, \XMM3, \XMM3
        vaesenc \T1, \XMM4, \XMM4
        vaesenc \T1, \XMM5, \XMM5
        vaesenc \T1, \XMM6, \XMM6
        vaesenc \T1, \XMM7, \XMM7
        vaesenc \T1, \XMM8, \XMM8
###############################################################################

        vmovdqu HashKey_8(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T2, \T4    # T4 = a1*b1
        vpclmulqdq      $0x00, \T5, \T2, \T7    # T7 = a0*b0

        vpshufd $0b01001110, \T2, \T6
        vpxor   \T2, \T6, \T6

        vmovdqu HashKey_8_k(arg2), \T5
        vpclmulqdq      $0x00, \T5, \T6, \T6
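        # Karatsuba: each 128x128-bit carry-less product costs only three
        # vpclmulqdq: a1*b1, a0*b0 and (a1^a0)*(b1^b0), the last using the
        # precomputed XORed key halves (HashKey_i_k).  The true middle term
        # is recovered below as (a1^a0)*(b1^b0) ^ a1*b1 ^ a0*b0, and the
        # eight multiplies are interleaved with AES rounds to hide latency.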
        vmovdqu 16*3(arg1), \T1
        vaesenc \T1, \XMM1, \XMM1
        vaesenc \T1, \XMM2, \XMM2
        vaesenc \T1, \XMM3, \XMM3
        vaesenc \T1, \XMM4, \XMM4
        vaesenc \T1, \XMM5, \XMM5
        vaesenc \T1, \XMM6, \XMM6
        vaesenc \T1, \XMM7, \XMM7
        vaesenc \T1, \XMM8, \XMM8

        vmovdqa TMP2(%rsp), \T1
        vmovdqu HashKey_7(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor   \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor   \T3, \T7, \T7

        vpshufd $0b01001110, \T1, \T3
        vpxor   \T1, \T3, \T3
        vmovdqu HashKey_7_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor   \T3, \T6, \T6

        vmovdqu 16*4(arg1), \T1
        vaesenc \T1, \XMM1, \XMM1
        vaesenc \T1, \XMM2, \XMM2
        vaesenc \T1, \XMM3, \XMM3
        vaesenc \T1, \XMM4, \XMM4
        vaesenc \T1, \XMM5, \XMM5
        vaesenc \T1, \XMM6, \XMM6
        vaesenc \T1, \XMM7, \XMM7
        vaesenc \T1, \XMM8, \XMM8

###############################################################################

        vmovdqa TMP3(%rsp), \T1
        vmovdqu HashKey_6(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor   \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor   \T3, \T7, \T7

        vpshufd $0b01001110, \T1, \T3
        vpxor   \T1, \T3, \T3
        vmovdqu HashKey_6_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor   \T3, \T6, \T6

        vmovdqu 16*5(arg1), \T1
        vaesenc \T1, \XMM1, \XMM1
        vaesenc \T1, \XMM2, \XMM2
        vaesenc \T1, \XMM3, \XMM3
        vaesenc \T1, \XMM4, \XMM4
        vaesenc \T1, \XMM5, \XMM5
        vaesenc \T1, \XMM6, \XMM6
        vaesenc \T1, \XMM7, \XMM7
        vaesenc \T1, \XMM8, \XMM8
        vmovdqa TMP4(%rsp), \T1
        vmovdqu HashKey_5(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor   \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor   \T3, \T7, \T7

        vpshufd $0b01001110, \T1, \T3
        vpxor   \T1, \T3, \T3
        vmovdqu HashKey_5_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor   \T3, \T6, \T6

        vmovdqu 16*6(arg1), \T1
        vaesenc \T1, \XMM1, \XMM1
        vaesenc \T1, \XMM2, \XMM2
        vaesenc \T1, \XMM3, \XMM3
        vaesenc \T1, \XMM4, \XMM4
        vaesenc \T1, \XMM5, \XMM5
        vaesenc \T1, \XMM6, \XMM6
        vaesenc \T1, \XMM7, \XMM7
        vaesenc \T1, \XMM8, \XMM8

        vmovdqa TMP5(%rsp), \T1
        vmovdqu HashKey_4(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor   \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor   \T3, \T7, \T7

        vpshufd $0b01001110, \T1, \T3
        vpxor   \T1, \T3, \T3
        vmovdqu HashKey_4_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor   \T3, \T6, \T6

        vmovdqu 16*7(arg1), \T1
        vaesenc \T1, \XMM1, \XMM1
        vaesenc \T1, \XMM2, \XMM2
        vaesenc \T1, \XMM3, \XMM3
        vaesenc \T1, \XMM4, \XMM4
        vaesenc \T1, \XMM5, \XMM5
        vaesenc \T1, \XMM6, \XMM6
        vaesenc \T1, \XMM7, \XMM7
        vaesenc \T1, \XMM8, \XMM8
        vmovdqa TMP6(%rsp), \T1
        vmovdqu HashKey_3(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor   \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor   \T3, \T7, \T7

        vpshufd $0b01001110, \T1, \T3
        vpxor   \T1, \T3, \T3
        vmovdqu HashKey_3_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor   \T3, \T6, \T6

        vmovdqu 16*8(arg1), \T1
        vaesenc \T1, \XMM1, \XMM1
        vaesenc \T1, \XMM2, \XMM2
        vaesenc \T1, \XMM3, \XMM3
        vaesenc \T1, \XMM4, \XMM4
        vaesenc \T1, \XMM5, \XMM5
        vaesenc \T1, \XMM6, \XMM6
        vaesenc \T1, \XMM7, \XMM7
        vaesenc \T1, \XMM8, \XMM8

        vmovdqa TMP7(%rsp), \T1
        vmovdqu HashKey_2(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor   \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor   \T3, \T7, \T7

        vpshufd $0b01001110, \T1, \T3
        vpxor   \T1, \T3, \T3
        vmovdqu HashKey_2_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor   \T3, \T6, \T6

###############################################################################

        vmovdqu 16*9(arg1), \T5
        vaesenc \T5, \XMM1, \XMM1
        vaesenc \T5, \XMM2, \XMM2
        vaesenc \T5, \XMM3, \XMM3
        vaesenc \T5, \XMM4, \XMM4
        vaesenc \T5, \XMM5, \XMM5
        vaesenc \T5, \XMM6, \XMM6
        vaesenc \T5, \XMM7, \XMM7
        vaesenc \T5, \XMM8, \XMM8
        vmovdqa TMP8(%rsp), \T1
        vmovdqu HashKey(arg2), \T5
        vpclmulqdq      $0x11, \T5, \T1, \T3
        vpxor   \T3, \T4, \T4
        vpclmulqdq      $0x00, \T5, \T1, \T3
        vpxor   \T3, \T7, \T7

        vpshufd $0b01001110, \T1, \T3
        vpxor   \T1, \T3, \T3
        vmovdqu HashKey_k(arg2), \T5
        vpclmulqdq      $0x10, \T5, \T3, \T3
        vpxor   \T3, \T6, \T6

        vpxor   \T4, \T6, \T6
        vpxor   \T7, \T6, \T6
        vmovdqu 16*10(arg1), \T5

        i = 11
        setreg
.rep (\REP-9)

        vaesenc \T5, \XMM1, \XMM1
        vaesenc \T5, \XMM2, \XMM2
        vaesenc \T5, \XMM3, \XMM3
        vaesenc \T5, \XMM4, \XMM4
        vaesenc \T5, \XMM5, \XMM5
        vaesenc \T5, \XMM6, \XMM6
        vaesenc \T5, \XMM7, \XMM7
        vaesenc \T5, \XMM8, \XMM8

        vmovdqu 16*i(arg1), \T5
        i = i + 1
        setreg
.endr

        i = 0
        j = 1
        setreg
.rep 8
        vpxor   16*i(arg4, %r11), \T5, \T2
.if \ENC_DEC == ENC
        vaesenclast     \T2, reg_j, reg_j
.else
        vaesenclast     \T2, reg_j, \T3
        vmovdqu 16*i(arg4, %r11), reg_j
        vmovdqu \T3, 16*i(arg3, %r11)
.endif
        i = (i+1)
        j = (j+1)
        setreg
.endr
###############################################################################

        vpslldq $8, \T6, \T3                    # shift-L T3 2 DWs
        vpsrldq $8, \T6, \T6                    # shift-R T6 2 DWs
        vpxor   \T3, \T7, \T7
        vpxor   \T4, \T6, \T6                   # accumulate the results in T6:T7
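        # T4/T7 hold the accumulated high/low Karatsuba products and T6 the
        # corrected middle term; the middle term straddles the 128-bit
        # boundary, so it is split with the byte shifts above and folded
        # into both halves, leaving the full 256-bit product in T6:T7.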
###############################################################################
        # first phase of the reduction
###############################################################################
        vpslld  $31, \T7, \T2                   # packed left shift << 31
        vpslld  $30, \T7, \T3                   # packed left shift << 30
        vpslld  $25, \T7, \T4                   # packed left shift << 25

        vpxor   \T3, \T2, \T2                   # xor the shifted versions
        vpxor   \T4, \T2, \T2

        vpsrldq $4, \T2, \T1                    # shift-R T1 1 DW

        vpslldq $12, \T2, \T2                   # shift-L T2 3 DWs
        vpxor   \T2, \T7, \T7                   # first phase of the reduction complete
###############################################################################
.if \ENC_DEC == ENC
        vmovdqu \XMM1, 16*0(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu \XMM2, 16*1(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu \XMM3, 16*2(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu \XMM4, 16*3(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu \XMM5, 16*4(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu \XMM6, 16*5(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu \XMM7, 16*6(arg3,%r11)          # Write to the Ciphertext buffer
        vmovdqu \XMM8, 16*7(arg3,%r11)          # Write to the Ciphertext buffer
.endif

###############################################################################
        # second phase of the reduction
        vpsrld  $1, \T7, \T2                    # packed right shift >> 1
        vpsrld  $2, \T7, \T3                    # packed right shift >> 2
        vpsrld  $7, \T7, \T4                    # packed right shift >> 7
        vpxor   \T3, \T2, \T2                   # xor the shifted versions
        vpxor   \T4, \T2, \T2

        vpxor   \T1, \T2, \T2
        vpxor   \T2, \T7, \T7
        vpxor   \T7, \T6, \T6                   # the result is in T6
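        # The two shift phases implement the modular reduction by the GCM
        # polynomial x^128 + x^7 + x^2 + x + 1 on bit-reflected operands
        # (see the Gopal et al. paper referenced at the top of this file):
        # the low half of the product is folded back with shifted copies
        # (<<31,30,25 then >>1,2,7) instead of extra pclmulqdq instructions.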
###############################################################################

        vpshufb SHUF_MASK(%rip), \XMM1, \XMM1   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM2, \XMM2   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap
        vpshufb SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap

        vpxor   \T6, \XMM1, \XMM1

.endm
# GHASH the last 8 ciphertext blocks.
.macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
        ## Karatsuba Method

        vpshufd $0b01001110, \XMM1, \T2
        vpxor   \XMM1, \T2, \T2
        vmovdqu HashKey_8(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM1, \T6
        vpclmulqdq      $0x00, \T5, \XMM1, \T7

        vmovdqu HashKey_8_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \XMM1

        ######################

        vpshufd $0b01001110, \XMM2, \T2
        vpxor   \XMM2, \T2, \T2
        vmovdqu HashKey_7(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM2, \T4
        vpxor   \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM2, \T4
        vpxor   \T4, \T7, \T7

        vmovdqu HashKey_7_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor   \T2, \XMM1, \XMM1

        ######################

        vpshufd $0b01001110, \XMM3, \T2
        vpxor   \XMM3, \T2, \T2
        vmovdqu HashKey_6(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM3, \T4
        vpxor   \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM3, \T4
        vpxor   \T4, \T7, \T7

        vmovdqu HashKey_6_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor   \T2, \XMM1, \XMM1

        ######################

        vpshufd $0b01001110, \XMM4, \T2
        vpxor   \XMM4, \T2, \T2
        vmovdqu HashKey_5(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM4, \T4
        vpxor   \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM4, \T4
        vpxor   \T4, \T7, \T7

        vmovdqu HashKey_5_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor   \T2, \XMM1, \XMM1

        ######################

        vpshufd $0b01001110, \XMM5, \T2
        vpxor   \XMM5, \T2, \T2
        vmovdqu HashKey_4(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM5, \T4
        vpxor   \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM5, \T4
        vpxor   \T4, \T7, \T7

        vmovdqu HashKey_4_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor   \T2, \XMM1, \XMM1

        ######################

        vpshufd $0b01001110, \XMM6, \T2
        vpxor   \XMM6, \T2, \T2
        vmovdqu HashKey_3(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM6, \T4
        vpxor   \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM6, \T4
        vpxor   \T4, \T7, \T7

        vmovdqu HashKey_3_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor   \T2, \XMM1, \XMM1

        ######################

        vpshufd $0b01001110, \XMM7, \T2
        vpxor   \XMM7, \T2, \T2
        vmovdqu HashKey_2(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM7, \T4
        vpxor   \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM7, \T4
        vpxor   \T4, \T7, \T7

        vmovdqu HashKey_2_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2
        vpxor   \T2, \XMM1, \XMM1

        ######################

        vpshufd $0b01001110, \XMM8, \T2
        vpxor   \XMM8, \T2, \T2
        vmovdqu HashKey(arg2), \T5
        vpclmulqdq      $0x11, \T5, \XMM8, \T4
        vpxor   \T4, \T6, \T6

        vpclmulqdq      $0x00, \T5, \XMM8, \T4
        vpxor   \T4, \T7, \T7

        vmovdqu HashKey_k(arg2), \T3
        vpclmulqdq      $0x00, \T3, \T2, \T2

        vpxor   \T2, \XMM1, \XMM1
        vpxor   \T6, \XMM1, \XMM1
        vpxor   \T7, \XMM1, \T2

        vpslldq $8, \T2, \T4
        vpsrldq $8, \T2, \T2

        vpxor   \T4, \T7, \T7
        vpxor   \T2, \T6, \T6                   # <T6:T7> holds the result of
                                                # the accumulated carry-less multiplications
###############################################################################
        # first phase of the reduction
        vpslld  $31, \T7, \T2                   # packed left shift << 31
        vpslld  $30, \T7, \T3                   # packed left shift << 30
        vpslld  $25, \T7, \T4                   # packed left shift << 25

        vpxor   \T3, \T2, \T2                   # xor the shifted versions
        vpxor   \T4, \T2, \T2

        vpsrldq $4, \T2, \T1                    # shift-R T1 1 DW

        vpslldq $12, \T2, \T2                   # shift-L T2 3 DWs
        vpxor   \T2, \T7, \T7                   # first phase of the reduction complete
###############################################################################

        # second phase of the reduction
        vpsrld  $1, \T7, \T2                    # packed right shift >> 1
        vpsrld  $2, \T7, \T3                    # packed right shift >> 2
        vpsrld  $7, \T7, \T4                    # packed right shift >> 7
        vpxor   \T3, \T2, \T2                   # xor the shifted versions
        vpxor   \T4, \T2, \T2

        vpxor   \T1, \T2, \T2
        vpxor   \T2, \T7, \T7
        vpxor   \T7, \T6, \T6                   # the result is in T6

.endm
#################################################################
# void aesni_gcm_precomp_avx_gen2
#        (gcm_data     *my_ctx_data,
#         gcm_context_data *data,
#         u8 *hash_subkey)  # /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
#################################################################
ENTRY(aesni_gcm_precomp_avx_gen2)
        FUNC_SAVE

        vmovdqu (arg3), %xmm6                   # xmm6 = HashKey

        vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
        ###############  PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
        vmovdqa %xmm6, %xmm2
        vpsllq  $1, %xmm6, %xmm6
        vpsrlq  $63, %xmm2, %xmm2
        vmovdqa %xmm2, %xmm1
        vpslldq $8, %xmm2, %xmm2
        vpsrldq $8, %xmm1, %xmm1
        vpor    %xmm2, %xmm6, %xmm6
        # reduction
        vpshufd $0b00100100, %xmm1, %xmm2
        vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
        vpand   POLY(%rip), %xmm2, %xmm2
        vpxor   %xmm2, %xmm6, %xmm6             # xmm6 holds the HashKey<<1 mod poly
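        # Branchless doubling in GF(2^128): the 128-bit left shift is built
        # from two 64-bit shifts plus a cross-qword carry (vpor), and the
        # bit shifted out of the top (captured in xmm1) drives the
        # vpcmpeqd/vpand mask that decides whether POLY is XORed in.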
        #######################################################################
        vmovdqu %xmm6, HashKey(arg2)            # store HashKey<<1 mod poly

        PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5

        FUNC_RESTORE
        ret
ENDPROC(aesni_gcm_precomp_avx_gen2)
###############################################################################
# void aesni_gcm_enc_avx_gen2(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Ciphertext output. Encrypt in-place is allowed. */
#        const   u8 *in, /* Plaintext input */
#        u64     plaintext_len, /* Length of data in Bytes for encryption. */
#        u8      *iv, /* Pre-counter block j0: 4 byte salt
#                       (from Security Association) concatenated with 8 byte
#                       Initialisation Vector (from IPSec ESP Payload)
#                       concatenated with 0x00000001. 16-byte aligned pointer. */
#        const   u8 *aad, /* Additional Authentication Data (AAD) */
#        u64     aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
#        u8      *auth_tag, /* Authenticated Tag output. */
#        u64     auth_tag_len) # /* Authenticated Tag Length in bytes.
#                               Valid values are 16 (most likely), 12 or 8. */
###############################################################################
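# The key-size dispatch below selects the REP macro argument, i.e. the
# number of middle AES rounds: 9 for AES-128 (10 rounds total), 11 for
# AES-192 (12 rounds) and 13 for AES-256 (14 rounds); the final round is
# always performed with vaesenclast.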
ENTRY(aesni_gcm_enc_avx_gen2)
        FUNC_SAVE
        mov     keysize, %eax
        cmp     $32, %eax
        je      key_256_enc
        cmp     $16, %eax
        je      key_128_enc
        # must be 192
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
        FUNC_RESTORE
        ret
key_128_enc:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
        FUNC_RESTORE
        ret
key_256_enc:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
        FUNC_RESTORE
        ret
ENDPROC(aesni_gcm_enc_avx_gen2)
###############################################################################
# void aesni_gcm_dec_avx_gen2(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Plaintext output. Decrypt in-place is allowed. */
#        const   u8 *in, /* Ciphertext input */
#        u64     plaintext_len, /* Length of data in Bytes for decryption. */
#        u8      *iv, /* Pre-counter block j0: 4 byte salt
#                       (from Security Association) concatenated with 8 byte
#                       Initialisation Vector (from IPSec ESP Payload)
#                       concatenated with 0x00000001. 16-byte aligned pointer. */
#        const   u8 *aad, /* Additional Authentication Data (AAD) */
#        u64     aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
#        u8      *auth_tag, /* Authenticated Tag output. */
#        u64     auth_tag_len) # /* Authenticated Tag Length in bytes.
#                               Valid values are 16 (most likely), 12 or 8. */
###############################################################################
ENTRY(aesni_gcm_dec_avx_gen2)
        FUNC_SAVE
        mov     keysize, %eax
        cmp     $32, %eax
        je      key_256_dec
        cmp     $16, %eax
        je      key_128_dec
        # must be 192
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
        FUNC_RESTORE
        ret
key_128_dec:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
        FUNC_RESTORE
        ret
key_256_dec:
        GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
        FUNC_RESTORE
        ret
ENDPROC(aesni_gcm_dec_avx_gen2)
#endif /* CONFIG_AS_AVX */

#ifdef CONFIG_AS_AVX2
###############################################################################
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
# Input: A and B (128-bits each, bit-reflected)
# Output: C = A*B*x mod poly, (i.e. >>1)
# To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
# GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
###############################################################################
.macro  GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5

        vpclmulqdq      $0x11, \HK, \GH, \T1    # T1 = a1*b1
        vpclmulqdq      $0x00, \HK, \GH, \T2    # T2 = a0*b0
        vpclmulqdq      $0x01, \HK, \GH, \T3    # T3 = a1*b0
        vpclmulqdq      $0x10, \HK, \GH, \GH    # GH = a0*b1
        vpxor   \T3, \GH, \GH

        vpsrldq $8, \GH, \T3                    # shift-R GH 2 DWs
        vpslldq $8, \GH, \GH                    # shift-L GH 2 DWs

        vpxor   \T3, \T1, \T1
        vpxor   \T2, \GH, \GH

        #######################################################################
        # first phase of the reduction
        vmovdqa POLY2(%rip), \T3

        vpclmulqdq      $0x01, \GH, \T3, \T2
        vpslldq $8, \T2, \T2                    # shift-L T2 2 DWs

        vpxor   \T2, \GH, \GH                   # first phase of the reduction complete
        #######################################################################
        # second phase of the reduction
        vpclmulqdq      $0x00, \GH, \T3, \T2
        vpsrldq $4, \T2, \T2                    # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)

        vpclmulqdq      $0x10, \GH, \T3, \GH
        vpslldq $4, \GH, \GH                    # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts)

        vpxor   \T2, \GH, \GH                   # second phase of the reduction complete
        #######################################################################
        vpxor   \T1, \GH, \GH                   # the result is in GH

.endm
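# Note: unlike the shift-based reduction in the AVX (gen2) macros above,
# this variant folds the upper half back with two extra vpclmulqdq against
# the POLY2 constant (the reduction polynomial laid out for carry-less
# multiplication), trading the shift/XOR chains for multiplies, which is
# presumably a win where pclmulqdq throughput is high.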
.macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6
# HashKey_i_k holds XORed values of the low and high parts of HashKey_i
        vmovdqa \HK, \T5
        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2        # T5 = HashKey^2<<1 mod poly
        vmovdqu \T5, HashKey_2(arg2)                            # [HashKey_2] = HashKey^2<<1 mod poly

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2        # T5 = HashKey^3<<1 mod poly
        vmovdqu \T5, HashKey_3(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2        # T5 = HashKey^4<<1 mod poly
        vmovdqu \T5, HashKey_4(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2        # T5 = HashKey^5<<1 mod poly
        vmovdqu \T5, HashKey_5(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2        # T5 = HashKey^6<<1 mod poly
        vmovdqu \T5, HashKey_6(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2        # T5 = HashKey^7<<1 mod poly
        vmovdqu \T5, HashKey_7(arg2)

        GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2        # T5 = HashKey^8<<1 mod poly
        vmovdqu \T5, HashKey_8(arg2)

.endm
## if a = number of total plaintext bytes
## b = floor(a/16)
## num_initial_blocks = b mod 8
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
## arg1, arg3, arg4, r14 are used as pointers only, not modified

.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
        i = (8-\num_initial_blocks)
        j = 0
        setreg

        mov     arg7, %r10                      # r10 = AAD
        mov     arg8, %r12                      # r12 = aadLen

        mov     %r12, %r11

        vpxor   reg_j, reg_j, reg_j
        vpxor   reg_i, reg_i, reg_i

        cmp     $16, %r11
        jl      _get_AAD_rest8\@
_get_AAD_blocks\@:
        vmovdqu (%r10), reg_i
        vpshufb SHUF_MASK(%rip), reg_i, reg_i
        vpxor   reg_i, reg_j, reg_j
        GHASH_MUL_AVX2  reg_j, \T2, \T1, \T3, \T4, \T5, \T6
        add     $16, %r10
        sub     $16, %r12
        sub     $16, %r11
        cmp     $16, %r11
        jge     _get_AAD_blocks\@
        vmovdqu reg_j, reg_i
        cmp     $0, %r11
        je      _get_AAD_done\@

        vpxor   reg_i, reg_i, reg_i

        /* read the last <16B of AAD. since we have at least 4B of
        data right after the AAD (the ICV, and maybe some CT), we can
        read 4B/8B blocks safely, and then get rid of the extra stuff */
_get_AAD_rest8\@:
        cmp     $4, %r11
        jle     _get_AAD_rest4\@
        movq    (%r10), \T1
        add     $8, %r10
        sub     $8, %r11
        vpslldq $8, \T1, \T1
        vpsrldq $8, reg_i, reg_i
        vpxor   \T1, reg_i, reg_i
        jmp     _get_AAD_rest8\@
_get_AAD_rest4\@:
        cmp     $0, %r11
        jle     _get_AAD_rest0\@
        mov     (%r10), %eax
        movq    %rax, \T1
        add     $4, %r10
        sub     $4, %r11
        vpslldq $12, \T1, \T1
        vpsrldq $4, reg_i, reg_i
        vpxor   \T1, reg_i, reg_i
_get_AAD_rest0\@:
        /* finalize: shift out the extra bytes we read, and align
        left. since pslldq can only shift by an immediate, we use
        vpshufb and an array of shuffle masks */
        movq    %r12, %r11
        salq    $4, %r11
        movdqu  aad_shift_arr(%r11), \T1
        vpshufb \T1, reg_i, reg_i
_get_AAD_rest_final\@:
        vpshufb SHUF_MASK(%rip), reg_i, reg_i
        vpxor   reg_j, reg_i, reg_i
        GHASH_MUL_AVX2  reg_i, \T2, \T1, \T3, \T4, \T5, \T6
_get_AAD_done\@:
        # initialize the data pointer offset as zero
        xor     %r11d, %r11d

        # start AES for num_initial_blocks blocks
        mov     arg6, %rax                      # rax = *Y0
        vmovdqu (%rax), \CTR                    # CTR = Y0
        vpshufb SHUF_MASK(%rip), \CTR, \CTR

        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vpaddd  ONE(%rip), \CTR, \CTR           # INCR Y0
        vmovdqa \CTR, reg_i
        vpshufb SHUF_MASK(%rip), reg_i, reg_i   # perform a 16Byte swap
        i = (i+1)
        setreg
.endr

        vmovdqa (arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vpxor   \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        j = 1
        setreg
.rep \REP
        vmovdqa 16*j(arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenc \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        j = (j+1)
        setreg
.endr

        vmovdqa 16*j(arg1), \T_key
        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vaesenclast     \T_key, reg_i, reg_i
        i = (i+1)
        setreg
.endr

        i = (9-\num_initial_blocks)
        setreg
.rep \num_initial_blocks
        vmovdqu (arg4, %r11), \T1
        vpxor   \T1, reg_i, reg_i
        vmovdqu reg_i, (arg3, %r11)             # write back ciphertext for
                                                # num_initial_blocks blocks
        add     $16, %r11
.if \ENC_DEC == DEC
        vmovdqa \T1, reg_i
.endif
        vpshufb SHUF_MASK(%rip), reg_i, reg_i   # prepare ciphertext for GHASH computations
        i = (i+1)
        setreg
.endr

        i = (8-\num_initial_blocks)
        j = (9-\num_initial_blocks)
        setreg

.rep \num_initial_blocks
        vpxor   reg_i, reg_j, reg_j
        GHASH_MUL_AVX2  reg_j, \T2, \T1, \T3, \T4, \T5, \T6     # apply GHASH on num_initial_blocks blocks
        i = (i+1)
        j = (j+1)
        setreg
.endr
        # XMM8 has the combined result here

        vmovdqa \XMM8, TMP1(%rsp)
        vmovdqa \XMM8, \T3

        cmp     $128, %r13
        jl      _initial_blocks_done\@          # no need for precomputed constants
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Haskey_ i _ k h o l d s X O R e d v a l u e s o f t h e l o w a n d h i g h p a r t s o f t h e H a s k e y _ i
vpaddd O N E ( % r i p ) , \ C T R , \ C T R # I N C R Y 0
vmovdqa \ C T R , \ X M M 1
vpshufb S H U F _ M A S K ( % r i p ) , \ X M M 1 , \ X M M 1 # p e r f o r m a 16 B y t e s w a p
vpaddd O N E ( % r i p ) , \ C T R , \ C T R # I N C R Y 0
vmovdqa \ C T R , \ X M M 2
vpshufb S H U F _ M A S K ( % r i p ) , \ X M M 2 , \ X M M 2 # p e r f o r m a 16 B y t e s w a p
vpaddd O N E ( % r i p ) , \ C T R , \ C T R # I N C R Y 0
vmovdqa \ C T R , \ X M M 3
vpshufb S H U F _ M A S K ( % r i p ) , \ X M M 3 , \ X M M 3 # p e r f o r m a 16 B y t e s w a p
vpaddd O N E ( % r i p ) , \ C T R , \ C T R # I N C R Y 0
vmovdqa \ C T R , \ X M M 4
vpshufb S H U F _ M A S K ( % r i p ) , \ X M M 4 , \ X M M 4 # p e r f o r m a 16 B y t e s w a p
vpaddd O N E ( % r i p ) , \ C T R , \ C T R # I N C R Y 0
vmovdqa \ C T R , \ X M M 5
vpshufb S H U F _ M A S K ( % r i p ) , \ X M M 5 , \ X M M 5 # p e r f o r m a 16 B y t e s w a p
vpaddd O N E ( % r i p ) , \ C T R , \ C T R # I N C R Y 0
vmovdqa \ C T R , \ X M M 6
vpshufb S H U F _ M A S K ( % r i p ) , \ X M M 6 , \ X M M 6 # p e r f o r m a 16 B y t e s w a p
vpaddd O N E ( % r i p ) , \ C T R , \ C T R # I N C R Y 0
vmovdqa \ C T R , \ X M M 7
vpshufb S H U F _ M A S K ( % r i p ) , \ X M M 7 , \ X M M 7 # p e r f o r m a 16 B y t e s w a p
vpaddd O N E ( % r i p ) , \ C T R , \ C T R # I N C R Y 0
vmovdqa \ C T R , \ X M M 8
vpshufb S H U F _ M A S K ( % r i p ) , \ X M M 8 , \ X M M 8 # p e r f o r m a 16 B y t e s w a p
		vmovdqa  (arg1), \T_key
		vpxor    \T_key, \XMM1, \XMM1
		vpxor    \T_key, \XMM2, \XMM2
		vpxor    \T_key, \XMM3, \XMM3
		vpxor    \T_key, \XMM4, \XMM4
		vpxor    \T_key, \XMM5, \XMM5
		vpxor    \T_key, \XMM6, \XMM6
		vpxor    \T_key, \XMM7, \XMM7
		vpxor    \T_key, \XMM8, \XMM8

	i = 1
	setreg
.rep \REP       # do REP rounds
		vmovdqa  16*i(arg1), \T_key
		vaesenc  \T_key, \XMM1, \XMM1
		vaesenc  \T_key, \XMM2, \XMM2
		vaesenc  \T_key, \XMM3, \XMM3
		vaesenc  \T_key, \XMM4, \XMM4
		vaesenc  \T_key, \XMM5, \XMM5
		vaesenc  \T_key, \XMM6, \XMM6
		vaesenc  \T_key, \XMM7, \XMM7
		vaesenc  \T_key, \XMM8, \XMM8
	i = (i+1)
	setreg
.endr

		vmovdqa  16*i(arg1), \T_key
		vaesenclast  \T_key, \XMM1, \XMM1
		vaesenclast  \T_key, \XMM2, \XMM2
		vaesenclast  \T_key, \XMM3, \XMM3
		vaesenclast  \T_key, \XMM4, \XMM4
		vaesenclast  \T_key, \XMM5, \XMM5
		vaesenclast  \T_key, \XMM6, \XMM6
		vaesenclast  \T_key, \XMM7, \XMM7
		vaesenclast  \T_key, \XMM8, \XMM8
		vmovdqu  (arg4, %r11), \T1
		vpxor    \T1, \XMM1, \XMM1
		vmovdqu  \XMM1, (arg3 , %r11)
		.if  \ENC_DEC == DEC
		vmovdqa  \T1, \XMM1
		.endif

		vmovdqu  16*1(arg4, %r11), \T1
		vpxor    \T1, \XMM2, \XMM2
		vmovdqu  \XMM2, 16*1(arg3 , %r11)
		.if  \ENC_DEC == DEC
		vmovdqa  \T1, \XMM2
		.endif

		vmovdqu  16*2(arg4, %r11), \T1
		vpxor    \T1, \XMM3, \XMM3
		vmovdqu  \XMM3, 16*2(arg3 , %r11)
		.if  \ENC_DEC == DEC
		vmovdqa  \T1, \XMM3
		.endif

		vmovdqu  16*3(arg4, %r11), \T1
		vpxor    \T1, \XMM4, \XMM4
		vmovdqu  \XMM4, 16*3(arg3 , %r11)
		.if  \ENC_DEC == DEC
		vmovdqa  \T1, \XMM4
		.endif

		vmovdqu  16*4(arg4, %r11), \T1
		vpxor    \T1, \XMM5, \XMM5
		vmovdqu  \XMM5, 16*4(arg3 , %r11)
		.if  \ENC_DEC == DEC
		vmovdqa  \T1, \XMM5
		.endif

		vmovdqu  16*5(arg4, %r11), \T1
		vpxor    \T1, \XMM6, \XMM6
		vmovdqu  \XMM6, 16*5(arg3 , %r11)
		.if  \ENC_DEC == DEC
		vmovdqa  \T1, \XMM6
		.endif

		vmovdqu  16*6(arg4, %r11), \T1
		vpxor    \T1, \XMM7, \XMM7
		vmovdqu  \XMM7, 16*6(arg3 , %r11)
		.if  \ENC_DEC == DEC
		vmovdqa  \T1, \XMM7
		.endif

		vmovdqu  16*7(arg4, %r11), \T1
		vpxor    \T1, \XMM8, \XMM8
		vmovdqu  \XMM8, 16*7(arg3 , %r11)
		.if  \ENC_DEC == DEC
		vmovdqa  \T1, \XMM8
		.endif
		add     $128, %r11

		vpshufb  SHUF_MASK(%rip), \XMM1, \XMM1
		vpxor    TMP1(%rsp), \XMM1, \XMM1       # combine GHASHed value with
							# the corresponding ciphertext
		vpshufb  SHUF_MASK(%rip), \XMM2, \XMM2  # perform a 16Byte swap
		vpshufb  SHUF_MASK(%rip), \XMM3, \XMM3  # perform a 16Byte swap
		vpshufb  SHUF_MASK(%rip), \XMM4, \XMM4  # perform a 16Byte swap
		vpshufb  SHUF_MASK(%rip), \XMM5, \XMM5  # perform a 16Byte swap
		vpshufb  SHUF_MASK(%rip), \XMM6, \XMM6  # perform a 16Byte swap
		vpshufb  SHUF_MASK(%rip), \XMM7, \XMM7  # perform a 16Byte swap
		vpshufb  SHUF_MASK(%rip), \XMM8, \XMM8  # perform a 16Byte swap

###############################################################################

_initial_blocks_done\@:

.endm
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
# arg1, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
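	# The macro body interleaves the AES rounds for the current eight
	# counter blocks with the carry-less multiplies that GHASH the
	# previous eight ciphertext blocks, so the vaesenc and vpclmulqdq
	# pipelines overlap. Rough schedule sketch (illustrative pseudocode,
	# not part of this file):
	#
	#	for (r = 0; r < rounds; r++) {
	#		aes_round_x8(blk, round_key[r]);	/* 8-wide vaesenc  */
	#		if (r has a pending GHASH step)
	#			ghash_fold(prev_ct[r], HashKey_i);  /* clmul + xor */
	#	}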
		vmovdqa \XMM1, \T2
		vmovdqa \XMM2, TMP2(%rsp)
		vmovdqa \XMM3, TMP3(%rsp)
		vmovdqa \XMM4, TMP4(%rsp)
		vmovdqa \XMM5, TMP5(%rsp)
		vmovdqa \XMM6, TMP6(%rsp)
		vmovdqa \XMM7, TMP7(%rsp)
		vmovdqa \XMM8, TMP8(%rsp)
.if \loop_idx == in_order
		vpaddd  ONE(%rip), \CTR, \XMM1          # INCR CNT
		vpaddd  ONE(%rip), \XMM1, \XMM2
		vpaddd  ONE(%rip), \XMM2, \XMM3
		vpaddd  ONE(%rip), \XMM3, \XMM4
		vpaddd  ONE(%rip), \XMM4, \XMM5
		vpaddd  ONE(%rip), \XMM5, \XMM6
		vpaddd  ONE(%rip), \XMM6, \XMM7
		vpaddd  ONE(%rip), \XMM7, \XMM8
		vmovdqa \XMM8, \CTR

		vpshufb SHUF_MASK(%rip), \XMM1, \XMM1   # perform a 16Byte swap
		vpshufb SHUF_MASK(%rip), \XMM2, \XMM2   # perform a 16Byte swap
		vpshufb SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap
		vpshufb SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap
		vpshufb SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap
		vpshufb SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap
		vpshufb SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap
		vpshufb SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap
.else
		vpaddd  ONEf(%rip), \CTR, \XMM1         # INCR CNT
		vpaddd  ONEf(%rip), \XMM1, \XMM2
		vpaddd  ONEf(%rip), \XMM2, \XMM3
		vpaddd  ONEf(%rip), \XMM3, \XMM4
		vpaddd  ONEf(%rip), \XMM4, \XMM5
		vpaddd  ONEf(%rip), \XMM5, \XMM6
		vpaddd  ONEf(%rip), \XMM6, \XMM7
		vpaddd  ONEf(%rip), \XMM7, \XMM8
		vmovdqa \XMM8, \CTR
.endif
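		# in_order increments the native little-endian counter with
		# ONE and then byte-swaps each block for AES; the .else path
		# uses ONEf, a constant laid out so the increment applies
		# directly to the already byte-swapped counter, saving eight
		# vpshufb instructions per iteration. Roughly,
		#
		#	swap(ctr) + ONEf == swap(ctr + ONE)
		#
		# which holds only while the increment does not carry across
		# the swapped byte lanes - an assumption of this fast path.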
	#######################################################################

		vmovdqu (arg1), \T1
		vpxor   \T1, \XMM1, \XMM1
		vpxor   \T1, \XMM2, \XMM2
		vpxor   \T1, \XMM3, \XMM3
		vpxor   \T1, \XMM4, \XMM4
		vpxor   \T1, \XMM5, \XMM5
		vpxor   \T1, \XMM6, \XMM6
		vpxor   \T1, \XMM7, \XMM7
		vpxor   \T1, \XMM8, \XMM8

	#######################################################################

		vmovdqu 16*1(arg1), \T1
		vaesenc \T1, \XMM1, \XMM1
		vaesenc \T1, \XMM2, \XMM2
		vaesenc \T1, \XMM3, \XMM3
		vaesenc \T1, \XMM4, \XMM4
		vaesenc \T1, \XMM5, \XMM5
		vaesenc \T1, \XMM6, \XMM6
		vaesenc \T1, \XMM7, \XMM7
		vaesenc \T1, \XMM8, \XMM8

		vmovdqu 16*2(arg1), \T1
		vaesenc \T1, \XMM1, \XMM1
		vaesenc \T1, \XMM2, \XMM2
		vaesenc \T1, \XMM3, \XMM3
		vaesenc \T1, \XMM4, \XMM4
		vaesenc \T1, \XMM5, \XMM5
		vaesenc \T1, \XMM6, \XMM6
		vaesenc \T1, \XMM7, \XMM7
		vaesenc \T1, \XMM8, \XMM8

	#######################################################################

		vmovdqu         HashKey_8(arg2), \T5
		vpclmulqdq      $0x11, \T5, \T2, \T4    # T4 = a1*b1
		vpclmulqdq      $0x00, \T5, \T2, \T7    # T7 = a0*b0
		vpclmulqdq      $0x01, \T5, \T2, \T6    # T6 = a1*b0
		vpclmulqdq      $0x10, \T5, \T2, \T5    # T5 = a0*b1
		vpxor           \T5, \T6, \T6
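	# Schoolbook 128x128 carry-less multiply: writing a = a1*x^64 + a0
	# and b = b1*x^64 + b0,
	#
	#	a*b = a1b1*x^128 + (a1b0 ^ a0b1)*x^64 + a0b0
	#
	# T4 holds the high product (a1*b1), T7 the low product (a0*b0), and
	# T6 the combined middle term, which the shift-L/shift-R pair below
	# splits across both halves before the reduction.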
		vmovdqu 16*3(arg1), \T1
		vaesenc \T1, \XMM1, \XMM1
		vaesenc \T1, \XMM2, \XMM2
		vaesenc \T1, \XMM3, \XMM3
		vaesenc \T1, \XMM4, \XMM4
		vaesenc \T1, \XMM5, \XMM5
		vaesenc \T1, \XMM6, \XMM6
		vaesenc \T1, \XMM7, \XMM7
		vaesenc \T1, \XMM8, \XMM8

		vmovdqa         TMP2(%rsp), \T1
		vmovdqu         HashKey_7(arg2), \T5
		vpclmulqdq      $0x11, \T5, \T1, \T3
		vpxor           \T3, \T4, \T4

		vpclmulqdq      $0x00, \T5, \T1, \T3
		vpxor           \T3, \T7, \T7

		vpclmulqdq      $0x01, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vpclmulqdq      $0x10, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vmovdqu 16*4(arg1), \T1
		vaesenc \T1, \XMM1, \XMM1
		vaesenc \T1, \XMM2, \XMM2
		vaesenc \T1, \XMM3, \XMM3
		vaesenc \T1, \XMM4, \XMM4
		vaesenc \T1, \XMM5, \XMM5
		vaesenc \T1, \XMM6, \XMM6
		vaesenc \T1, \XMM7, \XMM7
		vaesenc \T1, \XMM8, \XMM8

	#######################################################################

		vmovdqa         TMP3(%rsp), \T1
		vmovdqu         HashKey_6(arg2), \T5
		vpclmulqdq      $0x11, \T5, \T1, \T3
		vpxor           \T3, \T4, \T4

		vpclmulqdq      $0x00, \T5, \T1, \T3
		vpxor           \T3, \T7, \T7

		vpclmulqdq      $0x01, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vpclmulqdq      $0x10, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vmovdqu 16*5(arg1), \T1
		vaesenc \T1, \XMM1, \XMM1
		vaesenc \T1, \XMM2, \XMM2
		vaesenc \T1, \XMM3, \XMM3
		vaesenc \T1, \XMM4, \XMM4
		vaesenc \T1, \XMM5, \XMM5
		vaesenc \T1, \XMM6, \XMM6
		vaesenc \T1, \XMM7, \XMM7
		vaesenc \T1, \XMM8, \XMM8
		vmovdqa         TMP4(%rsp), \T1
		vmovdqu         HashKey_5(arg2), \T5
		vpclmulqdq      $0x11, \T5, \T1, \T3
		vpxor           \T3, \T4, \T4

		vpclmulqdq      $0x00, \T5, \T1, \T3
		vpxor           \T3, \T7, \T7

		vpclmulqdq      $0x01, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vpclmulqdq      $0x10, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vmovdqu 16*6(arg1), \T1
		vaesenc \T1, \XMM1, \XMM1
		vaesenc \T1, \XMM2, \XMM2
		vaesenc \T1, \XMM3, \XMM3
		vaesenc \T1, \XMM4, \XMM4
		vaesenc \T1, \XMM5, \XMM5
		vaesenc \T1, \XMM6, \XMM6
		vaesenc \T1, \XMM7, \XMM7
		vaesenc \T1, \XMM8, \XMM8

		vmovdqa         TMP5(%rsp), \T1
		vmovdqu         HashKey_4(arg2), \T5
		vpclmulqdq      $0x11, \T5, \T1, \T3
		vpxor           \T3, \T4, \T4

		vpclmulqdq      $0x00, \T5, \T1, \T3
		vpxor           \T3, \T7, \T7

		vpclmulqdq      $0x01, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vpclmulqdq      $0x10, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vmovdqu 16*7(arg1), \T1
		vaesenc \T1, \XMM1, \XMM1
		vaesenc \T1, \XMM2, \XMM2
		vaesenc \T1, \XMM3, \XMM3
		vaesenc \T1, \XMM4, \XMM4
		vaesenc \T1, \XMM5, \XMM5
		vaesenc \T1, \XMM6, \XMM6
		vaesenc \T1, \XMM7, \XMM7
		vaesenc \T1, \XMM8, \XMM8

		vmovdqa         TMP6(%rsp), \T1
		vmovdqu         HashKey_3(arg2), \T5
		vpclmulqdq      $0x11, \T5, \T1, \T3
		vpxor           \T3, \T4, \T4

		vpclmulqdq      $0x00, \T5, \T1, \T3
		vpxor           \T3, \T7, \T7

		vpclmulqdq      $0x01, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vpclmulqdq      $0x10, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vmovdqu 16*8(arg1), \T1
		vaesenc \T1, \XMM1, \XMM1
		vaesenc \T1, \XMM2, \XMM2
		vaesenc \T1, \XMM3, \XMM3
		vaesenc \T1, \XMM4, \XMM4
		vaesenc \T1, \XMM5, \XMM5
		vaesenc \T1, \XMM6, \XMM6
		vaesenc \T1, \XMM7, \XMM7
		vaesenc \T1, \XMM8, \XMM8

		vmovdqa         TMP7(%rsp), \T1
		vmovdqu         HashKey_2(arg2), \T5
		vpclmulqdq      $0x11, \T5, \T1, \T3
		vpxor           \T3, \T4, \T4

		vpclmulqdq      $0x00, \T5, \T1, \T3
		vpxor           \T3, \T7, \T7

		vpclmulqdq      $0x01, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vpclmulqdq      $0x10, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6
	#######################################################################

		vmovdqu 16*9(arg1), \T5
		vaesenc \T5, \XMM1, \XMM1
		vaesenc \T5, \XMM2, \XMM2
		vaesenc \T5, \XMM3, \XMM3
		vaesenc \T5, \XMM4, \XMM4
		vaesenc \T5, \XMM5, \XMM5
		vaesenc \T5, \XMM6, \XMM6
		vaesenc \T5, \XMM7, \XMM7
		vaesenc \T5, \XMM8, \XMM8

		vmovdqa         TMP8(%rsp), \T1
		vmovdqu         HashKey(arg2), \T5

		vpclmulqdq      $0x00, \T5, \T1, \T3
		vpxor           \T3, \T7, \T7

		vpclmulqdq      $0x01, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vpclmulqdq      $0x10, \T5, \T1, \T3
		vpxor           \T3, \T6, \T6

		vpclmulqdq      $0x11, \T5, \T1, \T3
		vpxor           \T3, \T4, \T1

		vmovdqu 16*10(arg1), \T5

	i = 11
	setreg
.rep (\REP-9)
	vaesenc \T5, \XMM1, \XMM1
	vaesenc \T5, \XMM2, \XMM2
	vaesenc \T5, \XMM3, \XMM3
	vaesenc \T5, \XMM4, \XMM4
	vaesenc \T5, \XMM5, \XMM5
	vaesenc \T5, \XMM6, \XMM6
	vaesenc \T5, \XMM7, \XMM7
	vaesenc \T5, \XMM8, \XMM8
	vmovdqu 16*i(arg1), \T5
	i = i + 1
	setreg
.endr
	i = 0
	j = 1
	setreg
.rep 8
		vpxor   16*i(arg4, %r11), \T5, \T2
		.if \ENC_DEC == ENC
		vaesenclast     \T2, reg_j, reg_j
		.else
		vaesenclast     \T2, reg_j, \T3
		vmovdqu 16*i(arg4, %r11), reg_j
		vmovdqu \T3, 16*i(arg3, %r11)
		.endif
	i = (i+1)
	j = (j+1)
	setreg
.endr
	#######################################################################

	vpslldq $8, \T6, \T3                    # shift-L T3 2 DWs
	vpsrldq $8, \T6, \T6                    # shift-R T6 2 DWs
	vpxor   \T3, \T7, \T7
	vpxor   \T6, \T1, \T1                   # accumulate the results in T1:T7

	#######################################################################
	# first phase of the reduction
	vmovdqa         POLY2(%rip), \T3

	vpclmulqdq      $0x01, \T7, \T3, \T2
	vpslldq         $8, \T2, \T2            # shift-L T2 2 DWs

	vpxor           \T2, \T7, \T7           # first phase of the reduction complete
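	# GHASH arithmetic is modulo g(x) = x^128 + x^7 + x^2 + x + 1, in the
	# bit-reflected representation encoded by the POLY2 constant. The
	# 256-bit product <T1:T7> is folded back to 128 bits in two clmul
	# steps: this first phase folds the low 64 bits of T7; the second
	# phase (below) folds the remaining 64 and XORs the result into T1.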
	#######################################################################
	.if \ENC_DEC == ENC
	vmovdqu  \XMM1, 16*0(arg3,%r11)         # Write to the Ciphertext buffer
	vmovdqu  \XMM2, 16*1(arg3,%r11)         # Write to the Ciphertext buffer
	vmovdqu  \XMM3, 16*2(arg3,%r11)         # Write to the Ciphertext buffer
	vmovdqu  \XMM4, 16*3(arg3,%r11)         # Write to the Ciphertext buffer
	vmovdqu  \XMM5, 16*4(arg3,%r11)         # Write to the Ciphertext buffer
	vmovdqu  \XMM6, 16*5(arg3,%r11)         # Write to the Ciphertext buffer
	vmovdqu  \XMM7, 16*6(arg3,%r11)         # Write to the Ciphertext buffer
	vmovdqu  \XMM8, 16*7(arg3,%r11)         # Write to the Ciphertext buffer
	.endif
	#######################################################################
	# second phase of the reduction
	vpclmulqdq      $0x00, \T7, \T3, \T2
	vpsrldq         $4, \T2, \T2            # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)

	vpclmulqdq      $0x10, \T7, \T3, \T4
	vpslldq         $4, \T4, \T4            # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)

	vpxor           \T2, \T4, \T4           # second phase of the reduction complete
	#######################################################################
	vpxor           \T4, \T1, \T1           # the result is in T1
	vpshufb SHUF_MASK(%rip), \XMM1, \XMM1   # perform a 16Byte swap
	vpshufb SHUF_MASK(%rip), \XMM2, \XMM2   # perform a 16Byte swap
	vpshufb SHUF_MASK(%rip), \XMM3, \XMM3   # perform a 16Byte swap
	vpshufb SHUF_MASK(%rip), \XMM4, \XMM4   # perform a 16Byte swap
	vpshufb SHUF_MASK(%rip), \XMM5, \XMM5   # perform a 16Byte swap
	vpshufb SHUF_MASK(%rip), \XMM6, \XMM6   # perform a 16Byte swap
	vpshufb SHUF_MASK(%rip), \XMM7, \XMM7   # perform a 16Byte swap
	vpshufb SHUF_MASK(%rip), \XMM8, \XMM8   # perform a 16Byte swap

	vpxor   \T1, \XMM1, \XMM1

.endm
# GHASH the last 8 ciphertext blocks.
.macro  GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8
	## Karatsuba Method

	vmovdqu         HashKey_8(arg2), \T5

	vpshufd         $0b01001110, \XMM1, \T2
	vpshufd         $0b01001110, \T5, \T3
	vpxor           \XMM1, \T2, \T2
	vpxor           \T5, \T3, \T3

	vpclmulqdq      $0x11, \T5, \XMM1, \T6
	vpclmulqdq      $0x00, \T5, \XMM1, \T7

	vpclmulqdq      $0x00, \T3, \T2, \XMM1
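	# Karatsuba replaces the four clmuls of the schoolbook product with
	# three: for a = a1*x^64 + a0 and b = b1*x^64 + b0,
	#
	#	a*b = a1b1*x^128 + [(a1^a0)(b1^b0) ^ a1b1 ^ a0b0]*x^64 + a0b0
	#
	# The vpshufd/vpxor pairs above build (a1^a0) and (b1^b0); T6 = a1b1,
	# T7 = a0b0, and XMM1 accumulates the (a1^a0)(b1^b0) middle terms,
	# from which T6 and T7 are XORed out after the last block below.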
	######################

	vmovdqu         HashKey_7(arg2), \T5
	vpshufd         $0b01001110, \XMM2, \T2
	vpshufd         $0b01001110, \T5, \T3
	vpxor           \XMM2, \T2, \T2
	vpxor           \T5, \T3, \T3

	vpclmulqdq      $0x11, \T5, \XMM2, \T4
	vpxor           \T4, \T6, \T6

	vpclmulqdq      $0x00, \T5, \XMM2, \T4
	vpxor           \T4, \T7, \T7

	vpclmulqdq      $0x00, \T3, \T2, \T2

	vpxor           \T2, \XMM1, \XMM1

	######################

	vmovdqu         HashKey_6(arg2), \T5
	vpshufd         $0b01001110, \XMM3, \T2
	vpshufd         $0b01001110, \T5, \T3
	vpxor           \XMM3, \T2, \T2
	vpxor           \T5, \T3, \T3

	vpclmulqdq      $0x11, \T5, \XMM3, \T4
	vpxor           \T4, \T6, \T6

	vpclmulqdq      $0x00, \T5, \XMM3, \T4
	vpxor           \T4, \T7, \T7

	vpclmulqdq      $0x00, \T3, \T2, \T2

	vpxor           \T2, \XMM1, \XMM1

	######################

	vmovdqu         HashKey_5(arg2), \T5
	vpshufd         $0b01001110, \XMM4, \T2
	vpshufd         $0b01001110, \T5, \T3
	vpxor           \XMM4, \T2, \T2
	vpxor           \T5, \T3, \T3

	vpclmulqdq      $0x11, \T5, \XMM4, \T4
	vpxor           \T4, \T6, \T6

	vpclmulqdq      $0x00, \T5, \XMM4, \T4
	vpxor           \T4, \T7, \T7

	vpclmulqdq      $0x00, \T3, \T2, \T2

	vpxor           \T2, \XMM1, \XMM1

	######################

	vmovdqu         HashKey_4(arg2), \T5
	vpshufd         $0b01001110, \XMM5, \T2
	vpshufd         $0b01001110, \T5, \T3
	vpxor           \XMM5, \T2, \T2
	vpxor           \T5, \T3, \T3

	vpclmulqdq      $0x11, \T5, \XMM5, \T4
	vpxor           \T4, \T6, \T6

	vpclmulqdq      $0x00, \T5, \XMM5, \T4
	vpxor           \T4, \T7, \T7

	vpclmulqdq      $0x00, \T3, \T2, \T2

	vpxor           \T2, \XMM1, \XMM1

	######################

	vmovdqu         HashKey_3(arg2), \T5
	vpshufd         $0b01001110, \XMM6, \T2
	vpshufd         $0b01001110, \T5, \T3
	vpxor           \XMM6, \T2, \T2
	vpxor           \T5, \T3, \T3

	vpclmulqdq      $0x11, \T5, \XMM6, \T4
	vpxor           \T4, \T6, \T6

	vpclmulqdq      $0x00, \T5, \XMM6, \T4
	vpxor           \T4, \T7, \T7

	vpclmulqdq      $0x00, \T3, \T2, \T2

	vpxor           \T2, \XMM1, \XMM1

	######################

	vmovdqu         HashKey_2(arg2), \T5
	vpshufd         $0b01001110, \XMM7, \T2
	vpshufd         $0b01001110, \T5, \T3
	vpxor           \XMM7, \T2, \T2
	vpxor           \T5, \T3, \T3

	vpclmulqdq      $0x11, \T5, \XMM7, \T4
	vpxor           \T4, \T6, \T6

	vpclmulqdq      $0x00, \T5, \XMM7, \T4
	vpxor           \T4, \T7, \T7

	vpclmulqdq      $0x00, \T3, \T2, \T2

	vpxor           \T2, \XMM1, \XMM1

	######################
	vmovdqu         HashKey(arg2), \T5
	vpshufd         $0b01001110, \XMM8, \T2
	vpshufd         $0b01001110, \T5, \T3
	vpxor           \XMM8, \T2, \T2
	vpxor           \T5, \T3, \T3

	vpclmulqdq      $0x11, \T5, \XMM8, \T4
	vpxor           \T4, \T6, \T6

	vpclmulqdq      $0x00, \T5, \XMM8, \T4
	vpxor           \T4, \T7, \T7

	vpclmulqdq      $0x00, \T3, \T2, \T2

	vpxor           \T2, \XMM1, \XMM1
	vpxor           \T6, \XMM1, \XMM1
	vpxor           \T7, \XMM1, \T2

	vpslldq $8, \T2, \T4
	vpsrldq $8, \T2, \T2

	vpxor   \T4, \T7, \T7
	vpxor   \T2, \T6, \T6                   # <T6:T7> holds the result of the
						# accumulated carry-less multiplications
	#######################################################################
	# first phase of the reduction
	vmovdqa         POLY2(%rip), \T3

	vpclmulqdq      $0x01, \T7, \T3, \T2
	vpslldq         $8, \T2, \T2            # shift-L T2 2 DWs

	vpxor           \T2, \T7, \T7           # first phase of the reduction complete
	#######################################################################

	# second phase of the reduction
	vpclmulqdq      $0x00, \T7, \T3, \T2
	vpsrldq         $4, \T2, \T2            # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R)

	vpclmulqdq      $0x10, \T7, \T3, \T4
	vpslldq         $4, \T4, \T4            # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts)

	vpxor           \T2, \T4, \T4           # second phase of the reduction complete
	#######################################################################
	vpxor           \T4, \T6, \T6           # the result is in T6
.endm
#################################################################
# void aesni_gcm_precomp_avx_gen4
#        (gcm_data     *my_ctx_data,
#         gcm_context_data *data,
#         u8 *hash_subkey)  /* H, the Hash sub key input.
#                              Data starts on a 16-byte boundary. */
#################################################################
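# Hedged caller sketch (struct and helper names here are illustrative, not
# part of this file; H is the hash subkey E_K(0^128) computed by the caller):
#
#	u8 hash_subkey[16] = {0};
#	aes_encrypt_block(&aes_ctx, hash_subkey, hash_subkey); /* H = E_K(0^128) */
#	aesni_gcm_precomp_avx_gen4(&my_ctx_data, &ctx_data, hash_subkey);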
ENTRY(aesni_gcm_precomp_avx_gen4)
	FUNC_SAVE

	vmovdqu  (arg3), %xmm6                  # xmm6 = HashKey

	vpshufb  SHUF_MASK(%rip), %xmm6, %xmm6
	###############  PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
	vmovdqa  %xmm6, %xmm2
	vpsllq   $1, %xmm6, %xmm6
	vpsrlq   $63, %xmm2, %xmm2
	vmovdqa  %xmm2, %xmm1
	vpslldq  $8, %xmm2, %xmm2
	vpsrldq  $8, %xmm1, %xmm1

	vpor     %xmm2, %xmm6, %xmm6
	# reduction
	vpshufd  $0b00100100, %xmm1, %xmm2
	vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
	vpand    POLY(%rip), %xmm2, %xmm2
	vpxor    %xmm2, %xmm6, %xmm6            # xmm6 holds the HashKey<<1 mod poly
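	# The shift/carry dance above computes H<<1 mod g(x): the 128-bit
	# value is shifted left one bit (vpsllq/vpsrlq/vpor stitch the
	# cross-qword carry), and the polynomial is conditionally XORed back
	# in when a bit carries out of the top. A C sketch of the intent
	# (hypothetical u128 type, same bit convention):
	#
	#	u128 h2 = h << 1;
	#	if (h >> 127)           /* carry out of the top bit */
	#		h2 ^= POLY;     /* reduce modulo g(x) */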
	#######################################################################
	vmovdqu  %xmm6, HashKey(arg2)           # store HashKey<<1 mod poly

	PRECOMPUTE_AVX2  %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5

	FUNC_RESTORE
	ret
ENDPROC(aesni_gcm_precomp_avx_gen4)
###############################################################################
# void aesni_gcm_enc_avx_gen4(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Ciphertext output. Encrypt in-place is allowed. */
#        const   u8 *in, /* Plaintext input */
#        u64     plaintext_len, /* Length of data in Bytes for encryption. */
#        u8      *iv, /* Pre-counter block j0: 4 byte salt
#                       (from Security Association) concatenated with 8 byte
#                       Initialisation Vector (from IPSec ESP Payload)
#                       concatenated with 0x00000001. 16-byte aligned pointer. */
#        const   u8 *aad, /* Additional Authentication Data (AAD) */
#        u64     aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
#        u8      *auth_tag, /* Authenticated Tag output. */
#        u64     auth_tag_len) # /* Authenticated Tag Length in bytes.
#                                 Valid values are 16 (most likely), 12 or 8. */
###############################################################################
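# Hedged usage sketch (variable names are illustrative, not from this file):
#
#	aesni_gcm_enc_avx_gen4(&my_ctx_data, &ctx_data, ct, pt, pt_len,
#			       iv_j0, aad, aad_len, tag, 16);
#
# The dispatch below selects the REP argument by key size: 9 middle vaesenc
# rounds for AES-128, 11 for AES-192, 13 for AES-256, with the final round
# always handled by vaesenclast.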
ENTRY(aesni_gcm_enc_avx_gen4)
	FUNC_SAVE
	mov     keysize,%eax
	cmp     $32, %eax
	je      key_256_enc4
	cmp     $16, %eax
	je      key_128_enc4

	# must be 192
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
	FUNC_RESTORE
	ret

key_128_enc4:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
	FUNC_RESTORE
	ret

key_256_enc4:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
	FUNC_RESTORE
	ret
ENDPROC(aesni_gcm_enc_avx_gen4)
###############################################################################
# void aesni_gcm_dec_avx_gen4(
#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
#        gcm_context_data *data,
#        u8      *out, /* Plaintext output. Decrypt in-place is allowed. */
#        const   u8 *in, /* Ciphertext input */
#        u64     plaintext_len, /* Length of data in Bytes for decryption. */
#        u8      *iv, /* Pre-counter block j0: 4 byte salt
#                       (from Security Association) concatenated with 8 byte
#                       Initialisation Vector (from IPSec ESP Payload)
#                       concatenated with 0x00000001. 16-byte aligned pointer. */
#        const   u8 *aad, /* Additional Authentication Data (AAD) */
#        u64     aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
#        u8      *auth_tag, /* Authenticated Tag output. */
#        u64     auth_tag_len) # /* Authenticated Tag Length in bytes.
#                                 Valid values are 16 (most likely), 12 or 8. */
###############################################################################
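# Decryption mirrors the encryption sketch above; the routine computes the
# tag over the ciphertext, and the caller must compare it against the
# received tag in constant time (illustrative, kernel-style):
#
#	aesni_gcm_dec_avx_gen4(&my_ctx_data, &ctx_data, pt, ct, ct_len,
#			       iv_j0, aad, aad_len, tag, 16);
#	if (crypto_memneq(tag, received_tag, auth_tag_len))
#		return -EBADMSG;	/* authentication failed */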
ENTRY(aesni_gcm_dec_avx_gen4)
	FUNC_SAVE
	mov     keysize,%eax
	cmp     $32, %eax
	je      key_256_dec4
	cmp     $16, %eax
	je      key_128_dec4

	# must be 192
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
	FUNC_RESTORE
	ret

key_128_dec4:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
	FUNC_RESTORE
	ret

key_256_dec4:
	GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
	FUNC_RESTORE
	ret
ENDPROC(aesni_gcm_dec_avx_gen4)
#endif /* CONFIG_AS_AVX2 */