2009-01-18 16:28:34 +11:00
/*
 * Implement AES algorithm in Intel AES-NI instructions.
 *
 * The white paper of AES-NI instructions can be downloaded from:
 *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *            Vinodh Gopal <vinodh.gopal@intel.com>
 *            Kahraman Akdemir
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
# include < l i n u x / l i n k a g e . h >
2009-11-23 19:54:06 +08:00
# include < a s m / i n s t . h >
2009-01-18 16:28:34 +11:00
.text

/*
 * Register aliases used throughout this file.
 *
 * Pointer/length aliases: KEYP, OUTP, INP, LEN and IVP are the first five
 * integer argument registers, matching the (ctx, dst, src, len, iv)
 * parameter order of the exported functions below.
 */
#define KEYP	%rdi
#define OUTP	%rsi
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8

/* Scratch general purpose registers. */
#define KLEN	%r9d		/* value loaded from offset 480 of the ctx */
#define T1	%r10
#define TKEYP	T1		/* moving round-key pointer */
#define T2	%r11
#define TCTR_LOW T2		/* low qword of the counter (CTR mode) */

/* Cipher state: up to four blocks are processed at once. */
#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1

/* Input blocks matching STATE1..STATE4. */
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1

/* Current round key and chaining value / counter block. */
#define KEY	%xmm2
#define IV	%xmm3

/* CTR mode helpers, see _aesni_inc_init and _aesni_inc. */
#define BSWAP_MASK %xmm10
#define CTR	%xmm11
#define INC	%xmm12
2009-01-18 16:28:34 +11:00
/*
 * _key_expansion_128, _key_expansion_256a: internal ABI
 * input:
 *	%xmm0:	key words that are updated in place
 *	%xmm1:	AESKEYGENASSIST result, only dword 3 is used
 *	%xmm4:	dword 0 must be zero
 *	%rcx:	address the new round key is stored to (16-byte aligned)
 * output:
 *	%xmm0:	new round key, also stored at the old (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1
 *	%xmm4 (dword 0 is zero again on return)
 */
_key_expansion_128:
_key_expansion_256a:
	/* Two shuffle/xor steps: each dword of xmm0 becomes the XOR of
	 * itself and all lower dwords of the old xmm0. */
	shufps $0x10, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0x8c, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pshufd $0xff, %xmm1, %xmm1	# broadcast dword 3 of the assist value
	pxor %xmm1, %xmm0
	movaps %xmm0, (%rcx)
	add $16, %rcx
	ret
/*
 * _key_expansion_192a: internal ABI
 * input:
 *	%xmm0:	key words that are updated in place
 *	%xmm1:	AESKEYGENASSIST result, only dword 1 is used
 *	%xmm2:	remaining key words (low two dwords are stored)
 *	%xmm4:	dword 0 must be zero
 *	%rcx:	address the output is stored to (16-byte aligned)
 * output:
 *	(%rcx):	   { old xmm2 dwords 0-1, new xmm0 dwords 0-1 }
 *	16(%rcx): { new xmm0 dwords 2-3, new xmm2 dwords 0-1 }
 *	%xmm0, %xmm2: updated key words
 *	%rcx:	advanced by 32
 * changed:
 *	%xmm1, %xmm3, %xmm5, %xmm6
 *	%xmm4 (dword 0 is zero again on return)
 */
_key_expansion_192a:
	/* xmm0: each dword ^= all lower dwords, then ^= assist dword 1 */
	shufps $0x10, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0x8c, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pshufd $0x55, %xmm1, %xmm1
	pxor %xmm1, %xmm0

	/* xmm2 ^= (xmm2 << 32 bits) ^ broadcast(new xmm0 dword 3) */
	movaps %xmm2, %xmm6		# keep the old xmm2 for the first store
	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0xff, %xmm0, %xmm3
	pxor %xmm5, %xmm2
	pxor %xmm3, %xmm2

	/* Pack the six new words plus two old ones into two 16-byte slots */
	shufps $0x44, %xmm0, %xmm6
	movaps %xmm6, (%rcx)
	movaps %xmm0, %xmm1
	shufps $0x4e, %xmm2, %xmm1
	movaps %xmm1, 16(%rcx)
	add $32, %rcx
	ret
/*
 * _key_expansion_192b: internal ABI
 * input:
 *	%xmm0:	key words that are updated in place
 *	%xmm1:	AESKEYGENASSIST result, only dword 1 is used
 *	%xmm2:	remaining key words, updated in place
 *	%xmm4:	dword 0 must be zero
 *	%rcx:	address the new round key is stored to (16-byte aligned)
 * output:
 *	%xmm0:	new round key, also stored at the old (%rcx)
 *	%xmm2:	updated key words (not stored by this helper)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm3, %xmm5
 *	%xmm4 (dword 0 is zero again on return)
 */
_key_expansion_192b:
	/* xmm0: each dword ^= all lower dwords, then ^= assist dword 1 */
	shufps $0x10, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0x8c, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pshufd $0x55, %xmm1, %xmm1
	pxor %xmm1, %xmm0

	/* xmm2 ^= (xmm2 << 32 bits) ^ broadcast(new xmm0 dword 3) */
	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0xff, %xmm0, %xmm3
	pxor %xmm5, %xmm2
	pxor %xmm3, %xmm2

	movaps %xmm0, (%rcx)
	add $16, %rcx
	ret
/*
 * _key_expansion_256b: internal ABI
 * input:
 *	%xmm1:	AESKEYGENASSIST result, only dword 2 is used
 *	%xmm2:	key words that are updated in place
 *	%xmm4:	dword 0 must be zero
 *	%rcx:	address the new round key is stored to (16-byte aligned)
 * output:
 *	%xmm2:	new round key, also stored at the old (%rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1
 *	%xmm4 (dword 0 is zero again on return)
 */
_key_expansion_256b:
	/* Each dword of xmm2 becomes the XOR of itself and all lower dwords */
	shufps $0x10, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0x8c, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	pshufd $0xaa, %xmm1, %xmm1	# broadcast dword 2 of the assist value
	pxor %xmm1, %xmm2
	movaps %xmm2, (%rcx)
	add $16, %rcx
	ret
/*
 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                   unsigned int key_len)
 *
 * Expand the user key into ctx:
 *	  0(ctx): encryption round keys
 *	240(ctx): decryption round keys (encryption keys in reverse order,
 *		  the inner ones passed through AESIMC)
 *	480(ctx): key_len as passed in
 *
 * key_len selects the schedule: below 24 -> 128 bit path, equal to 24 ->
 * 192 bit path, otherwise the 256 bit path. Only the low byte of key_len
 * is compared. ctx is accessed with movaps and so must be 16-byte aligned;
 * in_key may be unaligned.
 *
 * Always returns 0.
 */
ENTRY(aesni_set_key)
	pxor %xmm4, %xmm4		# _key_expansion_* need dword 0 of xmm4 == 0
	movups (%rsi), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (%rdi)		# becomes round key 0
	movl %edx, 480(%rdi)		# remember key_len
	lea 16(%rdi), %rcx		# rcx = next round key slot
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192

	/* 256 bit key: second half of the user key is round key 1 */
	movups 16(%rsi), %xmm2		# other user key
	movaps %xmm2, (%rcx)
	add $16, %rcx
	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
	call _key_expansion_256a
	AESKEYGENASSIST 0x1 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
	call _key_expansion_256a
	AESKEYGENASSIST 0x2 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
	call _key_expansion_256a
	AESKEYGENASSIST 0x4 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
	call _key_expansion_256a
	AESKEYGENASSIST 0x8 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
	call _key_expansion_256a
	AESKEYGENASSIST 0x10 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
	call _key_expansion_256a
	AESKEYGENASSIST 0x20 %xmm0 %xmm1
	call _key_expansion_256b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call _key_expansion_256a
	jmp .Ldec_key

.Lenc_key192:
	/* 192 bit key: remaining 8 bytes of the user key go to xmm2 */
	movq 16(%rsi), %xmm2		# other user key
	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
	call _key_expansion_192a
	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
	call _key_expansion_192b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
	call _key_expansion_192a
	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
	call _key_expansion_192b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
	call _key_expansion_192a
	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
	call _key_expansion_192b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call _key_expansion_192a
	AESKEYGENASSIST 0x80 %xmm2 %xmm1	# round 8
	call _key_expansion_192b
	jmp .Ldec_key

.Lenc_key128:
	AESKEYGENASSIST 0x1 %xmm0 %xmm1		# round 1
	call _key_expansion_128
	AESKEYGENASSIST 0x2 %xmm0 %xmm1		# round 2
	call _key_expansion_128
	AESKEYGENASSIST 0x4 %xmm0 %xmm1		# round 3
	call _key_expansion_128
	AESKEYGENASSIST 0x8 %xmm0 %xmm1		# round 4
	call _key_expansion_128
	AESKEYGENASSIST 0x10 %xmm0 %xmm1	# round 5
	call _key_expansion_128
	AESKEYGENASSIST 0x20 %xmm0 %xmm1	# round 6
	call _key_expansion_128
	AESKEYGENASSIST 0x40 %xmm0 %xmm1	# round 7
	call _key_expansion_128
	AESKEYGENASSIST 0x80 %xmm0 %xmm1	# round 8
	call _key_expansion_128
	AESKEYGENASSIST 0x1b %xmm0 %xmm1	# round 9
	call _key_expansion_128
	AESKEYGENASSIST 0x36 %xmm0 %xmm1	# round 10
	call _key_expansion_128

.Ldec_key:
	/*
	 * Build the decryption schedule at offset 240. On entry rcx points
	 * just past the last encryption round key and rdi is still ctx.
	 */
	sub $16, %rcx			# rcx = last encryption round key
	movaps (%rdi), %xmm0		# first encryption round key
	movaps (%rcx), %xmm1		# last encryption round key
	movaps %xmm0, 240(%rcx)		# first enc key -> last dec slot
	movaps %xmm1, 240(%rdi)		# last enc key -> first dec slot
	add $16, %rdi			# rdi walks the enc keys upwards
	lea 224(%rcx), %rsi		# rsi walks the dec slots downwards
.align 4
.Ldec_key_loop:
	movaps (%rdi), %xmm0
	AESIMC %xmm0 %xmm1
	movaps %xmm1, (%rsi)
	add $16, %rdi
	sub $16, %rsi
	cmp %rcx, %rdi
	jb .Ldec_key_loop
	xor %eax, %eax			# return 0 (32-bit write clears all of rax)
	ret
/*
 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Encrypt the single 16-byte block at src and store the result at dst.
 * src and dst may be unaligned (movups is used for both).
 */
ENTRY(aesni_enc)
	movups (INP), STATE		# load the input block
	movl 480(KEYP), KLEN		# key length stored in ctx
	call _aesni_enc1
	movups STATE, (OUTP)		# store the output block
	ret
/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		pointer to the encryption round keys (16-byte aligned)
 *	KLEN:		key length; compared against 24 to pick 10, 12 or
 *			14 rounds
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_enc1:
	movaps (KEYP), KEY		# round key 0
	pxor KEY, STATE			# round 0
	lea 0x30(KEYP), TKEYP		# bias so the 128 bit rounds start at -0x20
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# two more rounds (lea keeps the flags)
	je .Lenc192
	add $0x20, TKEYP		# 256 bit key: another two rounds
	movaps -0x60(TKEYP), KEY
	AESENC KEY STATE
	movaps -0x50(TKEYP), KEY
	AESENC KEY STATE
.align 4
.Lenc192:
	movaps -0x40(TKEYP), KEY
	AESENC KEY STATE
	movaps -0x30(TKEYP), KEY
	AESENC KEY STATE
.align 4
.Lenc128:
	movaps -0x20(TKEYP), KEY
	AESENC KEY STATE
	movaps -0x10(TKEYP), KEY
	AESENC KEY STATE
	movaps (TKEYP), KEY
	AESENC KEY STATE
	movaps 0x10(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x20(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x30(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x40(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x50(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x60(TKEYP), KEY
	AESENC KEY STATE
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE		# last round
	ret
/*
 * _aesni_enc4:		internal ABI
 *
 * Four-block variant of _aesni_enc1: every round key is loaded once and
 * applied to all four states.
 * input:
 *	KEYP:		pointer to the encryption round keys (16-byte aligned)
 *	KLEN:		key length; compared against 24 to pick 10, 12 or
 *			14 rounds
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_enc4:
	movaps (KEYP), KEY		# round key 0
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	lea 0x30(KEYP), TKEYP		# bias so the 128 bit rounds start at -0x20
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP		# two more rounds (lea keeps the flags)
	je .L4enc192
	add $0x20, TKEYP		# 256 bit key: another two rounds
	movaps -0x60(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps -0x50(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
/* .align 4 -- left commented out */
.L4enc192:
	movaps -0x40(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps -0x30(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
/* .align 4 -- left commented out */
.L4enc128:
	movaps -0x20(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps -0x10(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps (TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x10(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x20(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x30(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x40(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x50(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x60(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE1		# last round
	AESENCLAST KEY STATE2
	AESENCLAST KEY STATE3
	AESENCLAST KEY STATE4
	ret
/*
 * void aesni_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Decrypt the single 16-byte block at src and store the result at dst.
 * src and dst may be unaligned (movups is used for both).
 */
ENTRY(aesni_dec)
	movups (INP), STATE		# load the input block
	movl 480(KEYP), KLEN		# key length stored in ctx
	add $240, KEYP			# switch to the decryption round keys
	call _aesni_dec1
	movups STATE, (OUTP)		# store the output block
	ret
/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		pointer to the decryption round keys (16-byte aligned);
 *			the callers in this file pass ctx + 240
 *	KLEN:		key length; compared against 24 to pick 10, 12 or
 *			14 rounds
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_dec1:
	movaps (KEYP), KEY		# round key 0
	pxor KEY, STATE			# round 0
	lea 0x30(KEYP), TKEYP		# bias so the 128 bit rounds start at -0x20
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP		# two more rounds (lea keeps the flags)
	je .Ldec192
	add $0x20, TKEYP		# 256 bit key: another two rounds
	movaps -0x60(TKEYP), KEY
	AESDEC KEY STATE
	movaps -0x50(TKEYP), KEY
	AESDEC KEY STATE
.align 4
.Ldec192:
	movaps -0x40(TKEYP), KEY
	AESDEC KEY STATE
	movaps -0x30(TKEYP), KEY
	AESDEC KEY STATE
.align 4
.Ldec128:
	movaps -0x20(TKEYP), KEY
	AESDEC KEY STATE
	movaps -0x10(TKEYP), KEY
	AESDEC KEY STATE
	movaps (TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x10(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x20(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x30(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x40(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x50(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x60(TKEYP), KEY
	AESDEC KEY STATE
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE		# last round
	ret
/*
 * _aesni_dec4:		internal ABI
 *
 * Four-block variant of _aesni_dec1: every round key is loaded once and
 * applied to all four states.
 * input:
 *	KEYP:		pointer to the decryption round keys (16-byte aligned);
 *			the callers in this file pass ctx + 240
 *	KLEN:		key length; compared against 24 to pick 10, 12 or
 *			14 rounds
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */
_aesni_dec4:
	movaps (KEYP), KEY		# round key 0
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	lea 0x30(KEYP), TKEYP		# bias so the 128 bit rounds start at -0x20
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP		# two more rounds (lea keeps the flags)
	je .L4dec192
	add $0x20, TKEYP		# 256 bit key: another two rounds
	movaps -0x60(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps -0x50(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
.align 4
.L4dec192:
	movaps -0x40(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps -0x30(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
.align 4
.L4dec128:
	movaps -0x20(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps -0x10(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps (TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x10(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x20(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x30(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x40(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x50(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x60(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE1		# last round
	AESDECLAST KEY STATE2
	AESDECLAST KEY STATE3
	AESDECLAST KEY STATE4
	ret
/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypt len bytes from src to dst, four blocks at a time while at
 * least 64 bytes remain, then one block at a time. A trailing partial
 * block (len % 16 bytes) is left untouched; len < 16 does nothing.
 *
 * len is a size_t, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_ecb_enc)
	cmp $16, LEN			# also covers len == 0
	jb .Lecb_enc_ret
	mov 480(KEYP), KLEN
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4		# unsigned: at least 64 bytes left
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1		# unsigned: at least one block left
.Lecb_enc_ret:
	ret
/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypt len bytes from src to dst, four blocks at a time while at
 * least 64 bytes remain, then one block at a time. A trailing partial
 * block (len % 16 bytes) is left untouched; len < 16 does nothing.
 *
 * len is a size_t, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_ecb_dec)
	cmp $16, LEN			# also covers len == 0
	jb .Lecb_dec_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption round keys
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4		# unsigned: at least 64 bytes left
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1		# unsigned: at least one block left
.Lecb_dec_ret:
	ret
/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypt len bytes from src to dst one block at a time. The chaining
 * value is read from iv, and the last ciphertext block is written back to
 * iv when at least one block was processed. A trailing partial block
 * (len % 16 bytes) is left untouched; len < 16 does nothing and leaves iv
 * unchanged.
 *
 * len is a size_t, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_cbc_enc)
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN
	movups (IVP), STATE		# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN		# load input
	pxor IN, STATE			# chain: previous ciphertext ^ plaintext
	call _aesni_enc1
	movups STATE, (OUTP)		# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop		# unsigned: at least one block left
	movups STATE, (IVP)		# hand the last ciphertext block back
.Lcbc_enc_ret:
	ret
/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypt len bytes from src to dst, four blocks at a time while at
 * least 64 bytes remain, then one block at a time. The chaining value is
 * read from iv, and the last ciphertext block is written back to iv. A
 * trailing partial block (len % 16 bytes) is left untouched; len < 16
 * does nothing and leaves iv unchanged.
 *
 * The ciphertext is copied into IN1..IN4 before decryption so that the
 * chaining XOR still sees the original ciphertext afterwards.
 *
 * len is a size_t, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_cbc_dec)
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption round keys
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
	call _aesni_dec4
	pxor IV, STATE1			# block n ^= ciphertext block n-1
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV			# last ciphertext chains into the next round
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4		# unsigned: at least 64 bytes left
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1		# unsigned: at least one block left
.Lcbc_dec_ret:
	movups IV, (IVP)		# hand the last ciphertext block back
.Lcbc_dec_just_ret:
	ret
2010-03-10 18:28:55 +08:00
/*
 * PSHUFB control mask that reverses the 16 bytes of an XMM register
 * (result byte i = source byte 15 - i). 16-byte aligned because it is
 * loaded with movaps.
 */
.align 16
.Lbswap_mask:
	.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
	.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
/*
 * _aesni_inc_init:	internal ABI
 *	setup registers used by _aesni_inc
 * input:
 *	IV
 * output:
 *	CTR:		== IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 *	INC:		== 1, in little endian
 *	BSWAP_MASK	== endian swapping mask
 *
 * The mask is addressed RIP-relative so that this code does not depend on
 * the absolute address it is linked at.
 */
_aesni_inc_init:
	movaps .Lbswap_mask(%rip), BSWAP_MASK
	movaps IV, CTR
	PSHUFB_XMM BSWAP_MASK CTR	# byte-reverse the IV into CTR
	mov $1, TCTR_LOW
	MOVQ_R64_XMM TCTR_LOW INC	# INC = 1 (TCTR_LOW is only a temporary here)
	MOVQ_R64_XMM CTR TCTR_LOW	# TCTR_LOW = low qword of CTR
	ret
/*
 * _aesni_inc:		internal ABI
 *	Increase IV by 1, IV is in big endian
 * input:
 *	IV
 *	CTR:		== IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 *	INC:		== 1, in little endian
 *	BSWAP_MASK	== endian swapping mask
 * output:
 *	IV:		increased by 1
 * changed:
 *	CTR:		== output IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 *
 * paddq adds the two qword lanes independently, so the carry out of the
 * low qword is detected on the TCTR_LOW shadow copy and then added to the
 * high qword by hand.
 */
_aesni_inc:
	add $1, TCTR_LOW		# CF set when the low qword wraps
	paddq INC, CTR			# low qword + 1 (does not touch the flags)
	jnc .Linc_low
	pslldq $8, INC			# move the 1 to the high qword
	paddq INC, CTR			# propagate the carry
	psrldq $8, INC			# restore INC == 1
.Linc_low:
	movaps CTR, IV
	PSHUFB_XMM BSWAP_MASK IV	# back to big endian
	ret
/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CTR mode: encrypt successive counter blocks, starting with the value at
 * iv, and XOR them onto src, storing the result at dst. Four blocks are
 * processed at a time while at least 64 bytes remain, then one block at a
 * time. The counter following the last processed block is written back to
 * iv. A trailing partial block (len % 16 bytes) is left untouched;
 * len < 16 does nothing and leaves iv unchanged.
 *
 * len is a size_t, so every length test uses the unsigned conditions
 * (jb/jae).
 */
ENTRY(aesni_ctr_enc)
	cmp $16, LEN
	jb .Lctr_enc_just_ret
	mov 480(KEYP), KLEN
	movups (IVP), IV
	call _aesni_inc_init
	cmp $64, LEN
	jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	/* Each state takes the current counter, then the counter advances */
	movaps IV, STATE1
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4
	pxor IN1, STATE1		# keystream ^ input
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lctr_enc_loop4		# unsigned: at least 64 bytes left
	cmp $16, LEN
	jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	movaps IV, STATE
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1
	pxor IN, STATE			# keystream ^ input
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lctr_enc_loop1		# unsigned: at least one block left
.Lctr_enc_ret:
	movups IV, (IVP)		# hand the next counter value back
.Lctr_enc_just_ret:
	ret