/*
 * AVX2 implementation of MORUS-1280
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/frame.h>
/*
 * Build an 8-bit immediate for vpermq/pshufd: four 2-bit lane selectors,
 * packed little-endian (i0 selects the lowest destination lane).
 */
#define SHUFFLE_MASK(i0, i1, i2, i3) \
	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))

/* whole-word (64-bit lane) rotations applied to \s3 in each round */
#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)

/*
 * Register aliases. Note that KEY and MSG deliberately share %ymm5:
 * only one meaning is in use at any given time, and the update helpers
 * never write %ymm5, so KEY survives across __morus1280_update_zero calls.
 */
#define STATE0		%ymm0
#define STATE0_LOW	%xmm0
#define STATE1		%ymm1
#define STATE2		%ymm2
#define STATE3		%ymm3
#define STATE4		%ymm4
#define KEY		%ymm5
#define MSG		%ymm5
#define MSG_LOW	%xmm5
#define T0		%ymm6
#define T0_LOW		%xmm6
#define T1		%ymm7
.section .rodata.cst32.morus1280_const, "aM", @progbits, 32
.align 32
.Lmorus1280_const:
	/* MORUS initialization constant (Fibonacci sequence modulo 256) */
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
.align 32
.Lmorus1280_counter:
	/* byte indices 0x00..0x1f; compared against the tail byte count
	 * in dec_tail to build a keep-mask for the partial block */
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
.text

/*
 * One MORUS-1280 round:
 *   \s0 ^= (\s1 & \s2) ^ \s3
 *   \s0  = rol64(\s0, \b)   in each 64-bit lane (vpsllq/vpsrlq/vpxor)
 *   \s3  = word-rotate(\s3) via vpermq with shuffle mask \w
 * \s4 is not referenced in the body; it is kept in the parameter list so
 * the call sites read as the full five-word state rotation.
 * Clobbers T0.
 */
.macro morus1280_round s0, s1, s2, s3, s4, b, w
	vpand \s1, \s2, T0
	vpxor T0, \s0, \s0
	vpxor \s3, \s0, \s0
	vpsllq $\b, \s0, T0
	vpsrlq $(64 - \b), \s0, \s0
	vpxor T0, \s0, \s0
	vpermq $\w, \s3, \s3
.endm
/*
 * __morus1280_update: internal ABI
 * Absorbs one 32-byte message block into the state: five rounds with the
 * per-round rotation constants 13/46/38/7/4, xoring MSG into every state
 * word except STATE0 between rounds.
 * input:
 *	STATE[0-4] - input state
 *	MSG - message block
 * output:
 *	STATE[0-4] - output state
 * changed:
 *	T0
 */
__morus1280_update:
	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
	vpxor MSG, STATE1, STATE1
	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
	vpxor MSG, STATE2, STATE2
	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
	vpxor MSG, STATE3, STATE3
	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
	vpxor MSG, STATE4, STATE4
	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
	ret
ENDPROC(__morus1280_update)
/*
 * __morus1280_update_zero: internal ABI
 * Same round schedule as __morus1280_update but with an all-zero message
 * (the MSG xors are simply omitted). Does not touch %ymm5, so a value
 * held in KEY/MSG survives this call.
 * input:
 *	STATE[0-4] - input state
 * output:
 *	STATE[0-4] - output state
 * changed:
 *	T0
 */
__morus1280_update_zero:
	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2
	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1
	ret
ENDPROC(__morus1280_update_zero)
/*
 * __load_partial: internal ABI
 * Loads a partial (< 32 byte) block from src into MSG, zero-padded to the
 * full 32 bytes. The byte count is decomposed by its bits: the 1-, 2- and
 * 4-byte pieces are gathered into %r9 (highest-addressed piece loaded
 * first, shifted up as lower pieces arrive), then 8- and 16-byte pieces
 * are merged in with vector inserts/permutes.
 * input:
 *	%rsi - src
 *	%rcx - bytes (assumed < 32; only bits 0-4 are examined)
 * output:
 *	MSG  - message block
 * changed:
 *	%r8
 *	%r9
 */
__load_partial:
	xor %r9d, %r9d
	vpxor MSG, MSG, MSG

	mov %rcx, %r8
	and $0x1, %r8
	jz .Lld_partial_1

	/* bytes & 0x1E = offset of the final single byte */
	mov %rcx, %r8
	and $0x1E, %r8
	add %rsi, %r8
	mov (%r8), %r9b

.Lld_partial_1:
	mov %rcx, %r8
	and $0x2, %r8
	jz .Lld_partial_2

	/* bytes & 0x1C = offset of the 2-byte piece */
	mov %rcx, %r8
	and $0x1C, %r8
	add %rsi, %r8
	shl $16, %r9
	mov (%r8), %r9w

.Lld_partial_2:
	mov %rcx, %r8
	and $0x4, %r8
	jz .Lld_partial_4

	/* bytes & 0x18 = offset of the 4-byte piece */
	mov %rcx, %r8
	and $0x18, %r8
	add %rsi, %r8
	shl $32, %r9
	mov (%r8), %r8d
	xor %r8, %r9

.Lld_partial_4:
	movq %r9, MSG_LOW

	mov %rcx, %r8
	and $0x8, %r8
	jz .Lld_partial_8

	/* bytes & 0x10 = offset of the 8-byte piece; shift the qword
	 * gathered so far up one lane and insert the new qword below it */
	mov %rcx, %r8
	and $0x10, %r8
	add %rsi, %r8
	pshufd $MASK2, MSG_LOW, MSG_LOW
	pinsrq $0, (%r8), MSG_LOW

.Lld_partial_8:
	mov %rcx, %r8
	and $0x10, %r8
	jz .Lld_partial_16

	/* move the low 16 bytes to the high ymm lane, then load the
	 * leading 16 bytes of src into the low lane */
	vpermq $MASK2, MSG, MSG
	movdqu (%rsi), MSG_LOW

.Lld_partial_16:
	ret
ENDPROC(__load_partial)
/*
 * __store_partial: internal ABI
 * Stores the low `bytes` bytes of T0 to dst, cascading through piece
 * sizes 16/8/4/2/1 so nothing past dst+bytes is written.
 * input:
 *	%rdx - dst
 *	%rcx - bytes (assumed < 32)
 * output:
 *	T0   - message block (high lane permuted down if >= 16 stored)
 * changed:
 *	%r8
 *	%r9
 *	%r10
 */
__store_partial:
	mov %rcx, %r8
	mov %rdx, %r9

	cmp $16, %r8
	jl .Lst_partial_16

	/* store the low xmm lane, then bring the high lane down */
	movdqu T0_LOW, (%r9)
	vpermq $MASK2, T0, T0

	sub $16, %r8
	add $16, %r9

.Lst_partial_16:
	movq T0_LOW, %r10

	cmp $8, %r8
	jl .Lst_partial_8

	mov %r10, (%r9)
	pextrq $1, T0_LOW, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jl .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jl .Lst_partial_2

	mov %r10w, (%r9)
	shr $16, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jl .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
/*
 * void crypto_morus1280_avx2_init(void *state, const void *key,
 *                                 const void *iv);
 * SysV: %rdi = state, %rsi = key (32 bytes), %rdx = iv (16 bytes).
 * Initializes the five 256-bit state words, runs 16 zero-message updates,
 * re-xors the key into STATE1, and writes the state back to memory.
 */
ENTRY(crypto_morus1280_avx2_init)
	FRAME_BEGIN

	/* load IV (16 bytes into the low lane, high lane zeroed): */
	vpxor STATE0, STATE0, STATE0
	movdqu (%rdx), STATE0_LOW
	/* load key (kept in KEY=%ymm5 across the zero updates): */
	vmovdqu (%rsi), KEY
	vmovdqa KEY, STATE1
	/* load all ones: */
	vpcmpeqd STATE2, STATE2, STATE2
	/* load all zeros: */
	vpxor STATE3, STATE3, STATE3
	/* load the constant: */
	vmovdqa .Lmorus1280_const, STATE4

	/* update 16 times with zero: */
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero
	call __morus1280_update_zero

	/* xor-in the key again after updates: */
	vpxor KEY, STATE1, STATE1

	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_init)
/*
 * void crypto_morus1280_avx2_ad(void *state, const void *data,
 *                               unsigned int length);
 * SysV: %rdi = state, %rsi = data, %rdx = length.
 * Absorbs full 32-byte blocks of associated data; any tail < 32 bytes is
 * ignored here (the caller is expected to handle padding separately).
 * Picks an aligned (vmovdqa) or unaligned (vmovdqu) load loop based on
 * the low 5 bits of the data pointer.
 */
ENTRY(crypto_morus1280_avx2_ad)
	FRAME_BEGIN

	cmp $32, %rdx
	jb .Lad_out

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	mov %rsi, %r8
	and $0x1F, %r8
	jnz .Lad_u_loop

.align 4
.Lad_a_loop:
	vmovdqa (%rsi), MSG
	call __morus1280_update
	sub $32, %rdx
	add $32, %rsi
	cmp $32, %rdx
	jge .Lad_a_loop

	jmp .Lad_cont
.align 4
.Lad_u_loop:
	vmovdqu (%rsi), MSG
	call __morus1280_update
	sub $32, %rdx
	add $32, %rsi
	cmp $32, %rdx
	jge .Lad_u_loop

.Lad_cont:
	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_ad)
/*
 * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
 *                                unsigned int length);
 * SysV: %rdi = state, %rsi = src, %rdx = dst, %rcx = length.
 * Encrypts full 32-byte blocks: C = M ^ S0 ^ rot(S1) ^ (S2 & S3), then
 * absorbs M into the state. Any tail < 32 bytes is left for enc_tail.
 * Uses the aligned loop only when BOTH src and dst are 32-byte aligned.
 */
ENTRY(crypto_morus1280_avx2_enc)
	FRAME_BEGIN

	cmp $32, %rcx
	jb .Lenc_out

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	mov %rsi, %r8
	or %rdx, %r8
	and $0x1F, %r8
	jnz .Lenc_u_loop

.align 4
.Lenc_a_loop:
	vmovdqa (%rsi), MSG
	vmovdqa MSG, T0
	/* keystream: S0 ^ (S1 <<< one word) ^ (S2 & S3) */
	vpxor STATE0, T0, T0
	vpermq $MASK3, STATE1, T1
	vpxor T1, T0, T0
	vpand STATE2, STATE3, T1
	vpxor T1, T0, T0
	vmovdqa T0, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jge .Lenc_a_loop

	jmp .Lenc_cont
.align 4
.Lenc_u_loop:
	vmovdqu (%rsi), MSG
	vmovdqa MSG, T0
	vpxor STATE0, T0, T0
	vpermq $MASK3, STATE1, T1
	vpxor T1, T0, T0
	vpand STATE2, STATE3, T1
	vpxor T1, T0, T0
	vmovdqu T0, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jge .Lenc_u_loop

.Lenc_cont:
	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_enc)
/*
 * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 * SysV: %rdi = state, %rsi = src, %rdx = dst, %rcx = length (< 32).
 * Encrypts the final partial block. __load_partial zero-pads MSG, so the
 * zero-padded plaintext is absorbed directly into the state; only the
 * real `length` ciphertext bytes are written via __store_partial.
 */
ENTRY(crypto_morus1280_avx2_enc_tail)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* encrypt message: */
	call __load_partial

	vmovdqa MSG, T0
	vpxor STATE0, T0, T0
	vpermq $MASK3, STATE1, T1
	vpxor T1, T0, T0
	vpand STATE2, STATE3, T1
	vpxor T1, T0, T0

	call __store_partial

	call __morus1280_update

	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_enc_tail)
/*
 * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
 *                                unsigned int length);
 * SysV: %rdi = state, %rsi = src, %rdx = dst, %rcx = length.
 * Decrypts full 32-byte blocks: M = C ^ S0 ^ rot(S1) ^ (S2 & S3), writes
 * the plaintext, then absorbs M (held in MSG) into the state. Tail < 32
 * bytes is left for dec_tail. Aligned loop requires both pointers aligned.
 */
ENTRY(crypto_morus1280_avx2_dec)
	FRAME_BEGIN

	cmp $32, %rcx
	jb .Ldec_out

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	mov %rsi, %r8
	or %rdx, %r8
	and $0x1F, %r8
	jnz .Ldec_u_loop

.align 4
.Ldec_a_loop:
	vmovdqa (%rsi), MSG
	/* MSG = ciphertext ^ keystream = plaintext */
	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqa MSG, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jge .Ldec_a_loop

	jmp .Ldec_cont
.align 4
.Ldec_u_loop:
	vmovdqu (%rsi), MSG
	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqu MSG, (%rdx)

	call __morus1280_update
	sub $32, %rcx
	add $32, %rsi
	add $32, %rdx
	cmp $32, %rcx
	jge .Ldec_u_loop

.Ldec_cont:
	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_dec)
/*
 * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
 *                                     unsigned int length);
 * SysV: %rdi = state, %rsi = src, %rdx = dst, %rcx = length (< 32).
 * Decrypts the final partial block. Unlike the encrypt side, the
 * decrypted block must be masked before being absorbed: the keystream
 * bytes past `length` are not real plaintext, so they are zeroed with a
 * byte-count mask before the state update.
 */
ENTRY(crypto_morus1280_avx2_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* decrypt message: */
	call __load_partial

	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqa MSG, T0
	call __store_partial

	/* mask with byte count: T0[i] = (count > i) ? 0xff : 0x00 */
	movq %rcx, T0_LOW
	vpbroadcastb T0_LOW, T0
	vmovdqa .Lmorus1280_counter, T1
	vpcmpgtb T1, T0, T0
	vpand T0, MSG, MSG

	call __morus1280_update

	/* store the state: */
	vmovdqu STATE0, (0 * 32)(%rdi)
	vmovdqu STATE1, (1 * 32)(%rdi)
	vmovdqu STATE2, (2 * 32)(%rdi)
	vmovdqu STATE3, (3 * 32)(%rdi)
	vmovdqu STATE4, (4 * 32)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_dec_tail)
/*
 * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 * SysV: %rdi = state, %rsi = tag_xor (32 bytes, read and written),
 *       %rdx = assoclen (bytes), %rcx = cryptlen (bytes).
 * Finalization: folds S0 into S4, absorbs a length block containing the
 * two lengths in BITS, runs 10 updates, then xors the resulting keystream
 * into the caller-supplied tag buffer in place.
 */
ENTRY(crypto_morus1280_avx2_final)
	FRAME_BEGIN

	/* load the state: */
	vmovdqu (0 * 32)(%rdi), STATE0
	vmovdqu (1 * 32)(%rdi), STATE1
	vmovdqu (2 * 32)(%rdi), STATE2
	vmovdqu (3 * 32)(%rdi), STATE3
	vmovdqu (4 * 32)(%rdi), STATE4

	/* xor state[0] into state[4]: */
	vpxor STATE0, STATE4, STATE4

	/* prepare length block: [assoclen | cryptlen | 0...] as u64s */
	vpxor MSG, MSG, MSG
	vpinsrq $0, %rdx, MSG_LOW, MSG_LOW
	vpinsrq $1, %rcx, MSG_LOW, MSG_LOW
	vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */

	/* update state: */
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update
	call __morus1280_update

	/* xor tag: */
	vmovdqu (%rsi), MSG

	vpxor STATE0, MSG, MSG
	vpermq $MASK3, STATE1, T0
	vpxor T0, MSG, MSG
	vpand STATE2, STATE3, T0
	vpxor T0, MSG, MSG
	vmovdqu MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus1280_avx2_final)