2019-06-04 10:11:33 +02:00
/* SPDX-License-Identifier: GPL-2.0-only */
2018-05-11 14:19:12 +02:00
/*
 * SSE2 implementation of MORUS-640
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 */
#include <linux/linkage.h>
#include <asm/frame.h>

/*
 * Pack four 2-bit word selectors into a pshufd immediate: destination
 * word n of the shuffle is taken from source word i<n>.
 */
#define SHUFFLE_MASK(i0, i1, i2, i3) \
	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))

/* Rotations of the four 32-bit words by one, two and three positions. */
#define MASK1	SHUFFLE_MASK(3, 0, 1, 2)
#define MASK2	SHUFFLE_MASK(2, 3, 0, 1)
#define MASK3	SHUFFLE_MASK(1, 2, 3, 0)

/* The five 128-bit words of the cipher state. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/* KEY and MSG are two names for the same register (%xmm5). */
#define KEY	%xmm5
#define MSG	%xmm5

/* Temporaries. */
#define T0	%xmm6
#define T1	%xmm7
/*
 * Initialization constants: 32 consecutive terms of the Fibonacci sequence
 * reduced modulo 256 (0x00, 0x01, 0x01, 0x02, ...), split into two 16-byte
 * vectors that share one mergeable section with a 32-byte entry size.
 */
.section	.rodata.cst16.morus640_const, "aM", @progbits, 32
.balign 16
.Lmorus640_const_0:
	.byte 0x00, 0x01, 0x01, 0x02
	.byte 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59
	.byte 0x90, 0xe9, 0x79, 0x62
.Lmorus640_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55
	.byte 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42
	.byte 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte lane indices 0..15; compared against a broadcast byte count to build
 * a per-byte mask when decrypting a partial block.
 */
.section	.rodata.cst16.morus640_counter, "aM", @progbits, 16
.balign 16
.Lmorus640_counter:
	.byte 0x00, 0x01, 0x02, 0x03
	.byte 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b
	.byte 0x0c, 0x0d, 0x0e, 0x0f
.text
/*
 * morus640_round: one round of the state update
 *   s0     - state word updated in place
 *   s1, s2 - state words whose bitwise AND is XORed into s0
 *   s3     - state word XORed into s0, then shuffled in place
 *   s4     - not referenced by the macro body
 *   b      - left-rotate amount applied to each 32-bit lane of s0
 *   w      - pshufd immediate applied to s3
 * changed:
 *   T0
 */
.macro morus640_round s0, s1, s2, s3, s4, b, w
	/* s0 ^= s3 ^ (s1 & s2) */
	movdqa		\s1, T0
	pand		\s2, T0
	pxor		\s3, \s0
	pxor		T0, \s0

	/* rotate each 32-bit lane of s0 left by b bits */
	movdqa		\s0, T0
	pslld		$\b, T0
	psrld		$(32 - \b), \s0
	pxor		T0, \s0

	/* permute the 32-bit words of s3 */
	pshufd		$\w, \s3, \s3
.endm
/*
 * __morus640_update: internal ABI
 *
 * Runs the five rounds of one state update. The message block is XORed
 * into STATE1..STATE4, each just before the round that updates that word;
 * STATE0 is updated without a message XOR.
 *
 * input:
 *   STATE[0-4] - input state
 *   MSG        - message block
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus640_update:
	/* round 1: updates STATE0 */
	morus640_round	STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1

	/* round 2: updates STATE1 */
	pxor		MSG, STATE1
	morus640_round	STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2

	/* round 3: updates STATE2 */
	pxor		MSG, STATE2
	morus640_round	STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3

	/* round 4: updates STATE3 */
	pxor		MSG, STATE3
	morus640_round	STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2

	/* round 5: updates STATE4 */
	pxor		MSG, STATE4
	morus640_round	STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1

	ret
ENDPROC(__morus640_update)
/*
 * __morus640_update_zero: internal ABI
 *
 * Same five rounds as the regular update, but with no message XOR between
 * rounds. MSG/KEY (%xmm5) is neither read nor written here.
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus640_update_zero:
	morus640_round	STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
	morus640_round	STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
	morus640_round	STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
	morus640_round	STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
	morus640_round	STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1

	ret
ENDPROC(__morus640_update_zero)
/*
 * __load_partial: internal ABI
 *
 * Gathers a short message into MSG, zero-filling the unused bytes. The
 * byte count is decomposed by its low four bits into an 8-, 4-, 2- and
 * 1-byte piece; each piece is read from the offset formed by the count
 * bits above it, so the larger pieces come first in memory.
 *
 * input:
 *   %rsi - src
 *   %rcx - bytes (only bits 0-4 are examined; callers are presumably
 *          expected to pass fewer than 16)
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	xor		%r9d, %r9d
	pxor		MSG, MSG

	/* trailing single byte, if bit 0 of the count is set */
	mov		%ecx, %r8d
	and		$0x1, %r8d
	jz		.Lld_partial_1

	mov		%ecx, %r8d
	and		$0x1e, %r8d
	add		%rsi, %r8
	mov		(%r8), %r9b

.Lld_partial_1:
	/* 2-byte piece: make room below what was gathered so far */
	mov		%ecx, %r8d
	and		$0x2, %r8d
	jz		.Lld_partial_2

	mov		%ecx, %r8d
	and		$0x1c, %r8d
	add		%rsi, %r8
	shl		$16, %r9
	mov		(%r8), %r9w

.Lld_partial_2:
	/* 4-byte piece: the 32-bit load zero-extends, so xor acts as insert */
	mov		%ecx, %r8d
	and		$0x4, %r8d
	jz		.Lld_partial_4

	mov		%ecx, %r8d
	and		$0x18, %r8d
	add		%rsi, %r8
	shl		$32, %r9
	mov		(%r8), %r8d
	xor		%r8, %r9

.Lld_partial_4:
	/* everything gathered so far becomes the low quadword of MSG */
	movq		%r9, MSG

	/* 8-byte piece: move the gathered bytes to the high quadword */
	mov		%ecx, %r8d
	and		$0x8, %r8d
	jz		.Lld_partial_8

	mov		%ecx, %r8d
	and		$0x10, %r8d
	add		%rsi, %r8
	pslldq		$8, MSG
	movq		(%r8), T0
	pxor		T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
/*
 * __store_partial: internal ABI
 *
 * Writes the first `bytes` bytes of T0 to dst as an 8-, 4-, 2- and 1-byte
 * piece, in that order, emitting each piece only while enough bytes remain.
 * At most 15 bytes are ever written.
 *
 * The byte count originates from an `unsigned int` argument, so it is read
 * from %ecx and compared unsigned: the upper half of %rcx is not guaranteed
 * to be zero and must not influence how many bytes get stored.
 *
 * input:
 *   %rdx - dst
 *   %ecx - bytes
 *   T0   - message block
 * changed:
 *   %r8
 *   %r9
 *   %r10
 *   T0   - shifted right by 8 bytes when at least 8 bytes are stored
 */
__store_partial:
	mov		%ecx, %r8d		/* r8d = bytes remaining */
	mov		%rdx, %r9		/* r9  = write cursor */

	movq		T0, %r10		/* r10 = next bytes to emit */

	cmp		$8, %r8d
	jb		.Lst_partial_8

	mov		%r10, (%r9)
	psrldq		$8, T0
	movq		T0, %r10

	sub		$8, %r8d
	add		$8, %r9

.Lst_partial_8:
	cmp		$4, %r8d
	jb		.Lst_partial_4

	mov		%r10d, (%r9)
	shr		$32, %r10

	sub		$4, %r8d
	add		$4, %r9

.Lst_partial_4:
	cmp		$2, %r8d
	jb		.Lst_partial_2

	mov		%r10w, (%r9)
	shr		$16, %r10

	sub		$2, %r8d
	add		$2, %r9

.Lst_partial_2:
	cmp		$1, %r8d
	jb		.Lst_partial_1

	mov		%r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
/*
 * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state from a 16-byte IV (%rdx) and a 16-byte key
 * (%rsi), runs 16 message-less updates, XORs the key into STATE1 once more
 * and writes the five 16-byte state words to `state` (%rdi).
 *
 * The constants are addressed RIP-relative so the code does not depend on
 * absolute relocations.
 */
ENTRY(crypto_morus640_sse2_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu		(%rdx), STATE0
	/* load key: */
	movdqu		(%rsi), KEY
	movdqa		KEY, STATE1
	/* load all ones: */
	pcmpeqd		STATE2, STATE2
	/* load the constants: */
	movdqa		.Lmorus640_const_0(%rip), STATE3
	movdqa		.Lmorus640_const_1(%rip), STATE4

	/*
	 * update 16 times with zero; KEY (%xmm5) stays intact because the
	 * message-less update only uses T0 besides the state registers:
	 */
	.rept 16
	call		__morus640_update_zero
	.endr

	/* xor-in the key again after updates: */
	pxor		KEY, STATE1

	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_init)
/*
 * void crypto_morus640_sse2_ad(void *state, const void *data,
 *                              unsigned int length);
 *
 * Feeds every complete 16-byte block of `data` (%rsi) into the state at
 * %rdi. Returns without touching the state when length < 16; any trailing
 * length % 16 bytes are not consumed here.
 *
 * `length` is an unsigned int, so it is accessed as %edx and compared
 * unsigned; the upper half of %rdx is not guaranteed to be zero.
 */
ENTRY(crypto_morus640_sse2_ad)
	FRAME_BEGIN

	cmp		$16, %edx
	jb		.Lad_out

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* pick the aligned-load loop only if data is 16-byte aligned: */
	mov		%rsi, %r8
	and		$0xF, %r8
	jnz		.Lad_u_loop

.align 4
.Lad_a_loop:
	movdqa		(%rsi), MSG
	call		__morus640_update
	sub		$16, %edx
	add		$16, %rsi
	cmp		$16, %edx
	jae		.Lad_a_loop

	jmp		.Lad_cont
.align 4
.Lad_u_loop:
	movdqu		(%rsi), MSG
	call		__morus640_update
	sub		$16, %edx
	add		$16, %rsi
	cmp		$16, %edx
	jae		.Lad_u_loop

.Lad_cont:
	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_ad)
/*
 * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
 *                               unsigned int length);
 *
 * Encrypts every complete 16-byte block from src (%rsi) to dst (%rdx) and
 * absorbs each plaintext block into the state at %rdi. Each output block is
 *   src ^ STATE0 ^ pshufd(MASK3, STATE1) ^ (STATE2 & STATE3).
 * Returns without touching the state when length < 16; any trailing
 * length % 16 bytes are not processed here.
 *
 * `length` is an unsigned int, so it is accessed as %ecx and compared
 * unsigned; the upper half of %rcx is not guaranteed to be zero.
 */
ENTRY(crypto_morus640_sse2_enc)
	FRAME_BEGIN

	cmp		$16, %ecx
	jb		.Lenc_out

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* aligned loop only if both src and dst are 16-byte aligned: */
	mov		%rsi, %r8
	or		%rdx, %r8
	and		$0xF, %r8
	jnz		.Lenc_u_loop

.align 4
.Lenc_a_loop:
	movdqa		(%rsi), MSG
	movdqa		MSG, T0
	pxor		STATE0, T0
	pshufd		$MASK3, STATE1, T1
	pxor		T1, T0
	movdqa		STATE2, T1
	pand		STATE3, T1
	pxor		T1, T0
	movdqa		T0, (%rdx)

	/* MSG still holds the plaintext block: */
	call		__morus640_update
	sub		$16, %ecx
	add		$16, %rsi
	add		$16, %rdx
	cmp		$16, %ecx
	jae		.Lenc_a_loop

	jmp		.Lenc_cont
.align 4
.Lenc_u_loop:
	movdqu		(%rsi), MSG
	movdqa		MSG, T0
	pxor		STATE0, T0
	pshufd		$MASK3, STATE1, T1
	pxor		T1, T0
	movdqa		STATE2, T1
	pand		STATE3, T1
	pxor		T1, T0
	movdqu		T0, (%rdx)

	/* MSG still holds the plaintext block: */
	call		__morus640_update
	sub		$16, %ecx
	add		$16, %rsi
	add		$16, %rdx
	cmp		$16, %ecx
	jae		.Lenc_u_loop

.Lenc_cont:
	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_enc)
/*
 * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
 *                                    unsigned int length);
 *
 * Encrypts one partial block: `length` bytes are gathered from src (%rsi)
 * into a zero-padded block, XORed with the keystream, and the first
 * `length` bytes of the result are written to dst (%rdx). The zero-padded
 * plaintext block is then absorbed into the state at %rdi.
 */
ENTRY(crypto_morus640_sse2_enc_tail)
	FRAME_BEGIN

	/* bring the five state words into registers: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* MSG = zero-padded plaintext: */
	call		__load_partial

	/* T0 = MSG ^ pshufd(MASK3, STATE1) ^ STATE0 ^ (STATE2 & STATE3): */
	pshufd		$MASK3, STATE1, T1
	movdqa		MSG, T0
	pxor		T1, T0
	pxor		STATE0, T0
	movdqa		STATE2, T1
	pand		STATE3, T1
	pxor		T1, T0

	/* write out the ciphertext bytes held in T0: */
	call		__store_partial

	/* absorb the plaintext block (MSG is untouched by the store): */
	call		__morus640_update

	/* write the state back: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_enc_tail)
/*
 * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
 *                               unsigned int length);
 *
 * Decrypts every complete 16-byte block from src (%rsi) to dst (%rdx) and
 * absorbs each recovered plaintext block into the state at %rdi. Each
 * output block is
 *   src ^ STATE0 ^ pshufd(MASK3, STATE1) ^ (STATE2 & STATE3).
 * Returns without touching the state when length < 16; any trailing
 * length % 16 bytes are not processed here.
 *
 * `length` is an unsigned int, so it is accessed as %ecx and compared
 * unsigned; the upper half of %rcx is not guaranteed to be zero.
 */
ENTRY(crypto_morus640_sse2_dec)
	FRAME_BEGIN

	cmp		$16, %ecx
	jb		.Ldec_out

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* aligned loop only if both src and dst are 16-byte aligned: */
	mov		%rsi, %r8
	or		%rdx, %r8
	and		$0xF, %r8
	jnz		.Ldec_u_loop

.align 4
.Ldec_a_loop:
	movdqa		(%rsi), MSG
	pxor		STATE0, MSG
	pshufd		$MASK3, STATE1, T0
	pxor		T0, MSG
	movdqa		STATE2, T0
	pand		STATE3, T0
	pxor		T0, MSG
	movdqa		MSG, (%rdx)

	/* MSG now holds the plaintext block: */
	call		__morus640_update
	sub		$16, %ecx
	add		$16, %rsi
	add		$16, %rdx
	cmp		$16, %ecx
	jae		.Ldec_a_loop

	jmp		.Ldec_cont
.align 4
.Ldec_u_loop:
	movdqu		(%rsi), MSG
	pxor		STATE0, MSG
	pshufd		$MASK3, STATE1, T0
	pxor		T0, MSG
	movdqa		STATE2, T0
	pand		STATE3, T0
	pxor		T0, MSG
	movdqu		MSG, (%rdx)

	/* MSG now holds the plaintext block: */
	call		__morus640_update
	sub		$16, %ecx
	add		$16, %rsi
	add		$16, %rdx
	cmp		$16, %ecx
	jae		.Ldec_u_loop

.Ldec_cont:
	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_dec)
/*
 * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
 *                                    unsigned int length);
 *
 * Decrypts one partial block: `length` bytes are gathered from src (%rsi)
 * into a zero-padded block, XORed with the keystream, and the first
 * `length` bytes of the result are written to dst (%rdx). Bytes of the
 * decrypted block at index >= length are then cleared before the block is
 * absorbed into the state at %rdi.
 *
 * `length` is an unsigned int and is therefore read from %ecx; the lane
 * index table is addressed RIP-relative.
 */
ENTRY(crypto_morus640_sse2_dec_tail)
	FRAME_BEGIN

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* decrypt message: */
	call		__load_partial

	pxor		STATE0, MSG
	pshufd		$MASK3, STATE1, T0
	pxor		T0, MSG
	movdqa		STATE2, T0
	pand		STATE3, T0
	pxor		T0, MSG
	movdqa		MSG, T0

	/* the store consumes T0 and leaves MSG intact: */
	call		__store_partial

	/*
	 * mask with byte count: broadcast the low byte of the count to all
	 * 16 lanes, then keep only the lanes whose index is below it.
	 */
	movd		%ecx, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	movdqa		.Lmorus640_counter(%rip), T1
	pcmpgtb		T1, T0
	pand		T0, MSG

	call		__morus640_update

	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_dec_tail)
/*
 * void crypto_morus640_sse2_final(void *state, void *tag_xor,
 *                                 u64 assoclen, u64 cryptlen);
 *
 * Finalization: XORs STATE0 into STATE4, runs ten updates with a block made
 * of the two lengths expressed in bits (assoclen in the low quadword,
 * cryptlen in the high quadword), then XORs the resulting keystream block
 * into the 16 bytes at tag_xor (%rsi). The state in memory at %rdi is only
 * read, never written back.
 */
ENTRY(crypto_morus640_sse2_final)
	FRAME_BEGIN

	/* bring the five state words into registers: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* STATE4 ^= STATE0: */
	pxor		STATE0, STATE4

	/* MSG = { assoclen, cryptlen }, each multiplied by 8 (bit counts): */
	movq		%rdx, MSG
	movq		%rcx, T0
	punpcklqdq	T0, MSG
	psllq		$3, MSG

	/* ten updates with the length block: */
	.rept 10
	call		__morus640_update
	.endr

	/* tag ^= pshufd(MASK3, STATE1) ^ STATE0 ^ (STATE2 & STATE3): */
	movdqu		(%rsi), MSG
	pshufd		$MASK3, STATE1, T0
	pxor		T0, MSG
	pxor		STATE0, MSG
	movdqa		STATE2, T0
	pand		STATE3, T0
	pxor		T0, MSG
	movdqu		MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_final)