/*
 * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64)
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */
#include <linux/linkage.h>
#include "glue_helper-asm-avx.S"

.file "cast6-avx-x86_64-asm_64.S"
.extern cast_s1
.extern cast_s2
.extern cast_s3
.extern cast_s4
/* structure of crypto context */
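/*
 * km: 48 32-bit masking keys (12 quad-rounds x 4); kr: 48 one-byte rotation
 * keys stored immediately after km, hence the 12*4*4 byte offset.
 */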
#define km	0
#define kr	(12*4*4)

/* s-boxes */
#define s1	cast_s1
#define s2	cast_s2
#define s3	cast_s3
#define s4	cast_s4

/**********************************************************************
  8-way AVX cast6
 **********************************************************************/
#define CTX %rdi

#define RA1 %xmm0
#define RB1 %xmm1
#define RC1 %xmm2
#define RD1 %xmm3

#define RA2 %xmm4
#define RB2 %xmm5
#define RC2 %xmm6
#define RD2 %xmm7

#define RX  %xmm8

#define RKM  %xmm9
#define RKR  %xmm10
#define RKRF %xmm11
#define RKRR %xmm12
#define R32  %xmm13
#define R1ST %xmm14

#define RTMP %xmm15

#define RID1  %rbp
#define RID1d %ebp
#define RID2  %rsi
#define RID2d %esi

#define RGI1   %rdx
#define RGI1bl %dl
#define RGI1bh %dh
#define RGI2   %rcx
#define RGI2bl %cl
#define RGI2bh %ch

#define RGI3   %rax
#define RGI3bl %al
#define RGI3bh %ah
#define RGI4   %rbx
#define RGI4bl %bl
#define RGI4bh %bh

#define RFS1  %r8
#define RFS1d %r8d
#define RFS2  %r9
#define RFS2d %r9d
#define RFS3  %r10
#define RFS3d %r10d

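/*
 * lookup_32bit(): four 8->32-bit s-box lookups (cast_s1..cast_s4) indexed by
 * successive bytes of src, combined into dst with op1/op2/op3
 * (xor/sub/add, depending on which CAST6 f-function is being computed).
 */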
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
	movzbl		src ## bh,     RID1d;    \
	movzbl		src ## bl,     RID2d;    \
	shrq $16,	src;                     \
	movl		s1(, RID1, 4), dst ## d; \
	op1		s2(, RID2, 4), dst ## d; \
	movzbl		src ## bh,     RID1d;    \
	movzbl		src ## bl,     RID2d;    \
	interleave_op(il_reg);			 \
	op2		s3(, RID1, 4), dst ## d; \
	op3		s4(, RID2, 4), dst ## d;

#define dummy(d) /* do nothing */

#define shr_next(reg) \
	shrq $16,	reg;

#define F_head(a, x, gi1, gi2, op0) \
	op0	a,	RKM,  x;                 \
	vpslld	RKRF,	x,    RTMP;              \
	vpsrld	RKRR,	x,    x;                 \
	vpor	RTMP,	x,    x;                 \
	\
	vmovq		x,    gi1;               \
	vpextrq $1,	x,    gi2;

#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
	lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1);  \
	lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2);  \
	\
	lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none);      \
	shlq $32,	RFS2;                                       \
	orq		RFS1, RFS2;                                 \
	lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none);      \
	shlq $32,	RFS1;                                       \
	orq		RFS1, RFS3;                                 \
	\
	vmovq		RFS2, x;                                    \
	vpinsrq $1,	RFS3, x, x;

#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
	F_head(b1, RX, RGI1, RGI2, op0);              \
	F_head(b2, RX, RGI3, RGI4, op0);              \
	\
	F_tail(b1, RX, RGI1, RGI2, op1, op2, op3);    \
	F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3);  \
	\
	vpxor		a1, RX,   a1;                 \
	vpxor		a2, RTMP, a2;

#define F1_2(a1, b1, a2, b2) \
	F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
#define F2_2(a1, b1, a2, b2) \
	F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
#define F3_2(a1, b1, a2, b2) \
	F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
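
/*
 * For reference (RFC 2612): with I = (Km op0 D) <<< Kr,
 *	f1 = ((S1[Ia] ^ S2[Ib]) - S3[Ic]) + S4[Id]	(op0 = add)
 *	f2 = ((S1[Ia] - S2[Ib]) + S3[Ic]) ^ S4[Id]	(op0 = xor)
 *	f3 = ((S1[Ia] + S2[Ib]) ^ S3[Ic]) - S4[Id]	(op0 = sub)
 * where Ia is the most significant byte of I. F1_2/F2_2/F3_2 above compute
 * these for two 4-block register groups at once.
 */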
#define qop(in, out, f) \
	F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2);

#define get_round_keys(nn) \
	vbroadcastss	(km+(4*(nn)))(CTX), RKM;        \
	vpand		R1ST,               RKR,  RKRF; \
	vpsubq		RKRF,               R32,  RKRR; \
	vpsrldq $1,	RKR,                RKR;
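/*
 * Q() is the CAST6 forward quad-round and QBAR() the reverse quad-round
 * (RFC 2612):
 *	Q:    C ^= f1(D); B ^= f2(C); A ^= f3(B); D ^= f1(A);
 *	QBAR: D ^= f1(A); A ^= f3(B); B ^= f2(C); C ^= f1(D);
 * Each step uses the masking/rotation keys Km[4n+i]/Kr[4n+i] loaded by
 * get_round_keys().
 */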
#define Q(n) \
	get_round_keys(4*n+0); \
	qop(RD, RC, 1);        \
	\
	get_round_keys(4*n+1); \
	qop(RC, RB, 2);        \
	\
	get_round_keys(4*n+2); \
	qop(RB, RA, 3);        \
	\
	get_round_keys(4*n+3); \
	qop(RA, RD, 1);

#define QBAR(n) \
	get_round_keys(4*n+3); \
	qop(RA, RD, 1);        \
	\
	get_round_keys(4*n+2); \
	qop(RB, RA, 3);        \
	\
	get_round_keys(4*n+1); \
	qop(RC, RB, 2);        \
	\
	get_round_keys(4*n+0); \
	qop(RD, RC, 1);

#define shuffle(mask) \
	vpshufb		mask,            RKR, RKR;

#define preload_rkr(n, do_mask, mask) \
	vbroadcastss	.L16_mask,                RKR;      \
	/* add 16-bit rotation to key rotations (mod 32) */ \
	vpxor		(kr+n*16)(CTX),           RKR, RKR; \
	do_mask(mask);

#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	vpunpckldq		x1, x0, t0; \
	vpunpckhdq		x1, x0, t2; \
	vpunpckldq		x3, x2, t1; \
	vpunpckhdq		x3, x2, x3; \
	\
	vpunpcklqdq		t1, t0, x0; \
	vpunpckhqdq		t1, t0, x1; \
	vpunpcklqdq		x3, t2, x2; \
	vpunpckhqdq		x3, t2, x3;
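
/*
 * transpose_4x4() transposes a 4x4 matrix of 32-bit words, so that after
 * inpack_blocks() each of RA/RB/RC/RD holds the same word (A, B, C or D)
 * from four different blocks, letting the round macros process four blocks
 * per register group in parallel.
 */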
#define inpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
	vpshufb rmask,	x0,	x0; \
	vpshufb rmask,	x1,	x1; \
	vpshufb rmask,	x2,	x2; \
	vpshufb rmask,	x3,	x3; \
	\
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

#define outunpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	vpshufb rmask,	x0, x0; \
	vpshufb rmask,	x1, x1; \
	vpshufb rmask,	x2, x2; \
	vpshufb rmask,	x3, x3;
.data
.align 16
.Lxts_gf128mul_and_shl1_mask:
	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.Lbswap_mask:
	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
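/*
 * The .Lrkr_* masks below reorder the 16 kr bytes loaded by preload_rkr()
 * so that get_round_keys(), which always consumes the lowest remaining byte,
 * sees them in the order the Q()/QBAR() rounds of the enc/dec paths use them.
 */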
.Lrkr_enc_Q_Q_QBAR_QBAR:
	.byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lrkr_dec_Q_Q_Q_Q:
	.byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
.Lrkr_dec_Q_Q_QBAR_QBAR:
	.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.L16_mask:
	.byte 16, 16, 16, 16
.L32_mask:
	.byte 32, 0, 0, 0
.Lfirst_mask:
	.byte 0x1f, 0, 0, 0

.text
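/*
 * __cast6_enc_blk8/__cast6_dec_blk8 use %rbp and %rbx as scratch registers
 * (RID1/RGI4), so both callee-saved registers are saved and restored around
 * the rounds.
 */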
.align 8
__cast6_enc_blk8:
	/* input:
	 *	%rdi: ctx, CTX
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
	 * output:
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
	 */

	pushq %rbp;
	pushq %rbx;

	vmovdqa .Lbswap_mask, RKM;
	vmovd .Lfirst_mask, R1ST;
	vmovd .L32_mask, R32;

	inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

	preload_rkr(0, dummy, none);
	Q(0);
	Q(1);
	Q(2);
	Q(3);
	preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR);
	Q(4);
	Q(5);
	QBAR(6);
	QBAR(7);
	preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR);
	QBAR(8);
	QBAR(9);
	QBAR(10);
	QBAR(11);

	popq %rbx;
	popq %rbp;

	vmovdqa .Lbswap_mask, RKM;

	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

	ret;
ENDPROC(__cast6_enc_blk8)

.align 8
__cast6_dec_blk8:
	/* input:
	 *	%rdi: ctx, CTX
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
	 * output:
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
	 */

	pushq %rbp;
	pushq %rbx;

	vmovdqa .Lbswap_mask, RKM;
	vmovd .Lfirst_mask, R1ST;
	vmovd .L32_mask, R32;

	inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

	preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
	Q(11);
	Q(10);
	Q(9);
	Q(8);
	preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR);
	Q(7);
	Q(6);
	QBAR(5);
	QBAR(4);
	preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR);
	QBAR(3);
	QBAR(2);
	QBAR(1);
	QBAR(0);

	popq %rbx;
	popq %rbp;

	vmovdqa .Lbswap_mask, RKM;

	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

	ret;
ENDPROC(__cast6_dec_blk8)

ENTRY(cast6_ecb_enc_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	movq %rsi, %r11;

	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	call __cast6_enc_blk8;

	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	ret;
ENDPROC(cast6_ecb_enc_8way)

ENTRY(cast6_ecb_dec_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	movq %rsi, %r11;

	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	call __cast6_dec_blk8;

	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	ret;
ENDPROC(cast6_ecb_dec_8way)

ENTRY(cast6_cbc_dec_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */

	pushq %r12;

	movq %rsi, %r11;
	movq %rdx, %r12;

	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	call __cast6_dec_blk8;

	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	popq %r12;

	ret;
ENDPROC(cast6_cbc_dec_8way)

ENTRY(cast6_ctr_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: iv (little endian, 128bit)
	 */

	pushq %r12;

	movq %rsi, %r11;
	movq %rdx, %r12;

	load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
		      RD2, RX, RKR, RKM);

	call __cast6_enc_blk8;

	store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	popq %r12;

	ret;
ENDPROC(cast6_ctr_8way)

ENTRY(cast6_xts_enc_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 */

	movq %rsi, %r11;

	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
	load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
		      RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);

	call __cast6_enc_blk8;

	/* dst <= regs xor IVs(in dst) */
	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	ret;
ENDPROC(cast6_xts_enc_8way)

ENTRY(cast6_xts_dec_8way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
	 */

	movq %rsi, %r11;

	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
	load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
		      RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);

	call __cast6_dec_blk8;

	/* dst <= regs xor IVs(in dst) */
	store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	ret;
ENDPROC(cast6_xts_dec_8way)