2011-09-02 02:45:22 +04:00
/*
 * Blowfish Cipher Algorithm (x86_64)
 *
 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */
2013-01-19 15:39:00 +04:00
/*
 * This file is GNU as (AT&T operand order) source that is run through the
 * C preprocessor first: all helper "macros" below are cpp #defines whose
 * statements are separated with ';'.
 */
#include <linux/linkage.h>

.file "blowfish-x86_64-asm.S"
.text
/*
 * Structure of crypto context: byte offsets from the context base pointer.
 *
 * The arithmetic below describes one table of 16 + 2 entries at offset 0
 * (p) followed by four consecutive tables of 256 entries each (s0..s3).
 * Every entry is 4 bytes wide, hence the trailing "* 4".
 */
#define p	0
#define s0	((16 + 2) * 4)
#define s1	((16 + 2 + (1 * 256)) * 4)
#define s2	((16 + 2 + (2 * 256)) * 4)
#define s3	((16 + 2 + (3 * 256)) * 4)
/*
 * Register macros.
 *
 * CTX      base register for every p / s0..s3 table access.  %r12 is
 *          callee-saved in the SysV AMD64 ABI, so each entry point has to
 *          save the caller's value and put it back before returning.
 * RIO      pointer used by the block load/store macros.
 *          NOTE: RIO and RT1 are the same physical register (%rsi).  Any
 *          macro that uses RT1 as a temporary destroys RIO, so the pointer
 *          must be reloaded before the next load/store.
 * RX0-RX3  the data blocks being processed (the 1-way code uses RX0 only).
 *          These are %rax/%rbx/%rcx/%rdx because the round function needs
 *          their byte-addressable views: RXnbl is bits 0-7 and RXnbh is
 *          bits 8-15 of the register.  RXnd is the 32-bit view.
 * RT0-RT3  temporaries; RTnd is the 32-bit view.
 * RKEY     holds a round-key pair that was loaded ahead of its use.
 */
#define CTX %r12
#define RIO %rsi

#define RX0 %rax
#define RX1 %rbx
#define RX2 %rcx
#define RX3 %rdx

#define RX0d %eax
#define RX1d %ebx
#define RX2d %ecx
#define RX3d %edx

#define RX0bl %al
#define RX1bl %bl
#define RX2bl %cl
#define RX3bl %dl

#define RX0bh %ah
#define RX1bh %bh
#define RX2bh %ch
#define RX3bh %dh

#define RT0 %rdi
#define RT1 %rsi
#define RT2 %r8
#define RT3 %r9

#define RT0d %edi
#define RT1d %esi
#define RT2d %r8d
#define RT3d %r9d

#define RKEY %r10
2011-09-02 02:45:22 +04:00
/***********************************************************************
 * 1-way blowfish
 ***********************************************************************/

/*
 * F(): one Feistel step on RX0.
 *
 * Let L be the low 32 bits of RX0 on entry.  The macro computes
 *
 *	f = ((s0[L >> 24] + s1[(L >> 16) & 0xff]) ^ s2[(L >> 8) & 0xff])
 *	    + s3[L & 0xff]			(32-bit arithmetic)
 *
 * The rotates add up to a net 32-bit rotation, so on exit the two halves
 * of RX0 have traded places and f has been XORed into the new low half
 * (the old high half).  RT0d is written with 32-bit operations, which
 * zero the upper half of RT0, so the final 64-bit xorq leaves the new
 * high half untouched.
 *
 * Clobbers: RT0, RT1 (which is also RIO), RT2, flags.
 */
#define F() \
	rorq $16,		RX0; \
	movzbl RX0bh,		RT0d; \
	movzbl RX0bl,		RT1d; \
	rolq $16,		RX0; \
	movl s0(CTX,RT0,4),	RT0d; \
	addl s1(CTX,RT1,4),	RT0d; \
	movzbl RX0bh,		RT1d; \
	movzbl RX0bl,		RT2d; \
	rolq $32,		RX0; \
	xorl s2(CTX,RT1,4),	RT0d; \
	addl s3(CTX,RT2,4),	RT0d; \
	xorq RT0,		RX0;
2011-09-02 02:45:22 +04:00
/*
 * add_roundkey_enc(n): XOR the 8 bytes at p + 4*n (the 4-byte entries n and
 * n+1 of the p table) into RX0 with a single 64-bit memory operand.
 *
 * round_enc(n): that key mix followed by two F() steps.
 */
#define add_roundkey_enc(n) \
	xorq p+4*(n)(CTX),	RX0;

#define round_enc(n) \
	add_roundkey_enc(n); \
	\
	F(); \
	F();
2011-09-02 02:45:22 +04:00
/*
 * add_roundkey_dec(n): load the 8 bytes at p + 4*(n-1) (the 4-byte entries
 * n-1 and n of the p table), rotate by 32 bits so the two entries trade
 * places, and XOR the result into RX0.  Clobbers RT0.
 *
 * round_dec(n): that key mix followed by two F() steps.
 *
 * The parameter is parenthesised in the offset expression so that an
 * expression argument cannot change the arithmetic, and round_dec() does
 * not end in a line continuation, so the line following the macro can never
 * be spliced into its body.
 */
#define add_roundkey_dec(n) \
	movq p+4*((n)-1)(CTX),	RT0; \
	rorq $32,		RT0; \
	xorq RT0,		RX0;

#define round_dec(n) \
	add_roundkey_dec(n); \
	\
	F(); \
	F();
2011-09-02 02:45:22 +04:00
/*
 * Single-block I/O through the pointer in RIO.
 *
 * read_block():  load 8 bytes, rotate by 32 and byte-swap.  Afterwards the
 *                low half of RX0 is bytes 0-3 of the buffer read as a
 *                big-endian 32-bit value and the high half is bytes 4-7
 *                read the same way.
 * write_block(): byte-swap RX0 and store it, so the high half lands in
 *                bytes 0-3 and the low half in bytes 4-7, each big-endian.
 * xor_block():   same byte order as write_block(), but XORs RX0 into the
 *                8 bytes already in memory instead of overwriting them.
 *
 * write_block()/xor_block() leave RX0 byte-swapped.
 */
#define read_block() \
	movq (RIO),		RX0; \
	rorq $32,		RX0; \
	bswapq			RX0;

#define write_block() \
	bswapq			RX0; \
	movq RX0,		(RIO);

#define xor_block() \
	bswapq			RX0; \
	xorq RX0,		(RIO);
2013-01-19 15:39:00 +04:00
/*
 * __blowfish_enc_blk: encrypt one 8-byte block.
 *
 * Leaf routine: makes no calls and does not touch the stack.  Overwrites
 * %rax, %rdi, %rsi, %r8, %r10, %r11 and the flags; %r12 is used as CTX but
 * holds the caller's value again on return.
 */
ENTRY(__blowfish_enc_blk)
	/* input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool, if true: xor output
	 */
	movq %r12, %r11;		/* park caller's %r12 while it serves as CTX */

	movq %rdi, CTX;
	movq %rsi, %r10;		/* keep dst: %rsi doubles as RIO/RT1 and F() overwrites it */
	movq %rdx, RIO;

	read_block();

	round_enc(0);
	round_enc(2);
	round_enc(4);
	round_enc(6);
	round_enc(8);
	round_enc(10);
	round_enc(12);
	round_enc(14);
	add_roundkey_enc(16);

	movq %r11, %r12;		/* CTX no longer needed: restore caller's %r12 */

	movq %r10, RIO;			/* point RIO at dst for the store */
	test %cl, %cl;			/* only the low byte of the bool is examined */
	jnz .L__enc_xor;

	write_block();
	ret;
.L__enc_xor:
	xor_block();
	ret;
ENDPROC(__blowfish_enc_blk)
2011-09-02 02:45:22 +04:00
2013-01-19 15:39:00 +04:00
/*
 * blowfish_dec_blk: decrypt one 8-byte block.
 *
 * Walks the p table in the opposite direction to the encrypt routine
 * (round_dec(17) down to add_roundkey_dec(1)).  Leaf routine: makes no
 * calls and does not touch the stack.  Overwrites %rax, %rdi, %rsi, %r8,
 * %r10, %r11 and the flags; %r12 is used as CTX but holds the caller's
 * value again on return.
 */
ENTRY(blowfish_dec_blk)
	/* input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 */
	movq %r12, %r11;		/* park caller's %r12 while it serves as CTX */

	movq %rdi, CTX;
	movq %rsi, %r10;		/* keep dst: %rsi doubles as RIO/RT1 and F() overwrites it */
	movq %rdx, RIO;

	read_block();

	round_dec(17);
	round_dec(15);
	round_dec(13);
	round_dec(11);
	round_dec(9);
	round_dec(7);
	round_dec(5);
	round_dec(3);
	add_roundkey_dec(1);

	movq %r10, RIO;			/* point RIO at dst for the store */
	write_block();

	movq %r11, %r12;		/* restore caller's %r12 */

	ret;
ENDPROC(blowfish_dec_blk)
2011-09-02 02:45:22 +04:00
/**********************************************************************
  4-way blowfish, four blocks parallel
 **********************************************************************/

/* F() for 4-way. Slower when used alone/1-way, but faster when used
 * parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330).
 *
 * x is one of RX0..RX3; "x ## bh" / "x ## bl" paste the suffix onto the
 * macro name to select that register's bits 8-15 / bits 0-7 view.
 *
 * All four index bytes are extracted first (two from the low 16 bits, two
 * more after rotating by 16), then the four table entries are combined as
 * ((s0[b3] + s1[b2]) ^ s2[b1]) + s3[b0] in 32-bit arithmetic, where b0..b3
 * are the bytes of the low half of x on entry, b0 being bits 0-7.  The two
 * rorq $16 add up to a 32-bit rotation, so the halves of x have traded
 * places when the result is XORed into the new low half.
 *
 * Clobbers: RT0, RT1 (which is also RIO), RT2, RT3, flags.
 */
#define F4(x) \
	movzbl x ## bh,		RT1d; \
	movzbl x ## bl,		RT3d; \
	rorq $16,		x; \
	movzbl x ## bh,		RT0d; \
	movzbl x ## bl,		RT2d; \
	rorq $16,		x; \
	movl s0(CTX,RT0,4),	RT0d; \
	addl s1(CTX,RT2,4),	RT0d; \
	xorl s2(CTX,RT1,4),	RT0d; \
	addl s3(CTX,RT3,4),	RT0d; \
	xorq RT0,		x;
2011-09-02 02:45:22 +04:00
/*
 * 4-way encryption round helpers.
 *
 * add_preloaded_roundkey4(): XOR the key pair currently held in RKEY into
 *                            all four blocks.
 * preload_roundkey_enc(n):   load the 8 bytes at p + 4*n into RKEY.
 * add_roundkey_enc4(n):      apply the key already in RKEY, then fetch the
 *                            pair for index n + 2 so it is ready for the
 *                            next round.  RKEY must therefore have been
 *                            preloaded for index n before the first use.
 * round_enc4(n):             key mix followed by two passes of F4() over
 *                            RX0..RX3.
 */
#define add_preloaded_roundkey4() \
	xorq RKEY,		RX0; \
	xorq RKEY,		RX1; \
	xorq RKEY,		RX2; \
	xorq RKEY,		RX3;

#define preload_roundkey_enc(n) \
	movq p+4*(n)(CTX),	RKEY;

#define add_roundkey_enc4(n) \
	add_preloaded_roundkey4(); \
	preload_roundkey_enc((n) + 2);

#define round_enc4(n) \
	add_roundkey_enc4(n); \
	\
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3); \
	\
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3);
2011-09-02 02:45:22 +04:00
/*
 * 4-way decryption round helpers.
 *
 * preload_roundkey_dec(n): load the 8 bytes at p + 4*(n-1) into RKEY and
 *                          rotate by 32 bits so the two 4-byte entries
 *                          trade places.
 * add_roundkey_dec4(n):    XOR the key already in RKEY into RX0..RX3, then
 *                          fetch the pair for index n - 2 so it is ready
 *                          for the next round.  RKEY must therefore have
 *                          been preloaded for index n before the first use.
 * round_dec4(n):           key mix followed by two passes of F4() over
 *                          RX0..RX3.
 */
#define preload_roundkey_dec(n) \
	movq p+4*((n)-1)(CTX),	RKEY; \
	rorq $32,		RKEY;

#define add_roundkey_dec4(n) \
	add_preloaded_roundkey4(); \
	preload_roundkey_dec((n) - 2);

#define round_dec4(n) \
	add_roundkey_dec4(n); \
	\
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3); \
	\
	F4(RX0); \
	F4(RX1); \
	F4(RX2); \
	F4(RX3);
2011-09-02 02:45:22 +04:00
/*
 * Four-block I/O through the pointer in RIO; block i lives at byte offset
 * 8*i and is held in RXi.
 *
 * read_block4():  load each block, rotate it by 32 and byte-swap it.
 * write_block4(): byte-swap each block and store it.
 * xor_block4():   byte-swap each block and XOR it into the 8 bytes already
 *                 in memory.
 *
 * read_block4() overwrites all of RX0..RX3 (%rax, %rbx, %rcx, %rdx), so any
 * value the caller still needs from those registers must be saved first.
 */
#define read_block4() \
	movq (RIO),		RX0; \
	rorq $32,		RX0; \
	bswapq			RX0; \
	\
	movq 8(RIO),		RX1; \
	rorq $32,		RX1; \
	bswapq			RX1; \
	\
	movq 16(RIO),		RX2; \
	rorq $32,		RX2; \
	bswapq			RX2; \
	\
	movq 24(RIO),		RX3; \
	rorq $32,		RX3; \
	bswapq			RX3;

#define write_block4() \
	bswapq			RX0; \
	movq RX0,		(RIO); \
	\
	bswapq			RX1; \
	movq RX1,		8(RIO); \
	\
	bswapq			RX2; \
	movq RX2,		16(RIO); \
	\
	bswapq			RX3; \
	movq RX3,		24(RIO);

#define xor_block4() \
	bswapq			RX0; \
	xorq RX0,		(RIO); \
	\
	bswapq			RX1; \
	xorq RX1,		8(RIO); \
	\
	bswapq			RX2; \
	xorq RX2,		16(RIO); \
	\
	bswapq			RX3; \
	xorq RX3,		24(RIO);
2013-01-19 15:39:00 +04:00
/*
 * __blowfish_enc_blk_4way: encrypt four consecutive 8-byte blocks (32 bytes
 * at src, 32 bytes at dst).
 *
 * Stack use: three pushes (%r12, %rbx, %rcx), all popped before returning.
 * %r12 and %rbx hold the caller's values again on return.  Makes no calls.
 */
ENTRY(__blowfish_enc_blk_4way)
	/* input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool, if true: xor output
	 */
	pushq %r12;			/* CTX */
	pushq %rbx;			/* RX1 */
	pushq %rcx;			/* xor flag: %rcx is RX2 and is about to be overwritten */

	movq %rdi, CTX;
	movq %rsi, %r11;		/* keep dst: %rsi doubles as RIO/RT1 and F4() overwrites it */
	movq %rdx, RIO;

	preload_roundkey_enc(0);

	read_block4();

	round_enc4(0);
	round_enc4(2);
	round_enc4(4);
	round_enc4(6);
	round_enc4(8);
	round_enc4(10);
	round_enc4(12);
	round_enc4(14);
	add_preloaded_roundkey4();

	/*
	 * CTX is not needed past this point, so %r12 is reused to receive the
	 * xor flag that was pushed from %rcx on entry.
	 */
	popq %r12;
	movq %r11, RIO;			/* point RIO at dst for the store */

	test %r12b, %r12b;		/* only the low byte of the bool is examined */
	jnz .L__enc_xor4;

	write_block4();

	popq %rbx;
	popq %r12;
	ret;

.L__enc_xor4:
	xor_block4();

	popq %rbx;
	popq %r12;
	ret;
ENDPROC(__blowfish_enc_blk_4way)
2011-09-02 02:45:22 +04:00
2013-01-19 15:39:00 +04:00
/*
 * blowfish_dec_blk_4way: decrypt four consecutive 8-byte blocks (32 bytes
 * at src, 32 bytes at dst).
 *
 * Stack use: two pushes (%r12, %rbx), both popped before returning, so the
 * caller's values of those registers are preserved.  Makes no calls.
 */
ENTRY(blowfish_dec_blk_4way)
	/* input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 */
	pushq %r12;			/* CTX */
	pushq %rbx;			/* RX1 */

	movq %rdi, CTX;
	movq %rsi, %r11;		/* keep dst: %rsi doubles as RIO/RT1 and F4() overwrites it */
	movq %rdx, RIO;

	preload_roundkey_dec(17);
	read_block4();

	round_dec4(17);
	round_dec4(15);
	round_dec4(13);
	round_dec4(11);
	round_dec4(9);
	round_dec4(7);
	round_dec4(5);
	round_dec4(3);
	add_preloaded_roundkey4();

	movq %r11, RIO;			/* point RIO at dst for the store */
	write_block4();

	popq %rbx;
	popq %r12;

	ret;
ENDPROC(blowfish_dec_blk_4way)