/*
 *	Implement fast SHA-1 with AVX2 instructions. (x86_64)
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Contact Information:
 *	Ilya Albrekht <ilya.albrekht@intel.com>
 *	Maxim Locktyukhin <maxim.locktyukhin@intel.com>
 *	Ronen Zohar <ronen.zohar@intel.com>
 *	Chandramouli Narayanan <mouli@linux.intel.com>
 *
 * BSD LICENSE
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
/*
 * SHA-1 implementation with Intel(R) AVX2 instruction set extensions.
 *
 * This implementation is based on the previous SSSE3 release:
 * Visit http://software.intel.com/en-us/articles/
 * and refer to improving-the-performance-of-the-secure-hash-algorithm-1/
 *
 * Updates 20-byte SHA-1 record in 'hash' for even number of
 * 'num_blocks' consecutive 64-byte blocks
 *
 * extern "C" void sha1_transform_avx2(
 *	int *hash, const char *input, size_t num_blocks );
 */
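
/*
 * Minimal caller sketch (hypothetical wrapper, not part of this file):
 * under the x86_64 SysV ABI the three arguments arrive in %rdi, %rsi
 * and %rdx, which is what the CTX/BUF/CNT defines below map to.
 *
 *	void hash_blocks(u32 state[5], const u8 *data, size_t nblocks)
 *	{
 *		sha1_transform_avx2((int *)state, (const char *)data,
 *				    nblocks);
 *	}
 */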
#include <linux/linkage.h>

#define	CTX	%rdi	/* arg1 */
#define BUF	%rsi	/* arg2 */
#define CNT	%rdx	/* arg3 */

#define	REG_A	%ecx
#define	REG_B	%esi
#define	REG_C	%edi
#define	REG_D	%eax
#define	REG_E	%edx
#define	REG_TB	%ebx
#define	REG_TA	%r12d
#define	REG_RA	%rcx
#define	REG_RB	%rsi
#define	REG_RC	%rdi
#define	REG_RD	%rax
#define	REG_RE	%rdx
#define	REG_RTA	%r12
#define	REG_RTB	%rbx
#define	REG_T1	%r11d

#define	xmm_mov		vmovups
#define	avx2_zeroupper	vzeroupper

#define	RND_F1	1
#define	RND_F2	2
#define	RND_F3	3
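
/*
 * REGALLOC binds the symbolic names A..E, TB/TA (and their 64-bit
 * aliases RA..RE, RTA/RTB) to fixed physical registers; ROTATE_STATE
 * further down re-binds the names each round instead of moving data.
 */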
.macro REGALLOC
	.set A, REG_A
	.set B, REG_B
	.set C, REG_C
	.set D, REG_D
	.set E, REG_E
	.set TB, REG_TB
	.set TA, REG_TA

	.set RA, REG_RA
	.set RB, REG_RB
	.set RC, REG_RC
	.set RD, REG_RD
	.set RE, REG_RE

	.set RTA, REG_RTA
	.set RTB, REG_RTB

	.set T1, REG_T1
.endm
#define HASH_PTR	%r9
#define BLOCKS_CTR	%r8
#define BUFFER_PTR	%r10
#define BUFFER_PTR2	%r13

#define PRECALC_BUF	%r14
#define WK_BUF		%r15

#define W_TMP		%xmm0
#define WY_TMP		%ymm0
#define WY_TMP2		%ymm9

# AVX2 variables
#define WY0		%ymm3
#define WY4		%ymm5
#define WY08		%ymm7
#define WY12		%ymm8
#define WY16		%ymm12
#define WY20		%ymm13
#define WY24		%ymm14
#define WY28		%ymm15

#define YMM_SHUFB_BSWAP	%ymm10
/*
 * Keep 2 iterations precalculated at a time:
 *    - 80 DWORDs per iteration * 2
 */
#define W_SIZE		(80*2*2 + 16)

#define WK(t)	((((t) % 80) / 4)*32 + ((t) % 4)*4 + ((t)/80)*16)(WK_BUF)
#define PRECALC_WK(t)	((t)*2*2)(PRECALC_BUF)
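
/*
 * WK(t) addresses the precomputed W[t]+K dword for round t in the WK
 * buffer: four consecutive rounds share a 32-byte slot ((t%80)/4)*32,
 * (t%4)*4 picks the dword within the slot, and (t/80)*16 picks the
 * 16-byte half belonging to the first or the second block.
 */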
.macro UPDATE_HASH  hash, val
	add	\hash, \val
	mov	\val, \hash
.endm
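
/*
 * PRECALC_RESET_WY/PRECALC_ROTATE_WY implement a software register
 * rotation: the WY_xx / WY_minus_xx names are re-bound to different
 * ymm registers at each scheduling step instead of moving data.
 */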
.macro PRECALC_RESET_WY
	.set WY_00, WY0
	.set WY_04, WY4
	.set WY_08, WY08
	.set WY_12, WY12
	.set WY_16, WY16
	.set WY_20, WY20
	.set WY_24, WY24
	.set WY_28, WY28
	.set WY_32, WY_00
.endm
.macro PRECALC_ROTATE_WY
	/* Rotate macros */
	.set WY_32, WY_28
	.set WY_28, WY_24
	.set WY_24, WY_20
	.set WY_20, WY_16
	.set WY_16, WY_12
	.set WY_12, WY_08
	.set WY_08, WY_04
	.set WY_04, WY_00
	.set WY_00, WY_32

	/* Define register aliases */
	.set WY, WY_00
	.set WY_minus_04, WY_04
	.set WY_minus_08, WY_08
	.set WY_minus_12, WY_12
	.set WY_minus_16, WY_16
	.set WY_minus_20, WY_20
	.set WY_minus_24, WY_24
	.set WY_minus_28, WY_28
	.set WY_minus_32, WY
.endm
.macro PRECALC_00_15
	.if (i == 0) # Initialize and rotate registers
		PRECALC_RESET_WY
		PRECALC_ROTATE_WY
	.endif

	/* message scheduling pre-compute for rounds 0-15 */
	.if   ((i & 7) == 0)
		/*
		 * blended AVX2 and ALU instruction scheduling
		 * 1 vector iteration per 8 rounds
		 */
		vmovdqu (i * 2)(BUFFER_PTR), W_TMP
	.elseif ((i & 7) == 1)
		vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
			 WY_TMP, WY_TMP
	.elseif ((i & 7) == 2)
		vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
	.elseif ((i & 7) == 4)
		vpaddd  K_XMM + K_XMM_AR(%rip), WY, WY_TMP
	.elseif ((i & 7) == 7)
		vmovdqu  WY_TMP, PRECALC_WK(i&~7)

		PRECALC_ROTATE_WY
	.endif
.endm
.macro PRECALC_16_31
	/*
	 * message scheduling pre-compute for rounds 16-31
	 * calculating last 32 w[i] values in 8 XMM registers
	 * pre-calculate K+w[i] values and store to mem
	 * for later load by ALU add instruction
	 *
	 * "brute force" vectorization for rounds 16-31 only
	 * due to w[i]->w[i-3] dependency
	 */
	.if   ((i & 7) == 0)
		/*
		 * blended AVX2 and ALU instruction scheduling
		 * 1 vector iteration per 8 rounds
		 */
		/* w[i-14] */
		vpalignr	$8, WY_minus_16, WY_minus_12, WY
		vpsrldq	$4, WY_minus_04, WY_TMP		/* w[i-3] */
	.elseif ((i & 7) == 1)
		vpxor	WY_minus_08, WY, WY
		vpxor	WY_minus_16, WY_TMP, WY_TMP
	.elseif ((i & 7) == 2)
		vpxor	WY_TMP, WY, WY
		vpslldq	$12, WY, WY_TMP2
	.elseif ((i & 7) == 3)
		vpslld	$1, WY, WY_TMP
		vpsrld	$31, WY, WY
	.elseif ((i & 7) == 4)
		vpor	WY, WY_TMP, WY_TMP
		vpslld	$2, WY_TMP2, WY
	.elseif ((i & 7) == 5)
		vpsrld	$30, WY_TMP2, WY_TMP2
		vpxor	WY, WY_TMP, WY_TMP
	.elseif ((i & 7) == 7)
		vpxor	WY_TMP2, WY_TMP, WY
		vpaddd	K_XMM + K_XMM_AR(%rip), WY, WY_TMP
		vmovdqu	WY_TMP, PRECALC_WK(i&~7)

		PRECALC_ROTATE_WY
	.endif
.endm
.macro PRECALC_32_79
	/*
	 * in SHA-1 specification:
	 * w[i] = (w[i-3] ^ w[i-8]  ^ w[i-14] ^ w[i-16]) rol 1
	 * instead we do equal:
	 * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2
	 * allows more efficient vectorization
	 * since w[i]=>w[i-3] dependency is broken
	 */
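	/*
	 * Derivation sketch: expand w[i-3], w[i-8], w[i-14] and w[i-16]
	 * once more with the original recurrence; the terms at distances
	 * 11, 17, 19, 22, 24 and 30 each appear twice and cancel under
	 * xor, leaving w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32] rotated by
	 * one more bit, i.e. rol 2 in total.
	 */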
	.if   ((i & 7) == 0)
		/*
		 * blended AVX2 and ALU instruction scheduling
		 * 1 vector iteration per 8 rounds
		 */
		vpalignr	$8, WY_minus_08, WY_minus_04, WY_TMP
	.elseif ((i & 7) == 1)
		/* W is W_minus_32 before xor */
		vpxor	WY_minus_28, WY, WY
	.elseif ((i & 7) == 2)
		vpxor	WY_minus_16, WY_TMP, WY_TMP
	.elseif ((i & 7) == 3)
		vpxor	WY_TMP, WY, WY
	.elseif ((i & 7) == 4)
		vpslld	$2, WY, WY_TMP
	.elseif ((i & 7) == 5)
		vpsrld	$30, WY, WY
		vpor	WY, WY_TMP, WY
	.elseif ((i & 7) == 7)
		vpaddd	K_XMM + K_XMM_AR(%rip), WY, WY_TMP
		vmovdqu	WY_TMP, PRECALC_WK(i&~7)

		PRECALC_ROTATE_WY
	.endif
.endm
.macro PRECALC r, s
	.set i, \r

	.if (i < 40)
		.set K_XMM, 32*0
	.elseif (i < 80)
		.set K_XMM, 32*1
	.elseif (i < 120)
		.set K_XMM, 32*2
	.else
		.set K_XMM, 32*3
	.endif

	.if (i<32)
		PRECALC_00_15	\s
	.elseif (i<64)
		PRECALC_16_31	\s
	.elseif (i < 160)
		PRECALC_32_79	\s
	.endif
.endm
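
/*
 * ROTATE_STATE renames the scalar working variables (and their 64-bit
 * aliases) instead of copying them, so every round simply writes to a
 * different physical register.
 */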
.macro ROTATE_STATE
	.set T_REG, E
	.set E, D
	.set D, C
	.set C, B
	.set B, TB
	.set TB, A
	.set A, T_REG

	.set T_REG, RE
	.set RE, RD
	.set RD, RC
	.set RC, RB
	.set RB, RTB
	.set RTB, RA
	.set RA, T_REG
.endm
/* Macro relies on saved ROUND_Fx */
.macro RND_FUN f, r
	.if (\f == RND_F1)
		ROUND_F1	\r
	.elseif (\f == RND_F2)
		ROUND_F2	\r
	.elseif (\f == RND_F3)
		ROUND_F3	\r
	.endif
.endm
.macro RR r
	.set round_id, (\r % 80)

	.if (round_id == 0)	/* Precalculate F for first round */
		.set ROUND_FUNC, RND_F1
		mov	B, TB

		rorx	$(32-30), B, B	/* b>>>2 */
		andn	D, TB, T1
		and	C, TB
		xor	T1, TB
	.endif

	RND_FUN ROUND_FUNC, \r
	ROTATE_STATE

	.if (round_id == 18)
		.set ROUND_FUNC, RND_F2
	.elseif (round_id == 38)
		.set ROUND_FUNC, RND_F3
	.elseif (round_id == 58)
		.set ROUND_FUNC, RND_F2
	.endif

	.set round_id, ( (\r+1) % 80)

	RND_FUN ROUND_FUNC, (\r+1)
	ROTATE_STATE
.endm
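
/*
 * Each RR invocation runs two rounds (r and r+1). The ROUND_Fx macros
 * compute F for the following round, so ROUND_FUNC is switched at
 * round_id 18/38/58: the value produced during rounds 19/39/59 is the
 * one consumed by rounds 20/40/60, giving the usual SHA-1 schedule of
 * F1 for 0-19, F2 for 20-39, F3 for 40-59 and F2 again for 60-79.
 */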
.macro ROUND_F1 r
	add	WK(\r), E

	andn	C, A, T1			/* ~b&d */
	lea	(RE,RTB), E		/* Add F from the previous round */

	rorx	$(32-5), A, TA		/* T2 = A >>> 5 */
	rorx	$(32-30), A, TB		/* b>>>2 for next round */

	PRECALC	(\r)			/* msg scheduling for next 2 blocks */

	/*
	 * Calculate F for the next round
	 * (b & c) ^ andn[b, d]
	 */
	and	B, A			/* b&c */
	xor	T1, A			/* F1 = (b&c) ^ (~b&d) */

	lea	(RE,RTA), E		/* E += A >>> 5 */
.endm
.macro ROUND_F2 r
	add	WK(\r), E
	lea	(RE,RTB), E		/* Add F from the previous round */

	/* Calculate F for the next round */
	rorx	$(32-5), A, TA		/* T2 = A >>> 5 */
	.if ((round_id) < 79)
		rorx	$(32-30), A, TB	/* b>>>2 for next round */
	.endif
	PRECALC	(\r)			/* msg scheduling for next 2 blocks */
	.if ((round_id) < 79)
		xor	B, A
	.endif

	add	TA, E			/* E += A >>> 5 */

	.if ((round_id) < 79)
		xor	C, A
	.endif
.endm
.macro ROUND_F3 r
	add	WK(\r), E
	PRECALC	(\r)			/* msg scheduling for next 2 blocks */

	lea	(RE,RTB), E		/* Add F from the previous round */

	mov	B, T1
	or	A, T1

	rorx	$(32-5), A, TA		/* T2 = A >>> 5 */
	rorx	$(32-30), A, TB		/* b>>>2 for next round */

	/*
	 * Calculate F for the next round
	 * (b and c) or (d and (b or c))
	 */
	and	C, T1
	and	B, A
	or	T1, A

	add	TA, E			/* E += A >>> 5 */
.endm
/*
 * Add \d to \a only if \b >= \c (uses RTA as a temp):
 *	\a = (\b >= \c) ? \a + \d : \a
 */
.macro ADD_IF_GE a, b, c, d
	mov	\a, RTA
	add	$\d, RTA
	cmp	$\c, \b
	cmovge	RTA, \a
.endm
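
/*
 * ADD_IF_GE is used below to step BUFFER_PTR/BUFFER_PTR2 to the next
 * 64-byte block only while enough blocks remain, so the buffer
 * pointers never advance past the input.
 */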
/*
 * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
 */
.macro SHA1_PIPELINED_MAIN_BODY

	REGALLOC

	mov	(HASH_PTR), A
	mov	4(HASH_PTR), B
	mov	8(HASH_PTR), C
	mov	12(HASH_PTR), D
	mov	16(HASH_PTR), E

	mov	%rsp, PRECALC_BUF
	lea	(2*4*80+32)(%rsp), WK_BUF
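
	/*
	 * The stack holds two precalc regions of 2*4*80 bytes each;
	 * PRECALC_BUF and WK_BUF are swapped with xchg after the
	 * initial precalc and again at the end of each two-block pass,
	 * so one region is consumed by the rounds while the other is
	 * being refilled.
	 */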
	# Precalc WK for first 2 blocks
	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
	.set i, 0
	.rept    160
		PRECALC	i
		.set i, i + 1
	.endr
	/* Go to next block if needed */
	ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
	xchg	WK_BUF, PRECALC_BUF

	.align 32
_loop:
	/*
	 * code loops through more than one block
	 * BLOCKS_CTR reaching zero signals that the last block has
	 * already been processed
	 */
	test	BLOCKS_CTR, BLOCKS_CTR
	jnz	_begin

	.align 32
	jmp	_end
	.align 32
_begin:

	/*
	 * Do first block
	 * rounds: 0,2,4,6,8
	 */
	.set j, 0
	.rept 5
		RR	j
		.set j, j+2
	.endr

	jmp _loop0
_loop0:

	/*
	 * rounds:
	 * 10,12,14,16,18
	 * 20,22,24,26,28
	 * 30,32,34,36,38
	 * 40,42,44,46,48
	 * 50,52,54,56,58
	 */
	.rept 25
		RR	j
		.set j, j+2
	.endr
	/* Update Counter */
	sub	$1, BLOCKS_CTR
	/* Move to the next block only if needed */
	ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
	/*
	 * rounds
	 * 60,62,64,66,68
	 * 70,72,74,76,78
	 */
	.rept 10
		RR	j
		.set j, j+2
	.endr

	UPDATE_HASH	(HASH_PTR), A
	UPDATE_HASH	4(HASH_PTR), TB
	UPDATE_HASH	8(HASH_PTR), C
	UPDATE_HASH	12(HASH_PTR), D
	UPDATE_HASH	16(HASH_PTR), E
	test	BLOCKS_CTR, BLOCKS_CTR
	jz	_loop
	mov	TB, B

	/* Process second block */
	/*
	 * rounds
	 *  0+80, 2+80, 4+80, 6+80, 8+80
	 * 10+80,12+80,14+80,16+80,18+80
	 */

	.set j, 0
	.rept 10
		RR	j+80
		.set j, j+2
	.endr

	jmp	_loop1
_loop1:
	/*
	 * rounds
	 * 20+80,22+80,24+80,26+80,28+80
	 * 30+80,32+80,34+80,36+80,38+80
	 */
	.rept 10
		RR	j+80
		.set j, j+2
	.endr

	jmp	_loop2
_loop2:

	/*
	 * rounds
	 * 40+80,42+80,44+80,46+80,48+80
	 * 50+80,52+80,54+80,56+80,58+80
	 */
	.rept 10
		RR	j+80
		.set j, j+2
	.endr
	/* update counter */
	sub	$1, BLOCKS_CTR
	/* Move to the next block only if needed */
	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
	jmp	_loop3
_loop3:

	/*
	 * rounds
	 * 60+80,62+80,64+80,66+80,68+80
	 * 70+80,72+80,74+80,76+80,78+80
	 */
	.rept 10
		RR	j+80
		.set j, j+2
	.endr

	UPDATE_HASH	(HASH_PTR), A
	UPDATE_HASH	4(HASH_PTR), TB
	UPDATE_HASH	8(HASH_PTR), C
	UPDATE_HASH	12(HASH_PTR), D
	UPDATE_HASH	16(HASH_PTR), E

	/* Reset state for AVX2 reg permutation */
	mov	A, TA
	mov	TB, A
	mov	C, TB
	mov	E, C
	mov	D, B
	mov	TA, D

	REGALLOC

	xchg	WK_BUF, PRECALC_BUF

	jmp	_loop

	.align 32
_end:

.endm
/*
 * macro implements SHA-1 function's body for several 64-byte blocks
 * param: function's name
 */
.macro SHA1_VECTOR_ASM  name
ENTRY(\name)

	push	%rbx
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	RESERVE_STACK = (W_SIZE*4 + 8 + 24)

	/* Align stack */
	mov	%rsp, %rbx
	and	$~(0x20-1), %rsp
	push	%rbx
	sub	$RESERVE_STACK, %rsp

	avx2_zeroupper

	/* Setup initial values */
	mov	CTX, HASH_PTR
	mov	BUF, BUFFER_PTR

	mov	BUF, BUFFER_PTR2
	mov	CNT, BLOCKS_CTR

	xmm_mov	BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP

	SHA1_PIPELINED_MAIN_BODY

	avx2_zeroupper

	add	$RESERVE_STACK, %rsp

	pop	%rsp
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbx

	ret

ENDPROC(\name)
.endm
.section .rodata
#define K1 0x5a827999
#define K2 0x6ed9eba1
#define K3 0x8f1bbcdc
#define K4 0xca62c1d6

.align 128
K_XMM_AR:
	.long K1, K1, K1, K1
	.long K1, K1, K1, K1
	.long K2, K2, K2, K2
	.long K2, K2, K2, K2
	.long K3, K3, K3, K3
	.long K3, K3, K3, K3
	.long K4, K4, K4, K4
	.long K4, K4, K4, K4

BSWAP_SHUFB_CTL:
	.long 0x00010203
	.long 0x04050607
	.long 0x08090a0b
	.long 0x0c0d0e0f
	.long 0x00010203
	.long 0x04050607
	.long 0x08090a0b
	.long 0x0c0d0e0f

.text

SHA1_VECTOR_ASM     sha1_transform_avx2