2013-01-10 18:54:59 +04:00
/ * GPL H E A D E R S T A R T
*
* DO N O T A L T E R O R R E M O V E C O P Y R I G H T N O T I C E S O R T H I S F I L E H E A D E R .
*
* This p r o g r a m i s f r e e s o f t w a r e ; you can redistribute it and/or modify
* it u n d e r t h e t e r m s o f t h e G N U G e n e r a l P u b l i c L i c e n s e v e r s i o n 2 o n l y ,
* as p u b l i s h e d b y t h e F r e e S o f t w a r e F o u n d a t i o n .
*
* This p r o g r a m i s d i s t r i b u t e d i n t h e h o p e t h a t i t w i l l b e u s e f u l , b u t
* WITHOUT A N Y W A R R A N T Y ; without even the implied warranty of
* MERCHANTABILITY o r F I T N E S S F O R A P A R T I C U L A R P U R P O S E . S e e t h e G N U
* General P u b l i c L i c e n s e v e r s i o n 2 f o r m o r e d e t a i l s ( a c o p y i s i n c l u d e d
* in t h e L I C E N S E f i l e t h a t a c c o m p a n i e d t h i s c o d e ) .
*
* You s h o u l d h a v e r e c e i v e d a c o p y o f t h e G N U G e n e r a l P u b l i c L i c e n s e
* version 2 a l o n g w i t h t h i s p r o g r a m ; If not, see http://www.gnu.org/licenses
*
* Please v i s i t h t t p : / / w w w . x y r a t e x . c o m / c o n t a c t i f y o u n e e d a d d i t i o n a l
* information o r h a v e a n y q u e s t i o n s .
*
* GPL H E A D E R E N D
* /
/ *
* Copyright 2 0 1 2 X y r a t e x T e c h n o l o g y L i m i t e d
*
* Using h a r d w a r e p r o v i d e d P C L M U L Q D Q i n s t r u c t i o n t o a c c e l e r a t e t h e C R C 3 2
* calculation.
* CRC3 2 p o l y n o m i a l : 0 x04 c11 d b7 ( B E ) / 0 x E D B 8 8 3 2 0 ( L E )
 * PCLMULQDQ is a carry-less multiplication instruction with its own CPUID
 * feature flag (it is not part of SSE4.2); the reference can be found at:
* http : / / www. i n t e l . c o m / p r o d u c t s / p r o c e s s o r / m a n u a l s /
* Intel( R ) 6 4 a n d I A - 3 2 A r c h i t e c t u r e s S o f t w a r e D e v e l o p e r ' s M a n u a l
* Volume 2 B : I n s t r u c t i o n S e t R e f e r e n c e , N - Z
*
* Authors : Gregory P r e s t a s < G r e g o r y _ P r e s t a s @us.xyratex.com>
* Alexander B o y k o < A l e x a n d e r _ B o y k o @xyratex.com>
* /
# include < l i n u x / l i n k a g e . h >
# include < a s m / i n s t . h >
2017-09-06 22:41:21 -04:00
.section .rodata
.balign 16

/*
 * Folding constants used by crc32_pclmul_le_16.  Each 128-bit constant is
 * emitted as two little-endian quadwords: bits 63:0 first, bits 127:64
 * second, so the label name reads high:low (e.g. R2R1 = R2 in the high
 * quadword, R1 in the low quadword).
 */

/*
 * [(x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
 * #define CONSTANT_R1 0x154442bd4LL
 *
 * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
 * #define CONSTANT_R2 0x1c6e41596LL
 */
.Lconstant_R2R1:
	.quad	0x0000000154442bd4		/* R1: bits  63:0  */
	.quad	0x00000001c6e41596		/* R2: bits 127:64 */

/*
 * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
 * #define CONSTANT_R3 0x1751997d0LL
 *
 * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
 * #define CONSTANT_R4 0x0ccaa009eLL
 */
.Lconstant_R4R3:
	.quad	0x00000001751997d0		/* R3: bits  63:0  */
	.quad	0x00000000ccaa009e		/* R4: bits 127:64 */

/*
 * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
 * #define CONSTANT_R5 0x163cd6124LL
 */
.Lconstant_R5:
	.quad	0x0000000163cd6124		/* R5: bits  63:0  */
	.quad	0x0000000000000000		/* bits 127:64 are zero */

/* Mask that keeps only the lowest 32 bits of an xmm register. */
.Lconstant_mask32:
	.quad	0x00000000FFFFFFFF
	.quad	0x0000000000000000

/*
 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
 *
 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
 * #define CONSTANT_RU 0x1F7011641LL
 */
.Lconstant_RUpoly:
	.quad	0x00000001DB710641		/* CRCPOLY_TRUE_LE_FULL: bits  63:0  */
	.quad	0x00000001F7011641		/* CONSTANT_RU:          bits 127:64 */
/*
 * Register aliases used by crc32_pclmul_le_16.
 *
 * CONSTANT first receives the initial CRC and afterwards holds whichever
 * folding/reduction constant is currently in use.
 */
#define CONSTANT	%xmm0

/* Argument registers: buffer pointer, byte count and initial CRC value. */
#if defined(__x86_64__)
#define BUF		%rdi
#define LEN		%rsi
#define CRC		%edx
#else	/* 32-bit build */
#define BUF		%eax
#define LEN		%edx
#define CRC		%ecx
#endif
.text
/*
 * uint crc32_pclmul_le_16(unsigned char const *buffer,
 *                         size_t len, uint crc32)
 *
 * Calculate crc32 of a buffer using PCLMULQDQ folding.
 *
 * In:
 *   BUF - buffer (16 bytes aligned; it is read with aligned loads)
 *   LEN - size of buffer in bytes (16 bytes aligned), LEN should be
 *         greater than 63.  LEN is treated as an unsigned quantity.
 *   CRC - initial crc32
 * Out:
 *   %eax - crc32
 * Clobbers:
 *   BUF, LEN, flags, %xmm0-%xmm7 (and %xmm8 on x86_64).
 *
 * The first 64 bytes are loaded unconditionally, so calling this with
 * LEN < 64 is outside the contract.
 */
SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
	/* Load the first 64 bytes and mix the initial CRC into the low dword. */
	movdqa  (BUF), %xmm1
	movdqa  0x10(BUF), %xmm2
	movdqa  0x20(BUF), %xmm3
	movdqa  0x30(BUF), %xmm4
	movd    CRC, CONSTANT
	pxor    CONSTANT, %xmm1
	sub     $0x40, LEN
	add     $0x40, BUF
	cmp     $0x40, LEN
	jb      .Lless_64		/* fewer than 64 bytes remain */

#ifdef __x86_64__
	movdqa  .Lconstant_R2R1(%rip), CONSTANT
#else
	movdqa  .Lconstant_R2R1, CONSTANT
#endif

.Lloop_64:	/* 64 bytes Full cache line folding */
	prefetchnta    0x40(BUF)
	movdqa  %xmm1, %xmm5
	movdqa  %xmm2, %xmm6
	movdqa  %xmm3, %xmm7
#ifdef __x86_64__
	movdqa  %xmm4, %xmm8
#endif
	/* low qword of each accumulator times the low qword of CONSTANT */
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x00, CONSTANT, %xmm2
	PCLMULQDQ 0x00, CONSTANT, %xmm3
#ifdef __x86_64__
	PCLMULQDQ 0x00, CONSTANT, %xmm4
#endif
	/* high qword of each saved copy times the high qword of CONSTANT */
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	PCLMULQDQ 0x11, CONSTANT, %xmm6
	PCLMULQDQ 0x11, CONSTANT, %xmm7
#ifdef __x86_64__
	PCLMULQDQ 0x11, CONSTANT, %xmm8
#endif
	pxor    %xmm5, %xmm1
	pxor    %xmm6, %xmm2
	pxor    %xmm7, %xmm3
#ifdef __x86_64__
	pxor    %xmm8, %xmm4
#else
	/*
	 * %xmm8 is not available on 32-bit x86, so the fourth lane is
	 * folded here, reusing %xmm5 now that its value has been consumed.
	 */
	movdqa  %xmm4, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm4
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm4
#endif

	/* XOR in the next 64 bytes of input. */
	pxor    (BUF), %xmm1
	pxor    0x10(BUF), %xmm2
	pxor    0x20(BUF), %xmm3
	pxor    0x30(BUF), %xmm4

	sub     $0x40, LEN
	add     $0x40, BUF
	cmp     $0x40, LEN
	jae     .Lloop_64		/* unsigned: LEN is a byte count */

.Lless_64:	/* Folding cache line into 128bit */
#ifdef __x86_64__
	movdqa  .Lconstant_R4R3(%rip), CONSTANT
#else
	movdqa  .Lconstant_R4R3, CONSTANT
#endif
	prefetchnta     (BUF)

	/* fold %xmm1 into %xmm2 */
	movdqa  %xmm1, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm2, %xmm1

	/* fold the result into %xmm3 */
	movdqa  %xmm1, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm3, %xmm1

	/* fold the result into %xmm4 */
	movdqa  %xmm1, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm4, %xmm1

	cmp     $0x10, LEN
	jb      .Lfold_64		/* no full 16-byte chunk left */

.Lloop_16:	/* Folding rest buffer into 128bit */
	movdqa  %xmm1, %xmm5
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	PCLMULQDQ 0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    (BUF), %xmm1
	sub     $0x10, LEN
	add     $0x10, BUF
	cmp     $0x10, LEN
	jae     .Lloop_16		/* unsigned: LEN is a byte count */

.Lfold_64:
	/* perform the last 64 bit fold, also adds 32 zeroes
	 * to the input stream */
	PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
	psrldq  $0x08, %xmm1
	pxor    CONSTANT, %xmm1

	/* final 32-bit fold */
	movdqa  %xmm1, %xmm2
#ifdef __x86_64__
	movdqa  .Lconstant_R5(%rip), CONSTANT
	movdqa  .Lconstant_mask32(%rip), %xmm3
#else
	movdqa  .Lconstant_R5, CONSTANT
	movdqa  .Lconstant_mask32, %xmm3
#endif
	psrldq  $0x04, %xmm2
	pand    %xmm3, %xmm1
	PCLMULQDQ 0x00, CONSTANT, %xmm1
	pxor    %xmm2, %xmm1

	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
#ifdef __x86_64__
	movdqa  .Lconstant_RUpoly(%rip), CONSTANT
#else
	movdqa  .Lconstant_RUpoly, CONSTANT
#endif
	movdqa  %xmm1, %xmm2
	pand    %xmm3, %xmm1
	PCLMULQDQ 0x10, CONSTANT, %xmm1	/* xmm1.low * high qword of RUpoly */
	pand    %xmm3, %xmm1
	PCLMULQDQ 0x00, CONSTANT, %xmm1	/* xmm1.low * low qword of RUpoly */
	pxor    %xmm2, %xmm1
	PEXTRD  0x01, %xmm1, %eax	/* result is dword 1 of %xmm1 */

	ret
SYM_FUNC_END(crc32_pclmul_le_16)