/*
 * Core of the accelerated CRC algorithm.
 * In your file, define the constants and CRC_FUNCTION_NAME, then
 * include this file.
 *
 * Calculate the checksum of data that is 16 byte aligned and a multiple of
 * 16 bytes.
 *
 * The first step is to reduce it to 1024 bits. We do this in 8 parallel
 * chunks in order to mask the latency of the vpmsum instructions. If we
 * have more than 32 kB of data to checksum we repeat this step multiple
 * times, passing in the previous 1024 bits.
 *
 * The next step is to reduce the 1024 bits to 64 bits. This step adds
 * 32 bits of 0s to the end - this matches what a CRC does. We just
 * calculate constants that land the data in these 32 bits.
 *
 * We then use fixed point Barrett reduction to compute a mod n over GF(2)
 * for n = CRC using POWER8 instructions. We use x = 32.
 *
 * http://en.wikipedia.org/wiki/Barrett_reduction
 *
 * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
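
/*
 * In outline, the steps described above are:
 *
 *   1. Fold the input 128 bytes at a time into eight parallel 128 bit
 *      accumulators, at most 32 kB (MAX_SIZE) per pass, carrying the
 *      accumulated 1024 bits between passes.
 *   2. On the final pass, reduce the 1024 bits to 64 bits using one
 *      constant per 16 byte chunk (with 32 zero bits appended).
 *   3. Barrett reduce the 64 bit remainder to the final 32 bit CRC.
 */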

#include <asm/ppc_asm.h>
#include <asm/ppc-opcode.h>

#define MAX_SIZE	32768

	.text

#if defined(__BIG_ENDIAN__) && defined(REFLECT)
#define BYTESWAP_DATA
#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT)
#define BYTESWAP_DATA
#else
#undef BYTESWAP_DATA
#endif

#define off16		r25
#define off32		r26
#define off48		r27
#define off64		r28
#define off80		r29
#define off96		r30
#define off112		r31

#define const1		v24
#define const2		v25

#define byteswap	v26
#define mask_32bit	v27
#define mask_64bit	v28
#define zeroes		v29

#ifdef BYTESWAP_DATA
#define VPERM(A, B, C, D) vperm	A, B, C, D
#else
#define VPERM(A, B, C, D)
#endif

/* unsigned int CRC_FUNCTION_NAME(unsigned int crc, void *p, unsigned long len) */
FUNC_START(CRC_FUNCTION_NAME)
	std	r31,-8(r1)
	std	r30,-16(r1)
	std	r29,-24(r1)
	std	r28,-32(r1)
	std	r27,-40(r1)
	std	r26,-48(r1)
	std	r25,-56(r1)

	li	off16,16
	li	off32,32
	li	off48,48
	li	off64,64
	li	off80,80
	li	off96,96
	li	off112,112
	li	r0,0

	/* Enough room for saving 10 non-volatile VMX registers */
	subi	r6,r1,56+10*16
	subi	r7,r1,56+2*16

	stvx	v20,0,r6
	stvx	v21,off16,r6
	stvx	v22,off32,r6
	stvx	v23,off48,r6
	stvx	v24,off64,r6
	stvx	v25,off80,r6
	stvx	v26,off96,r6
	stvx	v27,off112,r6
	stvx	v28,0,r7
	stvx	v29,off16,r7

	mr	r10,r3

	vxor	zeroes,zeroes,zeroes
	vspltisw v0,-1

	vsldoi	mask_32bit,zeroes,v0,4
	vsldoi	mask_64bit,zeroes,v0,8

	/* Get the initial value into v8 */
	vxor	v8,v8,v8
	MTVRD(v8, R3)

#ifdef REFLECT
	vsldoi	v8,zeroes,v8,8	/* shift into bottom 32 bits */
#else
	vsldoi	v8,v8,zeroes,4	/* shift into top 32 bits */
#endif

#ifdef BYTESWAP_DATA
	addis	r3,r2,.byteswap_constant@toc@ha
	addi	r3,r3,.byteswap_constant@toc@l
	lvx	byteswap,0,r3
	addi	r3,r3,16
#endif

	cmpdi	r5,256
	blt	.Lshort

	rldicr	r6,r5,0,56

	/* Checksum in blocks of MAX_SIZE */
1:	lis	r7,MAX_SIZE@h
	ori	r7,r7,MAX_SIZE@l
	mr	r9,r7
	cmpd	r6,r7
	bgt	2f
	mr	r7,r6

2:	subf	r6,r7,r6

	/* our main loop does 128 bytes at a time */
	srdi	r7,r7,7

	/*
	 * Work out the offset into the constants table to start at. Each
	 * constant is 16 bytes, and it is used against 128 bytes of input
	 * data - 128 / 16 = 8
	 */
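	/*
	 * As a worked example (MAX_SIZE = 32768): the table spans
	 * MAX_SIZE/8 = 4096 bytes, one 16 byte constant per 128 byte
	 * chunk of input. A full 32 kB block has 256 chunks, so we start
	 * at offset 4096 - 256*16 = 0; a 1 kB block has 8 chunks, so we
	 * start at offset 4096 - 128 = 3968, ie at the last eight
	 * constants.
	 */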
	sldi	r8,r7,4
	srdi	r9,r9,3
	subf	r8,r8,r9

	/* We reduce our final 128 bytes in a separate step */
	addi	r7,r7,-1
	mtctr	r7

	addis	r3,r2,.constants@toc@ha
	addi	r3,r3,.constants@toc@l

	/* Find the start of our constants */
	add	r3,r3,r8

	/* zero v0-v7 which will contain our checksums */
	vxor	v0,v0,v0
	vxor	v1,v1,v1
	vxor	v2,v2,v2
	vxor	v3,v3,v3
	vxor	v4,v4,v4
	vxor	v5,v5,v5
	vxor	v6,v6,v6
	vxor	v7,v7,v7

	lvx	const1,0,r3

	/*
	 * If we are looping back to consume more data we use the values
	 * already in v16-v23.
	 */
	cmpdi	r0,1
	beq	2f

	/* First warm up pass */
	lvx	v16,0,r4
	lvx	v17,off16,r4
	VPERM(v16,v16,v16,byteswap)
	VPERM(v17,v17,v17,byteswap)
	lvx	v18,off32,r4
	lvx	v19,off48,r4
	VPERM(v18,v18,v18,byteswap)
	VPERM(v19,v19,v19,byteswap)
	lvx	v20,off64,r4
	lvx	v21,off80,r4
	VPERM(v20,v20,v20,byteswap)
	VPERM(v21,v21,v21,byteswap)
	lvx	v22,off96,r4
	lvx	v23,off112,r4
	VPERM(v22,v22,v22,byteswap)
	VPERM(v23,v23,v23,byteswap)
	addi	r4,r4,8*16

	/* xor in initial value */
	vxor	v16,v16,v8
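
	/*
	 * Note: the "ori r2,r2,0" instructions in the passes below are
	 * no-ops; they appear to exist purely to pace instruction
	 * dispatch grouping and have no architectural effect.
	 */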

2:	bdz	.Lfirst_warm_up_done

	addi	r3,r3,16
	lvx	const2,0,r3

	/* Second warm up pass */
	VPMSUMD(v8,v16,const1)
	lvx	v16,0,r4
	VPERM(v16,v16,v16,byteswap)
	ori	r2,r2,0

	VPMSUMD(v9,v17,const1)
	lvx	v17,off16,r4
	VPERM(v17,v17,v17,byteswap)
	ori	r2,r2,0

	VPMSUMD(v10,v18,const1)
	lvx	v18,off32,r4
	VPERM(v18,v18,v18,byteswap)
	ori	r2,r2,0

	VPMSUMD(v11,v19,const1)
	lvx	v19,off48,r4
	VPERM(v19,v19,v19,byteswap)
	ori	r2,r2,0

	VPMSUMD(v12,v20,const1)
	lvx	v20,off64,r4
	VPERM(v20,v20,v20,byteswap)
	ori	r2,r2,0

	VPMSUMD(v13,v21,const1)
	lvx	v21,off80,r4
	VPERM(v21,v21,v21,byteswap)
	ori	r2,r2,0

	VPMSUMD(v14,v22,const1)
	lvx	v22,off96,r4
	VPERM(v22,v22,v22,byteswap)
	ori	r2,r2,0

	VPMSUMD(v15,v23,const1)
	lvx	v23,off112,r4
	VPERM(v23,v23,v23,byteswap)

	addi	r4,r4,8*16

	bdz	.Lfirst_cool_down

	/*
	 * main loop. We modulo schedule it such that it takes three
	 * iterations to complete - first iteration load, second iteration
	 * vpmsum, third iteration xor.
	 */
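	/*
	 * A sketch of how the three stages overlap for one 128 byte
	 * block n:
	 *
	 *	iteration:	i	i+1	i+2
	 *	block n:	load	vpmsum	xor into v0-v7
	 */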
	.balign	16
4:	lvx	const1,0,r3
	addi	r3,r3,16
	ori	r2,r2,0

	vxor	v0,v0,v8
	VPMSUMD(v8,v16,const2)
	lvx	v16,0,r4
	VPERM(v16,v16,v16,byteswap)
	ori	r2,r2,0

	vxor	v1,v1,v9
	VPMSUMD(v9,v17,const2)
	lvx	v17,off16,r4
	VPERM(v17,v17,v17,byteswap)
	ori	r2,r2,0

	vxor	v2,v2,v10
	VPMSUMD(v10,v18,const2)
	lvx	v18,off32,r4
	VPERM(v18,v18,v18,byteswap)
	ori	r2,r2,0

	vxor	v3,v3,v11
	VPMSUMD(v11,v19,const2)
	lvx	v19,off48,r4
	VPERM(v19,v19,v19,byteswap)
	lvx	const2,0,r3
	ori	r2,r2,0

	vxor	v4,v4,v12
	VPMSUMD(v12,v20,const1)
	lvx	v20,off64,r4
	VPERM(v20,v20,v20,byteswap)
	ori	r2,r2,0

	vxor	v5,v5,v13
	VPMSUMD(v13,v21,const1)
	lvx	v21,off80,r4
	VPERM(v21,v21,v21,byteswap)
	ori	r2,r2,0

	vxor	v6,v6,v14
	VPMSUMD(v14,v22,const1)
	lvx	v22,off96,r4
	VPERM(v22,v22,v22,byteswap)
	ori	r2,r2,0

	vxor	v7,v7,v15
	VPMSUMD(v15,v23,const1)
	lvx	v23,off112,r4
	VPERM(v23,v23,v23,byteswap)

	addi	r4,r4,8*16

	bdnz	4b

.Lfirst_cool_down:
	/* First cool down pass */
	lvx	const1,0,r3
	addi	r3,r3,16

	vxor	v0,v0,v8
	VPMSUMD(v8,v16,const1)
	ori	r2,r2,0

	vxor	v1,v1,v9
	VPMSUMD(v9,v17,const1)
	ori	r2,r2,0

	vxor	v2,v2,v10
	VPMSUMD(v10,v18,const1)
	ori	r2,r2,0

	vxor	v3,v3,v11
	VPMSUMD(v11,v19,const1)
	ori	r2,r2,0

	vxor	v4,v4,v12
	VPMSUMD(v12,v20,const1)
	ori	r2,r2,0

	vxor	v5,v5,v13
	VPMSUMD(v13,v21,const1)
	ori	r2,r2,0

	vxor	v6,v6,v14
	VPMSUMD(v14,v22,const1)
	ori	r2,r2,0

	vxor	v7,v7,v15
	VPMSUMD(v15,v23,const1)
	ori	r2,r2,0

.Lsecond_cool_down:
	/* Second cool down pass */
	vxor	v0,v0,v8
	vxor	v1,v1,v9
	vxor	v2,v2,v10
	vxor	v3,v3,v11
	vxor	v4,v4,v12
	vxor	v5,v5,v13
	vxor	v6,v6,v14
	vxor	v7,v7,v15

#ifdef REFLECT
	/*
	 * vpmsumd produces a 96 bit result in the least significant bits
	 * of the register. Since we are bit reflected we have to shift it
	 * left 32 bits so it occupies the least significant bits in the
	 * bit reflected domain.
	 */
	vsldoi	v0,v0,zeroes,4
	vsldoi	v1,v1,zeroes,4
	vsldoi	v2,v2,zeroes,4
	vsldoi	v3,v3,zeroes,4
	vsldoi	v4,v4,zeroes,4
	vsldoi	v5,v5,zeroes,4
	vsldoi	v6,v6,zeroes,4
	vsldoi	v7,v7,zeroes,4
#endif

	/* xor with last 1024 bits */
	lvx	v8,0,r4
	lvx	v9,off16,r4
	VPERM(v8,v8,v8,byteswap)
	VPERM(v9,v9,v9,byteswap)
	lvx	v10,off32,r4
	lvx	v11,off48,r4
	VPERM(v10,v10,v10,byteswap)
	VPERM(v11,v11,v11,byteswap)
	lvx	v12,off64,r4
	lvx	v13,off80,r4
	VPERM(v12,v12,v12,byteswap)
	VPERM(v13,v13,v13,byteswap)
	lvx	v14,off96,r4
	lvx	v15,off112,r4
	VPERM(v14,v14,v14,byteswap)
	VPERM(v15,v15,v15,byteswap)

	addi	r4,r4,8*16

	vxor	v16,v0,v8
	vxor	v17,v1,v9
	vxor	v18,v2,v10
	vxor	v19,v3,v11
	vxor	v20,v4,v12
	vxor	v21,v5,v13
	vxor	v22,v6,v14
	vxor	v23,v7,v15

	li	r0,1
	cmpdi	r6,0
	addi	r6,r6,128
	bne	1b

	/* Work out how many bytes we have left */
	andi.	r5,r5,127

	/* Calculate where in the constant table we need to start */
	subfic	r6,r5,128
	add	r3,r3,r6

	/* How many 16 byte chunks are in the tail */
	srdi	r7,r5,4
	mtctr	r7

	/*
	 * Reduce the previously calculated 1024 bits to 64 bits, shifting
	 * 32 bits to include the trailing 32 bits of zeros
	 */
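	/*
	 * Roughly, assuming the usual folding scheme: each 128 bit chunk
	 * v16-v23 is carry-less multiplied by a per-chunk constant of the
	 * form x^k mod p, with k chosen so that every product (including
	 * the trailing 32 zero bits) lands at the same 64 bit position,
	 * after which the eight products can simply be xored together.
	 */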
	lvx	v0,0,r3
	lvx	v1,off16,r3
	lvx	v2,off32,r3
	lvx	v3,off48,r3
	lvx	v4,off64,r3
	lvx	v5,off80,r3
	lvx	v6,off96,r3
	lvx	v7,off112,r3
	addi	r3,r3,8*16

	VPMSUMW(v0,v16,v0)
	VPMSUMW(v1,v17,v1)
	VPMSUMW(v2,v18,v2)
	VPMSUMW(v3,v19,v3)
	VPMSUMW(v4,v20,v4)
	VPMSUMW(v5,v21,v5)
	VPMSUMW(v6,v22,v6)
	VPMSUMW(v7,v23,v7)

	/* Now reduce the tail (0 - 112 bytes) */
	cmpdi	r7,0
	beq	1f

	lvx	v16,0,r4
	lvx	v17,0,r3
	VPERM(v16,v16,v16,byteswap)
	VPMSUMW(v16,v16,v17)
	vxor	v0,v0,v16
	bdz	1f

	lvx	v16,off16,r4
	lvx	v17,off16,r3
	VPERM(v16,v16,v16,byteswap)
	VPMSUMW(v16,v16,v17)
	vxor	v0,v0,v16
	bdz	1f

	lvx	v16,off32,r4
	lvx	v17,off32,r3
	VPERM(v16,v16,v16,byteswap)
	VPMSUMW(v16,v16,v17)
	vxor	v0,v0,v16
	bdz	1f

	lvx	v16,off48,r4
	lvx	v17,off48,r3
	VPERM(v16,v16,v16,byteswap)
	VPMSUMW(v16,v16,v17)
	vxor	v0,v0,v16
	bdz	1f

	lvx	v16,off64,r4
	lvx	v17,off64,r3
	VPERM(v16,v16,v16,byteswap)
	VPMSUMW(v16,v16,v17)
	vxor	v0,v0,v16
	bdz	1f

	lvx	v16,off80,r4
	lvx	v17,off80,r3
	VPERM(v16,v16,v16,byteswap)
	VPMSUMW(v16,v16,v17)
	vxor	v0,v0,v16
	bdz	1f

	lvx	v16,off96,r4
	lvx	v17,off96,r3
	VPERM(v16,v16,v16,byteswap)
	VPMSUMW(v16,v16,v17)
	vxor	v0,v0,v16

	/* Now xor all the parallel chunks together */
1:	vxor	v0,v0,v1
	vxor	v2,v2,v3
	vxor	v4,v4,v5
	vxor	v6,v6,v7

	vxor	v0,v0,v2
	vxor	v4,v4,v6

	vxor	v0,v0,v4

.Lbarrett_reduction:
	/* Barrett constants */
	addis	r3,r2,.barrett_constants@toc@ha
	addi	r3,r3,.barrett_constants@toc@l

	lvx	const1,0,r3
	lvx	const2,off16,r3

	vsldoi	v1,v0,v0,8
	vxor	v0,v0,v1	/* xor two 64 bit results together */

#ifdef REFLECT
	/* shift left one bit */
	vspltisb v1,1
	vsl	v0,v0,v1
#endif

	vand	v0,v0,mask_64bit

#ifndef REFLECT
	/*
	 * Now for the Barrett reduction algorithm. The idea is to calculate
	 * q, the multiple of our polynomial that we need to subtract. By
	 * doing the computation 2x bits higher (ie 64 bits) and shifting the
	 * result back down 2x bits, we round down to the nearest multiple.
	 */
	VPMSUMD(v1,v0,const1)	/* ma */
	vsldoi	v1,zeroes,v1,8	/* q = floor(ma/(2^64)) */
	VPMSUMD(v1,v1,const2)	/* qn */
	vxor	v0,v0,v1	/* a - qn, subtraction is xor in GF(2) */
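
	/*
	 * For reference, a C model of this reduction (a sketch only;
	 * clmul() stands in for vpmsumd, m = floor(x^64 / n) and n is the
	 * 33 bit CRC polynomial - neither constant is spelled out here):
	 *
	 *	typedef unsigned long long u64;
	 *	typedef unsigned __int128 u128;
	 *
	 *	static u128 clmul(u64 a, u64 b)	// carry-less multiply
	 *	{
	 *		u128 r = 0;
	 *		for (int i = 0; i < 64; i++)
	 *			if ((b >> i) & 1)
	 *				r ^= (u128)a << i;
	 *		return r;
	 *	}
	 *
	 *	static u64 barrett(u64 a, u64 m, u64 n)
	 *	{
	 *		u64 q = (u64)(clmul(a, m) >> 64); // round down
	 *		return a ^ (u64)clmul(q, n);	  // xor is GF(2) subtract
	 *	}
	 */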

	/*
	 * Get the result into r3. We need to shift it left 8 bytes:
	 * V0 [ 0 1 2 X ]
	 * V0 [ 0 X 2 3 ]
	 */
	vsldoi	v0,v0,zeroes,8	/* shift result into top 64 bits */
#else
	/*
	 * The reflected version of Barrett reduction. Instead of bit
	 * reflecting our data (which is expensive to do), we bit reflect our
	 * constants and our algorithm, which means the intermediate data in
	 * our vector registers goes from 0-63 instead of 63-0. We can reflect
	 * the algorithm because we don't carry in mod 2 arithmetic.
	 */
	vand	v1,v0,mask_32bit	/* bottom 32 bits of a */
	VPMSUMD(v1,v1,const1)		/* ma */
	vand	v1,v1,mask_32bit	/* bottom 32 bits of ma */
	VPMSUMD(v1,v1,const2)		/* qn */
	vxor	v0,v0,v1		/* a - qn, subtraction is xor in GF(2) */

	/*
	 * Since we are bit reflected, the result (ie the low 32 bits) is in
	 * the high 32 bits. We just need to shift it left 4 bytes:
	 * V0 [ 0 1 X 3 ]
	 * V0 [ 0 X 2 3 ]
	 */
	vsldoi	v0,v0,zeroes,4	/* shift result into top 64 bits */
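
	/*
	 * A C model of the reflected variant (a sketch; clmul() as in the
	 * model above, m and n here being the bit-reflected constants):
	 *
	 *	static u64 barrett_reflected(u64 a, u64 m, u64 n)
	 *	{
	 *		u64 q = (u64)clmul(a & 0xffffffff, m) & 0xffffffff;
	 *		return (a ^ (u64)clmul(q, n)) >> 32;
	 *	}
	 */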
#endif

	/* Get it into r3 */
	MFVRD(R3, v0)

.Lout:
	subi	r6,r1,56+10*16
	subi	r7,r1,56+2*16

	lvx	v20,0,r6
	lvx	v21,off16,r6
	lvx	v22,off32,r6
	lvx	v23,off48,r6
	lvx	v24,off64,r6
	lvx	v25,off80,r6
	lvx	v26,off96,r6
	lvx	v27,off112,r6
	lvx	v28,0,r7
	lvx	v29,off16,r7

	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)
	ld	r26,-48(r1)
	ld	r25,-56(r1)

	blr

.Lfirst_warm_up_done:
	lvx	const1,0,r3
	addi	r3,r3,16

	VPMSUMD(v8,v16,const1)
	VPMSUMD(v9,v17,const1)
	VPMSUMD(v10,v18,const1)
	VPMSUMD(v11,v19,const1)
	VPMSUMD(v12,v20,const1)
	VPMSUMD(v13,v21,const1)
	VPMSUMD(v14,v22,const1)
	VPMSUMD(v15,v23,const1)

	b	.Lsecond_cool_down

.Lshort:
	cmpdi	r5,0
	beq	.Lzero

	addis	r3,r2,.short_constants@toc@ha
	addi	r3,r3,.short_constants@toc@l

	/* Calculate where in the constant table we need to start */
	subfic	r6,r5,256
	add	r3,r3,r6

	/* How many 16 byte chunks? */
	srdi	r7,r5,4
	mtctr	r7

	vxor	v19,v19,v19
	vxor	v20,v20,v20

	lvx	v0,0,r4
	lvx	v16,0,r3
	VPERM(v0,v0,v16,byteswap)
	vxor	v0,v0,v8	/* xor in initial value */
	VPMSUMW(v0,v0,v16)
	bdz	.Lv0

	lvx	v1,off16,r4
	lvx	v17,off16,r3
	VPERM(v1,v1,v17,byteswap)
	VPMSUMW(v1,v1,v17)
	bdz	.Lv1

	lvx	v2,off32,r4
	lvx	v16,off32,r3
	VPERM(v2,v2,v16,byteswap)
	VPMSUMW(v2,v2,v16)
	bdz	.Lv2

	lvx	v3,off48,r4
	lvx	v17,off48,r3
	VPERM(v3,v3,v17,byteswap)
	VPMSUMW(v3,v3,v17)
	bdz	.Lv3

	lvx	v4,off64,r4
	lvx	v16,off64,r3
	VPERM(v4,v4,v16,byteswap)
	VPMSUMW(v4,v4,v16)
	bdz	.Lv4

	lvx	v5,off80,r4
	lvx	v17,off80,r3
	VPERM(v5,v5,v17,byteswap)
	VPMSUMW(v5,v5,v17)
	bdz	.Lv5

	lvx	v6,off96,r4
	lvx	v16,off96,r3
	VPERM(v6,v6,v16,byteswap)
	VPMSUMW(v6,v6,v16)
	bdz	.Lv6

	lvx	v7,off112,r4
	lvx	v17,off112,r3
	VPERM(v7,v7,v17,byteswap)
	VPMSUMW(v7,v7,v17)
	bdz	.Lv7

	addi	r3,r3,128
	addi	r4,r4,128

	lvx	v8,0,r4
	lvx	v16,0,r3
	VPERM(v8,v8,v16,byteswap)
	VPMSUMW(v8,v8,v16)
	bdz	.Lv8

	lvx	v9,off16,r4
	lvx	v17,off16,r3
	VPERM(v9,v9,v17,byteswap)
	VPMSUMW(v9,v9,v17)
	bdz	.Lv9

	lvx	v10,off32,r4
	lvx	v16,off32,r3
	VPERM(v10,v10,v16,byteswap)
	VPMSUMW(v10,v10,v16)
	bdz	.Lv10

	lvx	v11,off48,r4
	lvx	v17,off48,r3
	VPERM(v11,v11,v17,byteswap)
	VPMSUMW(v11,v11,v17)
	bdz	.Lv11

	lvx	v12,off64,r4
	lvx	v16,off64,r3
	VPERM(v12,v12,v16,byteswap)
	VPMSUMW(v12,v12,v16)
	bdz	.Lv12

	lvx	v13,off80,r4
	lvx	v17,off80,r3
	VPERM(v13,v13,v17,byteswap)
	VPMSUMW(v13,v13,v17)
	bdz	.Lv13

	lvx	v14,off96,r4
	lvx	v16,off96,r3
	VPERM(v14,v14,v16,byteswap)
	VPMSUMW(v14,v14,v16)
	bdz	.Lv14

	lvx	v15,off112,r4
	lvx	v17,off112,r3
	VPERM(v15,v15,v17,byteswap)
	VPMSUMW(v15,v15,v17)

.Lv15:	vxor	v19,v19,v15
.Lv14:	vxor	v20,v20,v14
.Lv13:	vxor	v19,v19,v13
.Lv12:	vxor	v20,v20,v12
.Lv11:	vxor	v19,v19,v11
.Lv10:	vxor	v20,v20,v10
.Lv9:	vxor	v19,v19,v9
.Lv8:	vxor	v20,v20,v8
.Lv7:	vxor	v19,v19,v7
.Lv6:	vxor	v20,v20,v6
.Lv5:	vxor	v19,v19,v5
.Lv4:	vxor	v20,v20,v4
.Lv3:	vxor	v19,v19,v3
.Lv2:	vxor	v20,v20,v2
.Lv1:	vxor	v19,v19,v1
.Lv0:	vxor	v20,v20,v0

	vxor	v0,v19,v20

	b	.Lbarrett_reduction

.Lzero:
	mr	r3,r10
	b	.Lout

FUNC_END(CRC_FUNCTION_NAME)