/*
 *
 * Optimized version of the standard do_csum() function
 *
 * Return: a 64bit quantity containing the 16bit Internet checksum
 *
 * Inputs:
 *	in0: address of buffer to checksum (char *)
 *	in1: length of the buffer (int)
 *
 * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co
 *	Stephane Eranian <eranian@hpl.hp.com>
 *
 * 02/04/22	Ken Chen <kenneth.w.chen@intel.com>
 *		Data locality study on the checksum buffer.
 *		More optimization cleanup - remove excessive stop bits.
 * 02/04/08	David Mosberger <davidm@hpl.hp.com>
 *		More cleanup and tuning.
 * 01/04/18	Jun Nakajima <jun.nakajima@intel.com>
 *		Clean up and optimize the software pipeline, loading two
 *		back-to-back 8-byte words per loop. Clean up the initialization
 *		for the loop. Support the cases where load latency = 1 or 2.
 *		Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default).
 */
#include <asm/asmmacro.h>
//
// Theory of operations:
//	The goal is to go as quickly as possible to the point where
//	we can checksum 16 bytes/loop. Before reaching that point we must
//	take care of incorrect alignment of first byte.
//
//	The code hereafter also takes care of the "tail" part of the buffer
//	before entering the core loop, if any. The checksum is a sum so it
//	allows us to commute operations. So we do the "head" and "tail"
//	first to finish at full speed in the body. Once we get the head and
//	tail values, we feed them into the pipeline, very handy initialization.
//
//	Of course we deal with the special case where the whole buffer fits
//	into one 8 byte word. In this case we have only one entry in the pipeline.
//
//	We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for
//	possible load latency and also to accommodate for head and tail.
//
//	The end of the function deals with folding the checksum from 64bits
//	down to 16bits taking care of the carry.
//
//	This version avoids synchronization in the core loop by also using a
//	pipeline for the accumulation of the checksum in resultx[] (x=1,2).
//
//	wordx[] (x=1,2)
//	|---|
//	|   | 0			: new value loaded in pipeline
//	|---|
//	|   | -			: in transit data
//	|---|
//	|   | LOAD_LATENCY	: current value to add to checksum
//	|---|
//	|   | LOAD_LATENCY+1	: previous value added to checksum
//	|---|			  (previous iteration)
//
//	resultx[] (x=1,2)
//	|---|
//	|   | 0			: initial value
//	|---|
//	|   | LOAD_LATENCY-1	: new checksum
//	|---|
//	|   | LOAD_LATENCY	: previous value of checksum
//	|---|
//	|   | LOAD_LATENCY+1	: final checksum when out of the loop
//	|---|
//
//
//	See RFC1071 "Computing the Internet Checksum" for various techniques for
//	calculating the Internet checksum.
//
// NOT YET DONE:
//	- Maybe another algorithm which would take care of the folding at the
//	  end in a different manner
//	- Work with people more knowledgeable than me on the network stack
//	  to figure out if we could not split the function depending on the
//	  type of packet or alignment we get. Like the ip_fast_csum() routine
//	  where we know we have at least 20 bytes worth of data to checksum.
//	- Do a better job of handling small packets.
//	- Note on prefetching: it was found that under various load, i.e. ftp read/write,
//	  nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8%
//	  on the data that buffer points to (partly because the checksum is often preceded by
//	  a copy_from_user()).  This finding indicates that lfetch will not be beneficial since
//	  the data is already in the cache.
//
//
// Register aliases.  r16-r31 are static scratch registers; the rotating
// word/result registers are declared with .rotr inside do_csum.
//
#define saved_pfs	r11	// caller's ar.pfs
#define hmask		r16	// byte mask for the first (head) 8-byte word
#define tmask		r17	// byte mask for the last (tail) 8-byte word
#define first1		r18	// load pointer, stream 1 (starts at buf & ~7)
#define firstval	r19	// raw contents of the head word
#define firstoff	r20	// buf & 7
#define last		r21	// address of the 8-byte word holding the last byte
#define lastval		r22	// raw contents of the tail word
#define lastoff		r23	// (buf + len) & 7
#define saved_lc	r24	// caller's ar.lc
#define saved_pr	r25	// caller's predicates
#define tmp1		r26
#define tmp2		r27
#define tmp3		r28
#define carry1		r29	// carries accumulated for result1[] in the loop
#define carry2		r30	// carries accumulated for result2[] in the loop
#define first2		r31	// load pointer, stream 2 (first1 + 8)

#define buf		in0
#define len		in1

#define LOAD_LATENCY	2	// XXX fix me

#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2)
# error "Only 1 or 2 is supported/tested for LOAD_LATENCY."
#endif

#define PIPE_DEPTH			(LOAD_LATENCY+2)
#define ELD	p[LOAD_LATENCY]		// end of load
#define ELD_1	p[LOAD_LATENCY+1]	// and next stage

//
// unsigned long do_csum(unsigned char *buf,long len)
//
// In:	in0 (buf) = address of the first byte to checksum (any alignment)
//	in1 (len) = number of bytes; when len <= 0 the function returns 0
// Out:	ret0      = end-around-carry sum of the buffer folded down to 16 bits;
//		    the two result bytes are swapped when buf is an odd address
//
// ar.pfs, ar.lc and the rotating predicates are saved on entry and restored
// before returning.  Scratch: r16-r23, r26-r31, p6-p11, p15 and the 16
// rotating registers requested by the alloc below.
//
// NOTE: the body is hand-scheduled; instruction order and the ";;" stop bits
// are significant.  Do not reorder without re-checking every group.
//
GLOBAL_ENTRY(do_csum)
	.prologue
	.save ar.pfs, saved_pfs
	alloc saved_pfs=ar.pfs,2,16,0,16
	.rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2]
	.rotp p[PIPE_DEPTH], pC1[2], pC2[2]
	mov ret0=r0		// in case we have zero length
	cmp.lt p0,p6=r0,len	// p6 = !(0 < len): zero or negative length
	;;
	add tmp1=buf,len	// address one past the last byte
	.save pr, saved_pr
	mov saved_pr=pr		// preserve predicates (rotation)
(p6)	br.ret.spnt.many rp	// return if zero or negative length

	mov hmask=-1		// initialize head mask
	tbit.nz p15,p0=buf,0	// is buf an odd address? (p15 is consumed at exit)
	and first1=-8,buf	// 8-byte align down address of first1 element

	and firstoff=7,buf	// how many bytes off for first1 element
	mov tmask=-1		// initialize tail mask

	;;
	adds tmp2=-1,tmp1	// address of the last byte
	and lastoff=7,tmp1	// how many bytes off for last element
	;;
	sub tmp1=8,lastoff	// complement to lastoff
	and last=-8,tmp2	// address of word containing last byte
	;;
	sub tmp3=last,first1	// tmp3=distance from first1 to last
	.save ar.lc, saved_lc
	mov saved_lc=ar.lc	// save lc
	cmp.eq p8,p9=last,first1	// everything fits in one word ?

	ld8 firstval=[first1],8	// load, ahead of time, "first1" word (first1 += 8)
	and tmp1=7, tmp1	// make sure that if tmp1==8 -> tmp1=0
	shl tmp2=firstoff,3	// number of bits
	;;
(p9)	ld8 lastval=[last]	// load, ahead of time, "last" word, if needed
	shl tmp1=tmp1,3		// number of bits
(p9)	adds tmp3=-8,tmp3	// effectively loaded
	;;
(p8)	mov lastval=r0		// we don't need lastval if first1==last
	shl hmask=hmask,tmp2	// build head mask, mask off [0,first1off[
	shr.u tmask=tmask,tmp1	// build tail mask, mask off ]8,lastoff]
	;;
	.body
#define count tmp3		// from here on tmp3 counts the words between head and tail

(p8)	and hmask=hmask,tmask	// apply tail mask to head mask if 1 word only
(p9)	and word2[0]=lastval,tmask	// mask last it as appropriate
	shr.u count=count,3	// how many 8-byte?
	;;
	// If count is odd, finish this 8-byte word so that we can
	// load two back-to-back 8-byte words per loop thereafter.
	and word1[0]=firstval,hmask	// and mask it as appropriate
	tbit.nz p10,p11=count,0		// p10 = count is odd, p11 = count is even
	;;
(p8)	mov result1[0]=word1[0]
(p9)	add result1[0]=word1[0],word2[0]
	;;
	cmp.ltu p6,p0=result1[0],word1[0]	// check the carry
	cmp.eq.or.andcm p8,p0=0,count		// p8 |= (count == 0): nothing left to load
	;;
(p6)	adds result1[0]=1,result1[0]
(p8)	br.cond.dptk .do_csum_exit	// single word, or head + tail only
(p11)	br.cond.dptk .do_csum16		// if (count is even)

	// Here count is odd.
	ld8 word1[1]=[first1],8		// load an 8-byte word
	cmp.eq p9,p10=1,count		// if (count == 1)
	adds count=-1,count		// loaded an 8-byte word
	;;
	add result1[0]=result1[0],word1[1]
	;;
	cmp.ltu p6,p0=result1[0],word1[1]
	;;
(p6)	adds result1[0]=1,result1[0]
(p9)	br.cond.sptk .do_csum_exit	// if (count == 1) exit
	// Fall through to calculate the checksum, feeding result1[0] as
	// the initial value in result1[0].
	//
	// Calculate the checksum loading two 8-byte words per loop.
	//
.do_csum16:
	add first2=8,first1
	shr.u count=count,1	// we do 16 bytes per loop
	;;
	adds count=-1,count	// ar.lc takes (iterations - 1)
	mov carry1=r0
	mov carry2=r0
	brp.loop.imp 1f,2f
	;;
	mov ar.ec=PIPE_DEPTH
	mov ar.lc=count	// set lc
	mov pr.rot=1<<16	// only bit 16 set in the rotating predicates
	// result1[0] must be initialized in advance.
	mov result2[0]=r0
	;;
	.align 32
1:
(ELD_1)	cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1]
(pC1[1])adds carry1=1,carry1
(ELD_1)	cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1]
(pC2[1])adds carry2=1,carry2
(ELD)	add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY]
(ELD)	add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY]
2:
(p[0])	ld8 word1[0]=[first1],16
(p[0])	ld8 word2[0]=[first2],16
	br.ctop.sptk 1b
	;;
	// Since len is a 32-bit value, carry cannot be larger than a 64-bit value.
(pC1[1])adds carry1=1,carry1	// since we miss the last one
(pC2[1])adds carry2=1,carry2
	;;
	// Add the accumulated carries back into each partial sum ...
	add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1
	add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2
	;;
	cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1
	cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2
	;;
(p6)	adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1]
(p7)	adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1]
	;;
	// ... then merge the two partial sums, again with end-around carry.
	add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1]
	;;
	cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1]
	;;
(p6)	adds result1[0]=1,result1[0]
	;;
.do_csum_exit:
	//
	// now fold 64 into 16 bits taking care of carry
	// that's not very good because it has lots of sequentiality
	//
	mov tmp3=0xffff
	zxt4 tmp1=result1[0]
	shr.u tmp2=result1[0],32
	;;
	add result1[0]=tmp1,tmp2
	;;
	and tmp1=result1[0],tmp3
	shr.u tmp2=result1[0],16
	;;
	add result1[0]=tmp1,tmp2
	;;
	and tmp1=result1[0],tmp3
	shr.u tmp2=result1[0],16
	;;
	add result1[0]=tmp1,tmp2
	;;
	and tmp1=result1[0],tmp3
	shr.u tmp2=result1[0],16
	;;
	add ret0=tmp1,tmp2
	mov pr=saved_pr,0xffffffffffff0000	// low 16 mask bits clear: p15 stays valid below
	;;
	// if buf was odd then swap bytes
	mov ar.pfs=saved_pfs		// restore ar.ec
(p15)	mux1 ret0=ret0,@rev		// reverse word
	;;
	mov ar.lc=saved_lc
(p15)	shr.u ret0=ret0,64-16	// + shift back to position = swap bytes
	br.ret.sptk.many rp

//	I (Jun Nakajima) wrote an equivalent code (see below), but it was
//	not much better than the original. So keep the original there so that
//	someone else can challenge.
//
//	shr.u word1[0]=result1[0],32
//	zxt4 result1[0]=result1[0]
//	;;
//	add result1[0]=result1[0],word1[0]
//	;;
//	zxt2 result2[0]=result1[0]
//	extr.u word1[0]=result1[0],16,16
//	shr.u carry1=result1[0],32
//	;;
//	add result2[0]=result2[0],word1[0]
//	;;
//	add result2[0]=result2[0],carry1
//	;;
//	extr.u ret0=result2[0],16,16
//	;;
//	add ret0=ret0,result2[0]
//	;;
//	zxt2 ret0=ret0
//	mov ar.pfs=saved_pfs		 // restore ar.ec
//	mov pr=saved_pr,0xffffffffffff0000
//	;;
//	// if buf was odd then swap bytes
//	mov ar.lc=saved_lc
//(p15)	mux1 ret0=ret0,@rev		// reverse word
//	;;
//(p15)	shr.u ret0=ret0,64-16	// + shift back to position = swap bytes
//	br.ret.sptk.many rp

END(do_csum)