2005-04-16 15:20:36 -07:00
/*
 * This routine clears to zero a linear memory buffer in user space.
 *
 * Inputs:
 *	in0:	address of buffer
 *	in1:	length of buffer in bytes
 * Outputs:
 *	r8:	number of bytes that didn't get cleared due to a fault
 *
 * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
 *	Stephane Eranian <eranian@hpl.hp.com>
 */
# include < a s m / a s m m a c r o . h >
2016-01-17 01:13:41 -05:00
# include < a s m / e x p o r t . h >
2005-04-16 15:20:36 -07:00
//
// Incoming arguments
//

// buf: address of the buffer to clear; advanced by the post-incrementing stores
#define buf		r32
// len: number of bytes still to clear; counted down as stores are issued
#define len		r33

//
// Scratch registers used locally
//

// cnt: number of 16-byte chunks handled by the core loop
#define cnt		r16
// buf2: second store pointer (buf + 8) used by the core loop
#define buf2		r17
// saved_lc: copy of ar.lc taken on entry
#define saved_lc	r18
// saved_pfs: copy of ar.pfs written by alloc
#define saved_pfs	r19
// tmp: iteration count minus one, staged before being moved into ar.lc
#define tmp		r20
// len2, len3: alternating "bytes left" counters used by the tail code
#define len2		r21
#define len3		r22
//
// Theory of operations:
//	- we check whether or not the buffer is small, i.e., less than 17
//	  in which case we do the byte by byte loop.
//
//	- Otherwise we go progressively from 1 byte store to 8 byte store in
//	  the head part, the body is a 16 byte store loop and we finish with the
//	  tail for the last 15 bytes.
//	  The good point about this breakdown is that the long buffer handling
//	  contains only 2 branches.
//
//	The reason for not using shifting & masking for both the head and the
//	tail is to stay semantically correct. This routine is not supposed
//	to write bytes outside of the buffer. While most of the time this would
//	be ok, we can't tolerate a mistake. A classical example is the case
//	of multithreaded code where the extra bytes touched are actually owned
//	by another thread which runs concurrently to ours. Another, less likely,
//	example is with device drivers where reading an I/O mapped location may
//	have side effects (same thing for writing).
//
//
// __do_clear_user: store zeroes over `len` bytes starting at `buf`.
//
// Result (ret0): number of bytes that were NOT cleared -- 0 on success, or
// the remaining byte count when one of the stores wrapped in EX() faults and
// control is transferred to the label named in that EX().
//
// ar.lc is copied to saved_lc on entry and is put back on every path that
// runs after it has been overwritten.
//
GLOBAL_ENTRY(__do_clear_user)
	.prologue
	.save ar.pfs, saved_pfs
	alloc	saved_pfs=ar.pfs,2,0,0,0
	cmp.eq p6,p0=r0,len		// check for zero length
	.save ar.lc, saved_lc
	mov saved_lc=ar.lc		// preserve ar.lc (slow)
	.body
	;;				// avoid WAW on CFM
	adds tmp=-1,len			// br.cloop is repeat/until
	mov ret0=len			// return value is length at this point
(p6)	br.ret.spnt.many rp		// len == 0: nothing to do, ar.lc untouched
	;;
	cmp.lt p6,p0=16,len		// if len > 16 then long memset
	mov ar.lc=tmp			// initialize lc for small count
					// NOTE: executed on the long path too,
					// so ar.lc is dirty from here on
(p6)	br.cond.dptk .long_do_clear
	;;				// WAR on ar.lc
	//
	// worst case 16 iterations, avg 8 iterations
	//
	// We could have played with the predicates to use the extra
	// M slot for 2 stores/iteration but the cost of the initialization of
	// the various counters compared to how long the loop is supposed
	// to last on average does not make this solution viable.
	//
1:
	EX( .Lexit1, st1 [buf]=r0,1 )
	adds len=-1,len			// countdown length using len
	br.cloop.dptk 1b
	;;				// avoid RAW on ar.lc
	//
	// .Lexit1: reached by falling out of the byte by byte loop, or from
	//	    a fault in it; len contains bytes left
	//
.Lexit1:
	mov ret0=len			// faster than using ar.lc
	mov ar.lc=saved_lc
	br.ret.sptk.many rp		// end of short clear_user

	//
	// At this point we know we have more than 16 bytes to clear
	// so we focus on alignment (no branches required)
	//
	// The use of len/len2 for countdown of the number of bytes left
	// instead of ret0 is due to the fact that the exception code
	// changes the values of r8.
	//
.long_do_clear:
	tbit.nz p6,p0=buf,0		// odd alignment (for long_do_clear)
	;;
	EX( .Lexit3, (p6) st1 [buf]=r0,1 )	// 1-byte aligned
(p6)	adds len=-1,len;;		// sync because buf is modified
	tbit.nz p6,p0=buf,1
	;;
	EX( .Lexit3, (p6) st2 [buf]=r0,2 )	// 2-byte aligned
(p6)	adds len=-2,len;;
	tbit.nz p6,p0=buf,2
	;;
	EX( .Lexit3, (p6) st4 [buf]=r0,4 )	// 4-byte aligned
(p6)	adds len=-4,len;;
	tbit.nz p6,p0=buf,3
	;;
	EX( .Lexit3, (p6) st8 [buf]=r0,8 )	// 8-byte aligned
(p6)	adds len=-8,len;;
	shr.u cnt=len,4		// number of 128-bit (2x64bit) words
	;;
	cmp.eq p6,p0=r0,cnt
	adds tmp=-1,cnt
(p6)	br.cond.dpnt .dotail		// we have less than 16 bytes left
	;;
	adds buf2=8,buf			// setup second base pointer
	mov ar.lc=tmp
	;;

	//
	// 16 bytes/iteration core loop
	//
	// The second store can never generate a fault because
	// we come into the loop only when we are 16-byte aligned.
	// This means that if we cross a page then it will always be
	// in the first store and never in the second.
	//
	//
	// We need to keep track of the remaining length. A possible (optimistic)
	// way would be to use ar.lc and derive how many bytes were left by
	// doing: left = 16*ar.lc + 16. This would avoid the addition at
	// every iteration.
	// However we need to keep the synchronization point. A template
	// M;;MB does not exist and thus we can keep the addition at no
	// extra cycle cost (use a nop slot anyway). It also simplifies the
	// (unlikely) error recovery code
	//

2:	EX(.Lexit3, st8 [buf]=r0,16 )
	;;				// needed to get len correct when error
	st8 [buf2]=r0,16
	adds len=-16,len
	br.cloop.dptk 2b
	;;
	//
	// Tail correction based on len only
	//
	// We alternate the use of len3,len2 to allow parallelism and correct
	// error handling. We also reuse p6/p7 to return correct value.
	// The addition of len2/len3 does not cost anything more compared to
	// the regular memset as we had empty slots.
	//
	// .dotail is entered either by falling out of the core loop or directly
	// from the head when fewer than 16 bytes remain. In both cases ar.lc no
	// longer holds the caller's value (it was loaded with len-1 before the
	// branch to .long_do_clear), so the restore must sit after the label:
	// the success return below does not touch ar.lc again.
	//
.dotail:
	mov ar.lc=saved_lc
	mov len2=len			// for parallelization of error handling
	mov len3=len
	tbit.nz p6,p0=len,3
	;;
	EX( .Lexit2, (p6) st8 [buf]=r0,8 )	// at least 8 bytes
(p6)	adds len3=-8,len2
	tbit.nz p7,p6=len,2
	;;
	EX( .Lexit2, (p7) st4 [buf]=r0,4 )	// at least 4 bytes
(p7)	adds len2=-4,len3
	tbit.nz p6,p7=len,1
	;;
	EX( .Lexit2, (p6) st2 [buf]=r0,2 )	// at least 2 bytes
(p6)	adds len3=-2,len2
	tbit.nz p7,p6=len,0
	;;
	EX( .Lexit2, (p7) st1 [buf]=r0 )	// only 1 byte left
	mov ret0=r0				// success
	br.ret.sptk.many rp			// end of most likely path

	//
	// Outlined error handling code
	//

	//
	// .Lexit2: comes from the tail
	//	if p6 -> coming from st8 or st2 : len2 contains what's left
	//	if p7 -> coming from st4 or st1 : len3 contains what's left
	//	Falls through to .Lexit3, which restores ar.lc.
	//
.Lexit2:
	.pred.rel "mutex", p6, p7
(p6)	mov len=len2
(p7)	mov len=len3
	;;
	//
	// .Lexit3: comes from the head or the core loop (and from .Lexit2)
	//	    len contains bytes left; ar.lc must be restored
	//
.Lexit3:
	mov ret0=len
	mov ar.lc=saved_lc
	br.ret.sptk.many rp
END(__do_clear_user)
2016-01-17 01:13:41 -05:00
// Export __do_clear_user through the EXPORT_SYMBOL macro (the macro is not
// defined in this file; presumably it comes from <asm/export.h>).
EXPORT_SYMBOL(__do_clear_user)