/*
 * arch/alpha/lib/ev6-stxncpy.S
 * 21264 version contributed by Rick Gorton <rick.gorton@api-networks.com>
 *
 * Copy no more than COUNT bytes of the null-terminated string from
 * SRC to DST.
 *
 * This is an internal routine used by strncpy, stpncpy, and strncat.
 * As such, it uses special linkage conventions to make implementation
 * of these public functions more efficient.
 *
 * On input:
 *	t9 = return address
 *	a0 = DST
 *	a1 = SRC
 *	a2 = COUNT
 *
 * Furthermore, COUNT may not be zero.
 *
 * On output:
 *	t0  = last word written
 *	t10 = bitmask (with one bit set) indicating the byte position of
 *	      the end of the range specified by COUNT
 *	t12 = bitmask (with one bit set) indicating the last byte written
 *	a0  = unaligned address of the last *word* written
 *	a2  = the number of full words left in COUNT
 *
 * Furthermore, v0, a3-a5, t11, and $at are untouched.
 *
 * Much of the information about 21264 scheduling/coding comes from:
 *	Compiler Writer's Guide for the Alpha 21264
 *	abbreviated as 'CWG' in other comments here
 *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
 * Scheduling notation:
 *	E	- either cluster
 *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
 *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
 * Try not to change the actual algorithm if possible for consistency.
 */
#include <asm/regdef.h>

	/* The routine's contract promises that $at is left untouched, so
	   forbid the assembler from using it as a scratch register.  */
	.set noat
	/* Every instruction below is hand-placed in fetch quads; keep the
	   assembler from reordering them.  */
	.set noreorder

	.text
/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
   doesn't like putting the entry point for a procedure somewhere in the
   middle of the procedure descriptor.  Work around this by putting the
   aligned copy in its own procedure descriptor.  */

/*
 * stxncpy_aligned: word-at-a-time copy for the case where SRC and DST
 * share the same alignment within a quadword.  File-local; reached only
 * by branch from __stxncpy, and returns straight to the original caller
 * through t9.
 *
 * Besides t0/t1 (described below), this code reads:
 *	a0  = DST
 *	a1  = SRC, already advanced past the first source word; only its
 *	      low three bits are used here, as the starting byte offset
 *	a2  = number of full words left before the word holding the last
 *	      byte of COUNT
 *	t10 = single-bit mask marking the last COUNT byte in its word
 */

	.ent stxncpy_aligned
	.align 4
stxncpy_aligned:
	.frame sp, 0, t9, 0
	.prologue 0

	/* On entry to this basic block:
	   t0 == the first destination word for masking back in
	   t1 == the first source word.  */

	/* Create the 1st output word and detect 0's in the 1st input word.
	   Source bytes below the starting offset are forced to 0xff in t2
	   so that garbage in front of the string can never look like a
	   terminator.  */
	lda	t2, -1		# E : build a mask against false zero
	mskqh	t2, a1, t2	# U :   detection in the src word (stall)
	mskqh	t1, a1, t3	# U : t3 = src bytes at/after the start offset
	ornot	t1, t2, t2	# E : (stall)

	mskql	t0, a1, t0	# U : assemble the first output word
	cmpbge	zero, t2, t8	# E : bits set iff null found
	or	t0, t3, t0	# E : (stall)
	beq	a2, $a_eoc	# U : first word is also the last COUNT word

	bne	t8, $a_eos	# U :
	nop
	nop
	nop

	/* On entry to this basic block:
	   t0 == a source word not containing a null.  */

	/*
	 * nops here to:
	 *	separate store quads from load quads
	 *	limit of 1 bcond/quad to permit training
	 */
$a_loop:
	stq_u	t0, 0(a0)	# L :
	addq	a0, 8, a0	# E :
	subq	a2, 1, a2	# E :
	nop

	ldq_u	t0, 0(a1)	# L :
	addq	a1, 8, a1	# E :
	cmpbge	zero, t0, t8	# E :
	beq	a2, $a_eoc	# U : COUNT ends inside the word just loaded

	beq	t8, $a_loop	# U : no null yet, keep copying
	nop
	nop
	nop

	/* Take care of the final (partial) word store.  At this point
	   the end-of-count bit is set in t8 iff it applies.

	   On entry to this basic block we have:
	   t0 == the source word containing the null
	   t8 == the cmpbge mask that found it.  */

$a_eos:
	negq	t8, t12		# E : find low bit set
	and	t8, t12, t12	# E : (stall)
	/* For the sake of the cache, don't read a destination word
	   if we're not going to need it.  (If the low bit found is bit 7,
	   all eight bytes of t0 get stored.)  */
	and	t12, 0x80, t6	# E : (stall)
	bne	t6, 1f		# U : (stall)

	/* We're doing a partial word store and so need to combine
	   our source and original destination words.  */
	ldq_u	t1, 0(a0)	# L :
	subq	t12, 1, t6	# E :
	or	t12, t6, t8	# E : t8 = mask of bytes <= last byte (stall)
	zapnot	t0, t8, t0	# U : clear src bytes > null (stall)

	zap	t1, t8, t1	# U : clear dst bytes <= null
	or	t0, t1, t0	# E : (stall)
	nop
	nop

1:	stq_u	t0, 0(a0)	# L :
	ret	(t9)		# L0 : Latency=3
	nop
	nop

	/* Add the end-of-count bit to the eos detection bitmask.  */
$a_eoc:
	or	t10, t8, t8	# E :
	br	$a_eos		# L0 : Latency=3
	nop
	nop

	.end stxncpy_aligned
/*
 * __stxncpy: entry point.  Copies at most COUNT (a2, non-zero) bytes of
 * the null-terminated string at SRC (a1) to DST (a0); the caller's
 * return address is in t9.
 *
 * The setup code turns COUNT into a2 = number of full destination words
 * before the word holding the last COUNT byte, and t10 = a one-bit mask
 * giving that byte's position in its word.  It then either hands off to
 * stxncpy_aligned (SRC and DST co-aligned) or runs the unaligned copy
 * starting at $unaligned.
 */

	.align 4
	.ent __stxncpy
	.globl __stxncpy
__stxncpy:
	.frame sp, 0, t9, 0
	.prologue 0

	/* Are source and destination co-aligned?  */
	xor	a0, a1, t1	# E :
	and	a0, 7, t0	# E : find dest misalignment
	and	t1, 7, t1	# E : (stall)
	addq	a2, t0, a2	# E : bias count by dest misalignment (stall)

	subq	a2, 1, a2	# E :
	and	a2, 7, t2	# E : (stall)
	srl	a2, 3, a2	# U : a2 = loop counter = (count - 1)/8 (stall)
	addq	zero, 1, t10	# E :

	sll	t10, t2, t10	# U : t10 = bitmask of last count byte
	bne	t1, $unaligned	# U :
	/* We are co-aligned; take care of a partial first word.  */
	ldq_u	t1, 0(a1)	# L : load first src word
	addq	a1, 8, a1	# E :

	beq	t0, stxncpy_aligned	# U : avoid loading dest word if not needed
	ldq_u	t0, 0(a0)	# L :
	nop
	nop

	br	stxncpy_aligned	# L0 : Latency=3
	nop
	nop
	nop


/* The source and destination are not co-aligned.  Align the destination
   and cope.  We have to be very careful about not reading too much and
   causing a SEGV.  */

	.align 4
$u_head:
	/* We know just enough now to be able to assemble the first
	   full source word.  We can still find a zero at the end of it
	   that prevents us from outputting the whole thing.

	   On entry to this basic block:
	   t0 == the first dest word, unmasked
	   t1 == the shifted low bits of the first source word
	   t6 == bytemask that is -1 in dest word bytes */

	ldq_u	t2, 8(a1)	# L : Latency=3 load second src word
	addq	a1, 8, a1	# E :
	mskql	t0, a0, t0	# U : mask trailing garbage in dst
	extqh	t2, a1, t4	# U : (3 cycle stall on t2)

	or	t1, t4, t1	# E : first aligned src word complete (stall)
	mskqh	t1, a0, t1	# U : mask leading garbage in src (stall)
	or	t0, t1, t0	# E : first output word complete (stall)
	or	t0, t6, t6	# E : mask original data for zero test (stall)

	cmpbge	zero, t6, t8	# E :
	beq	a2, $u_eocfin	# U : first word is also the last COUNT word
	lda	t6, -1		# E :
	nop

	bne	t8, $u_final	# U :
	mskql	t6, a1, t6	# U : mask out bits already seen
	stq_u	t0, 0(a0)	# L : store first output word
	or	t6, t2, t2	# E : (stall)

	cmpbge	zero, t2, t8	# E : find nulls in second partial
	addq	a0, 8, a0	# E :
	subq	a2, 1, a2	# E :
	bne	t8, $u_late_head_exit	# U :

	/* Finally, we've got all the stupid leading edge cases taken care
	   of and we can set up to enter the main loop.  */
	extql	t2, a1, t1	# U : position hi-bits of lo word
	beq	a2, $u_eoc	# U :
	ldq_u	t2, 8(a1)	# L : read next high-order source word
	addq	a1, 8, a1	# E :

	extqh	t2, a1, t0	# U : position lo-bits of hi word (stall)
	cmpbge	zero, t2, t8	# E :
	nop
	bne	t8, $u_eos	# U :

	/* Unaligned copy main loop.  In order to avoid reading too much,
	   the loop is structured to detect zeros in aligned source words.
	   This has, unfortunately, effectively pulled half of a loop
	   iteration out into the head and half into the tail, but it does
	   prevent nastiness from accumulating in the very thing we want
	   to run as fast as possible.

	   On entry to this basic block:
	   t0 == the shifted low-order bits from the current source word
	   t1 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word

	   We further know that t2 does not contain a null terminator.  */

	.align 4
$u_loop:
	or	t0, t1, t0	# E : current dst word now complete
	subq	a2, 1, a2	# E : decrement word count
	extql	t2, a1, t1	# U : extract low bits for next time
	addq	a0, 8, a0	# E :

	stq_u	t0, -8(a0)	# L : save the current word
	beq	a2, $u_eoc	# U :
	ldq_u	t2, 8(a1)	# L : Latency=3 load high word for next time
	addq	a1, 8, a1	# E :

	extqh	t2, a1, t0	# U : extract low bits (2 cycle stall)
	cmpbge	zero, t2, t8	# E : test new word for eos
	nop
	beq	t8, $u_loop	# U :

	/* We've found a zero somewhere in the source word we just read.
	   If it resides in the lower half, we have one (probably partial)
	   word to write out, and if it resides in the upper half, we
	   have one full and one partial word left to write out.

	   On entry to this basic block:
	   t0 == the shifted low-order bits from the current source word
	   t1 == the shifted high-order bits from the previous source word
	   t2 == the unshifted current source word.  */
$u_eos:
	or	t0, t1, t0	# E : first (partial) source word complete
	nop
	cmpbge	zero, t0, t8	# E : is the null in this first bit? (stall)
	bne	t8, $u_final	# U : (stall)

	stq_u	t0, 0(a0)	# L : the null was in the high-order bits
	addq	a0, 8, a0	# E :
	subq	a2, 1, a2	# E :
	nop

$u_late_head_exit:
	extql	t2, a1, t0	# U :
	cmpbge	zero, t0, t8	# E :
	or	t8, t10, t6	# E : (stall)
	/* If no full words remain (a2 == 0), this word also holds the last
	   COUNT byte, so fold the end-of-count bit into the mask.  */
	cmoveq	a2, t6, t8	# E : Latency=2, extra map slot (stall)

	/* Take care of a final (probably partial) result word.
	   On entry to this basic block:
	   t0 == assembled source word
	   t8 == cmpbge mask that found the null.  */
$u_final:
	negq	t8, t6		# E : isolate low bit set
	and	t6, t8, t12	# E : (stall)
	and	t12, 0x80, t6	# E : avoid dest word load if we can (stall)
	bne	t6, 1f		# U : (stall)

	ldq_u	t1, 0(a0)	# L :
	subq	t12, 1, t6	# E :
	or	t6, t12, t8	# E : t8 = mask of bytes <= last byte (stall)
	zapnot	t0, t8, t0	# U : kill source bytes > null

	zap	t1, t8, t1	# U : kill dest bytes <= null
	or	t0, t1, t0	# E : (stall)
	nop
	nop

1:	stq_u	t0, 0(a0)	# L :
	ret	(t9)		# L0 : Latency=3

	/* Got to end-of-count before end of string.
	   On entry to this basic block:
	   t1 == the shifted high-order bits from the previous source word  */
$u_eoc:
	/* Shift the end-of-count bit up by the source offset.  If it is
	   still within the low eight bits, the last COUNT byte comes from
	   the source word already held in t1 and the next source word need
	   not be read.  */
	and	a1, 7, t6	# E : avoid final load if possible
	sll	t10, t6, t6	# U : (stall)
	and	t6, 0xff, t6	# E : (stall)
	bne	t6, 1f		# U : (stall)

	ldq_u	t2, 8(a1)	# L : load final src word
	nop
	extqh	t2, a1, t0	# U : extract low bits for last word (stall)
	or	t1, t0, t1	# E : (stall)

1:	cmpbge	zero, t1, t8	# E :
	mov	t1, t0		# E :

$u_eocfin:			# end-of-count, final word
	or	t10, t8, t8	# E :
	br	$u_final	# L0 : Latency=3

	/* Unaligned copy entry point.  */
	.align 4
$unaligned:

	ldq_u	t1, 0(a1)	# L : load first source word
	and	a0, 7, t4	# E : find dest misalignment
	and	a1, 7, t5	# E : find src misalignment
	/* Conditionally load the first destination word and a bytemask
	   with 0xff indicating that the destination byte is sacrosanct.  */
	mov	zero, t0	# E :

	mov	zero, t6	# E :
	beq	t4, 1f		# U : dest aligned: t0 = t6 = 0, a1 needs no bias
	ldq_u	t0, 0(a0)	# L :
	lda	t6, -1		# E :

	mskql	t6, a0, t6	# U :
	nop
	nop
	subq	a1, t4, a1	# E : sub dest misalignment from src addr

	/* If source misalignment is larger than dest misalignment, we need
	   extra startup checks to avoid SEGV.  */

1:	cmplt	t4, t5, t12	# E :
	extql	t1, a1, t1	# U : shift src into place
	lda	t2, -1		# E : for creating masks later
	beq	t12, $u_head	# U : (stall)

	extql	t2, a1, t2	# U :
	cmpbge	zero, t1, t8	# E : is there a zero?
	andnot	t2, t6, t2	# E : dest mask for a single word copy
	or	t8, t10, t5	# E : test for end-of-count too

	cmpbge	zero, t2, t3	# E : t3 = bytes holding no valid source data
	cmoveq	a2, t5, t8	# E : Latency=2, extra map slot
	nop			# E : keep with cmoveq
	andnot	t8, t3, t8	# E : ignore hits outside the valid bytes (stall)

	beq	t8, $u_head	# U :
	/* At this point we've found a zero in the first partial word of
	   the source.  We need to isolate the valid source data and mask
	   it into the original destination data.  (Incidentally, we know
	   that we'll need at least one byte of that original dest word.)  */
	ldq_u	t0, 0(a0)	# L :
	negq	t8, t6		# E : build bitmask of bytes <= zero
	mskqh	t1, t4, t1	# U :

	and	t6, t8, t12	# E :
	subq	t12, 1, t6	# E : (stall)
	or	t6, t12, t8	# E : (stall)
	zapnot	t2, t8, t2	# U : prepare source word; mirror changes (stall)

	zapnot	t1, t8, t1	# U : to source validity mask
	andnot	t0, t2, t0	# E : zero place for source to reside
	or	t0, t1, t0	# E : and put it there (stall both t0, t1)
	stq_u	t0, 0(a0)	# L : (stall)

	ret	(t9)		# L0 : Latency=3
	nop
	nop
	nop

	.end __stxncpy