2013-01-18 13:42:18 +04:00
/ *
* Copyright ( C ) 2 0 0 4 , 2 0 0 7 - 2 0 1 0 , 2 0 1 1 - 2 0 1 2 S y n o p s y s , I n c . ( w w w . s y n o p s y s . c o m )
*
* This p r o g r a m i s f r e e s o f t w a r e ; you can redistribute it and/or modify
* it u n d e r t h e t e r m s o f t h e G N U G e n e r a l P u b l i c L i c e n s e v e r s i o n 2 a s
* published b y t h e F r e e S o f t w a r e F o u n d a t i o n .
* /
/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   (respectively one cycle / byte) by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead for
   loop setup / finish, and strcmp might often terminate early.  */
2014-02-07 12:17:43 +04:00
# include < l i n u x / l i n k a g e . h >
2013-01-18 13:42:18 +04:00
2016-09-20 02:42:25 +03:00
; strcmp: compare the zero-terminated byte strings addressed by r0 and r1.
;
; In:    r0, r1 = addresses of the two strings (both registers are advanced
;                 by the post-incrementing loads below)
; Out:   r0 = 0 when the strings match up to and including the terminator,
;             a value with bit 31 clear and non-zero when the first
;             differing byte of the r0 string is the larger one,
;             a value with bit 31 set when it is the smaller one
; Uses:  r2, r3, r4, r5 and r12 as scratch, plus the status flags
; Return is through blink; every j_s.d below executes the instruction that
; follows it in its delay slot, which is where r0 gets its final value.
ENTRY_ C F I ( s t r c m p )
2013-01-18 13:42:18 +04:00
; r2 = (r0 | r1) with only bits 1:0 kept: non-zero when either pointer is
; not 4-byte aligned, in which case the byte-at-a-time loop is used.
or r2 ,r0 ,r1
bmsk_ s r2 ,r2 ,1
brne r2 ,0 ,. L c h a r l o o p
; Constants for the zero-byte test: r12 = 0x01 in every byte, and r5 is r12
; rotated right by one bit, i.e. 0x80 in every byte.
mov_ s r12 ,0 x01 0 1 0 1 0 1
ror r5 ,r12
; Word loop: fetch one 32-bit word from each string, advancing both
; pointers by 4 after the access.
.Lwordloop :
ld. a b r2 ,[ r0 ,4 ]
ld. a b r3 ,[ r1 ,4 ]
; NOTE(review): the file does not say why this nop_s is here; presumably it
; covers load-to-use latency and/or keeps code alignment - confirm.
nop_ s
; r4 = (r2 - 0x01010101) & ~r2 & 0x80808080. r4 is non-zero when the word
; from the first string contains a zero byte; the set bits mark candidate
; zero bytes (the big-endian .Lfound0 comment describes a false positive).
sub r4 ,r2 ,r12
bic r4 ,r4 ,r2
and r4 ,r4 ,r5
; Terminator seen in r2: finish in .Lfound0.
brne r4 ,0 ,. L f o u n d0
; No terminator and the words are identical: keep going.
breq r2 ,r3 ,. L w o r d l o o p
; Fall through: the words differ and r2 holds no zero byte.
# ifdef _ _ L I T T L E _ E N D I A N _ _
; Little-endian only: reduce r2 and r3 to the byte that holds the lowest
; differing bit, so that the word compare below is decided by that byte.
; r0 & ~(r0 - 1) isolates the lowest set bit of the difference; subtracting
; that bit from r5 and xor-ing with r5 yields a mask running from that bit
; up to bit 7 of the same byte.
xor r0 ,r2 ,r3 ; mask for difference
sub_ s r1 ,r0 ,1
bic_ s r0 ,r0 ,r1 ; mask for least significant difference bit
sub r1 ,r5 ,r0
xor r0 ,r5 ,r1 ; mask for least significant difference byte
and_ s r2 ,r2 ,r0
and_ s r3 ,r3 ,r0
# endif / * L I T T L E E N D I A N * /
; Unsigned compare of the (possibly masked) words. The result is 1, with
; bit 31 additionally set in the delay slot when r2 is lower than r3.
; Equality cannot occur on this path, so 0 is never returned from here.
cmp_ s r2 ,r3
mov_ s r0 ,1
j_ s . d [ b l i n k ]
bset. l o r0 ,r0 ,3 1
.balign 4
# ifdef _ _ L I T T L E _ E N D I A N _ _
; Little-endian handling of a word that contains the terminator.
; The zero indicator r4 is merged into the difference mask, so the lowest
; set bit marks whichever comes first: a differing bit or the terminator.
; Everything above that byte is masked off before comparing.
.Lfound0 :
xor r0 ,r2 ,r3 ; mask for difference
or r0 ,r0 ,r4 ; or in zero indicator
sub_ s r1 ,r0 ,1
bic_ s r0 ,r0 ,r1 ; mask for least significant difference bit
sub r1 ,r5 ,r0
xor r0 ,r5 ,r1 ; mask for least significant difference byte
and_ s r2 ,r2 ,r0
and_ s r3 ,r3 ,r0
; r0 = r2 - r3 with flags updated: stays 0 when the masked words are equal,
; is replaced by 1 when r2 is higher (unsigned), and gets bit 31 forced on
; in the delay slot when r2 is lower (unsigned).
sub. f r0 ,r2 ,r3
mov. h i r0 ,1
j_ s . d [ b l i n k ]
bset. l o r0 ,r0 ,3 1
# else / * B I G E N D I A N * /
/ * The z e r o - d e t e c t i o n a b o v e c a n m i s - d e t e c t 0 x01 b y t e s a s z e r o e s
because o f c a r r y - p r o p a g a t e i o n f r o m a l o w e r s i g n i f i c a n t z e r o b y t e .
We c a n c o m p e n s a t e f o r t h i s b y c h e c k i n g t h a t b i t 0 i s z e r o .
This c o m p e n s a t i o n i s n o t n e c e s s a r y i n t h e s t e p w h e r e w e
get a l o w e s t i m a t e f o r r2 , b e c a u s e i n a n y a f f e c t e d b y t e s
we a l r e a d y h a v e 0 x00 o r 0 x01 , w h i c h w i l l r e m a i n u n c h a n g e d
when b i t 7 i s c l e a r e d . * /
.balign 4
; Big-endian handling of a word that contains the terminator.
; r0 = zero indicator shifted down by one byte, r1 = r2 shifted right by
; one bit, so that bit 0 of each byte of r2 lines up with the indicator
; bit in r0. Clearing r0 where r1 is set drops indicators whose source
; byte had bit 0 set (the 0x01 false positive described above).
; Two compares follow: the first (r3 against r2) is captured as 0/1 through
; the carry by rlc, the second (r2 against r3) drives the bset.lo in the
; delay slot, which sets bit 31 when r2 is lower (unsigned).
.Lfound0 :
lsr r0 ,r4 ,8
lsr_ s r1 ,r2
bic_ s r2 ,r2 ,r0 ; get low estimate for r2 and get ...
bic_ s r0 ,r0 ,r1 ; <this is the adjusted mask for zeros>
or_ s r3 ,r3 ,r0 ; ... high estimate r3 so that r2 > r3 will ...
cmp_ s r3 ,r2 ; ... be independent of trailing garbage
or_ s r2 ,r2 ,r0 ; likewise for r3 > r2
bic_ s r3 ,r3 ,r0
rlc r0 ,0 ; r0 := r2 > r3 ? 1 : 0
cmp_ s r2 ,r3
j_ s . d [ b l i n k ]
bset. l o r0 ,r0 ,3 1
# endif / * E N D I A N * /
.balign 4
; Byte loop, used when either pointer is not 4-byte aligned: load one byte
; from each string (pointers advance by 1 after the access), stop at the
; terminator of the first string or at the first mismatch.
.Lcharloop :
ldb. a b r2 ,[ r0 ,1 ]
ldb. a b r3 ,[ r1 ,1 ]
; NOTE(review): same unexplained nop_s as in the word loop - confirm intent.
nop_ s
breq r2 ,0 ,. L c m p e n d
breq r2 ,r3 ,. L c h a r l o o p
; Result is the difference of the last two bytes loaded, computed in the
; delay slot of the return jump.
.Lcmpend :
j_ s . d [ b l i n k ]
sub r0 ,r2 ,r3
2016-09-20 02:42:25 +03:00
END_ C F I ( s t r c m p )