2019-06-03 07:44:50 +02:00
/* SPDX-License-Identifier: GPL-2.0-only */
2014-04-28 13:11:33 +08:00
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored
 * by Linaro, which can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */
# include < l i n u x / l i n k a g e . h >
# include < a s m / a s s e m b l e r . h >
/*
 * compare two strings
 *
 * Parameters:
 *  x0 - const string 1 pointer
 *  x1 - const string 2 pointer
 *  x2 - the maximal length to be compared
 * Returns:
 *  x0 - an integer less than, equal to, or greater than zero if s1 is found,
 *     respectively, to be less than, to match, or be greater than s2.
 */
/*
 * Byte-replicated 64-bit constants (the same byte value in all eight lanes).
 * REP8_01 and REP8_7f drive the dword-at-a-time NUL detection used below:
 * (X - REP8_01) & ~(X | REP8_7f) is non-zero iff some byte of X is zero.
 * REP8_80 is not referenced in the visible part of this file.
 */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
/*
 * Symbolic register names.  Only x0-x15 are used, all of which are
 * caller-saved under AAPCS64, so nothing has to be preserved.
 */

/* Parameters and result.  */
src1		.req	x0
src2		.req	x1
limit		.req	x2
result		.req	x0	/* Shares x0 with src1: write it last. */

/* Internal variables.  */
data1		.req	x3
data1w		.req	w3	/* 32-bit view of data1. */
data2		.req	x4
data2w		.req	w4	/* 32-bit view of data2. */
has_nul		.req	x5
diff		.req	x6
syndrome	.req	x7
tmp1		.req	x8
tmp2		.req	x9
tmp3		.req	x10
zeroones	.req	x11
pos		.req	x12
limit_wd	.req	x13
mask		.req	x14
endloop		.req	x15
2018-10-26 15:02:30 -07:00
/*
 * strncmp: compare at most `limit` bytes of two NUL-terminated strings.
 *
 * In:    x0 = src1, x1 = src2, x2 = limit (maximum number of bytes compared)
 * Out:   x0 = negative, zero or positive as src1 compares less than, equal
 *             to, or greater than src2 (bytes are treated as unsigned).
 * Uses:  x3-x15 and the condition flags as scratch; the stack is not touched.
 *
 * Three paths are taken depending on the low three address bits:
 *  - both pointers dword aligned:   .Lloop_aligned, one dword per iteration;
 *  - same non-zero offset:          .Lmutual_align rounds both pointers down,
 *                                   masks the leading bytes and joins the
 *                                   aligned loop;
 *  - different offsets:             .Lmisaligned8 compares bytes until one
 *                                   pointer is aligned, then .Lloopcmp_proc
 *                                   compares a dword per iteration in two
 *                                   halves.
 * WEAK, ENDPIPROC, EXPORT_SYMBOL_NOKASAN, CPU_BE and CPU_LE are macros
 * supplied by the included headers.
 */
WEAK(strncmp)
	cbz	limit, .Lret0		/* Nothing to compare: equal. */
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7		/* Do the low address bits differ? */
	b.ne	.Lmisaligned8
	ands	tmp1, src1, #7		/* tmp1 = common offset in dword. */
	b.ne	.Lmutual_align
	/* Calculate the number of full and partial words -1.  */
	/*
	 * When limit is a multiple of 8 and we did not subtract 1 here, the
	 * test for the last dword would be wrong.
	 */
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */

	/*
	 * NUL detection works on the principle that (X - 1) & (~X) & 0x80
	 * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	 * can be done in parallel across the entire word.
	 */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	subs	limit_wd, limit_wd, #1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.  */
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	ccmp	endloop, #0, #0, eq	/* No NUL: Z = (endloop == 0). */
	b.eq	.Lloop_aligned

	/* Not reached the limit, must have found the end or a diff.  */
	tbz	limit_wd, #63, .Lnot_limit

	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	.Lnot_limit

	lsl	limit, limit, #3	/* Bytes -> bits.  */
	mov	mask, #~0
	/* mask = all ones in the bytes that lie beyond the limit. */
CPU_BE( lsr	mask, mask, limit )
CPU_LE( lsl	mask, mask, limit )
	bic	data1, data1, mask
	bic	data2, data2, mask

	/* Make sure that the NUL byte is marked in the syndrome.  */
	orr	has_nul, has_nul, mask

.Lnot_limit:
	orr	syndrome, diff, has_nul
	b	.Lcal_cmpresult

.Lmutual_align:
	/*
	 * Sources are mutually aligned, but are not currently at an
	 * alignment boundary.  Round down the addresses and then mask off
	 * the bytes that precede the start point.
	 * We also need to adjust the limit calculations, but without
	 * overflowing if the limit is near ULONG_MAX.
	 */
	bic	src1, src1, #7
	bic	src2, src2, #7
	ldr	data1, [src1], #8
	neg	tmp3, tmp1, lsl #3	/* 64 - bits(bytes beyond align).  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
	/* Big-endian.  Early bytes are at MSB.  */
CPU_BE( lsl	tmp2, tmp2, tmp3 )	/* Shift by (tmp3 & 63).  */
	/* Little-endian.  Early bytes are at LSB.  */
CPU_LE( lsr	tmp2, tmp2, tmp3 )	/* Shift by (tmp3 & 63).  */

	and	tmp3, limit_wd, #7
	lsr	limit_wd, limit_wd, #3
	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
	add	limit, limit, tmp1
	add	tmp3, tmp3, tmp1
	/*
	 * Force the bytes that precede the start point to 0xff in both
	 * dwords so they compare equal and are never seen as NUL.
	 */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	add	limit_wd, limit_wd, tmp3, lsr #3
	b	.Lstart_realigned

	/* When the src1 offset is not equal to the src2 offset...  */
.Lmisaligned8:
	cmp	limit, #8
	b.lo	.Ltiny8proc		/* limit < 8...  */
	/*
	 * Get the align offset length to compare per byte first.
	 * After this process, one string's address will be aligned.
	 */
	and	tmp1, src1, #7
	neg	tmp1, tmp1
	add	tmp1, tmp1, #8		/* tmp1 = 8 - (src1 & 7).  */
	and	tmp2, src2, #7
	neg	tmp2, tmp2
	add	tmp2, tmp2, #8		/* tmp2 = 8 - (src2 & 7).  */
	subs	tmp3, tmp1, tmp2
	csel	pos, tmp1, tmp2, hi	/* Choose the maximum.  */
	/*
	 * Here, limit is not less than 8, so directly run .Ltinycmp
	 * without checking the limit.
	 */
	sub	limit, limit, pos
.Ltinycmp:
	/* Loop while pos != 0, the src1 byte is not NUL and the bytes match. */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	pos, pos, #1
	ccmp	data1w, #1, #0, ne	/* NZCV = 0b0000.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	.Ltinycmp
	cbnz	pos, 1f			/* Found the NUL or a difference...  */
	/* pos ran out: the last byte pair has not been judged yet. */
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs
	b.eq	.Lstart_align		/* The last bytes are equal...  */
1:
	sub	result, data1, data2	/* ldrb zero-extended both bytes. */
	ret

.Lstart_align:
	lsr	limit_wd, limit, #3
	cbz	limit_wd, .Lremain8
	/* Process more leading bytes to make str1 aligned...  */
	tst	src1, #7
	b.eq	.Lrecal_offset
	/*
	 * src1 is still unaligned, so the byte loop ran for tmp2 > tmp1
	 * bytes and tmp3 = tmp1 - tmp2 is negative here.  Adding it steps
	 * both pointers back to the dword boundary of src1; the bytes that
	 * get compared again were already found equal and non-NUL.
	 */
	add	src1, src1, tmp3
	add	src2, src2, tmp3
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8

	sub	limit, limit, tmp3	/* Account for the step back.  */
	lsr	limit_wd, limit, #3
	subs	limit_wd, limit_wd, #1

	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, ne	/* If limit_wd is 0, finish the cmp.  */
	bics	has_nul, tmp1, tmp2
	ccmp	endloop, #0, #0, eq	/* has_nul is zero: no NUL byte.  */
	b.ne	.Lunequal_proc
	/* How far is the current str2 from the alignment boundary...  */
	and	tmp3, tmp3, #7
.Lrecal_offset:
	neg	pos, tmp3
.Lloopcmp_proc:
	/*
	 * Divide the eight bytes into two parts.  First, step src2 back to
	 * an alignment boundary, load eight bytes from the SRC2 alignment
	 * boundary, then compare with the corresponding bytes from SRC1.
	 * If all 8 bytes are equal, then start the second part's comparison.
	 * Otherwise finish the comparison.
	 * This special handling guarantees that all the accesses are in the
	 * thread/task space and avoids out-of-range accesses.
	 */
	ldr	data1, [src1, pos]
	ldr	data2, [src2, pos]
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, eq	/* ~0 if a NUL was seen, else diff.  */
	cbnz	endloop, .Lunequal_proc

	/* The second part process.  */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	subs	limit_wd, limit_wd, #1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, ne	/* If limit_wd is 0, finish the cmp.  */
	bics	has_nul, tmp1, tmp2
	ccmp	endloop, #0, #0, eq	/* has_nul is zero: no NUL byte.  */
	b.eq	.Lloopcmp_proc

.Lunequal_proc:
	orr	syndrome, diff, has_nul
	/* Empty syndrome: only the dword count ran out; do the tail bytes. */
	cbz	syndrome, .Lremain8
.Lcal_cmpresult:
	/*
	 * Reverse the byte order to big-endian, then CLZ can find the most
	 * significant zero bits.
	 */
CPU_LE( rev	syndrome, syndrome )
CPU_LE( rev	data1, data1 )
CPU_LE( rev	data2, data2 )
	/*
	 * For big-endian we cannot use the trick with the syndrome value
	 * as carry-propagation can corrupt the upper bits if the trailing
	 * bytes in the string contain 0x01.
	 * However, if there is no NUL byte in the dword, we can generate
	 * the result directly.  We can't just subtract the bytes as the
	 * MSB might be significant.
	 */
CPU_BE( cbnz	has_nul, 1f )
CPU_BE( cmp	data1, data2 )
CPU_BE( cset	result, ne )
CPU_BE( cneg	result, result, lo )
CPU_BE( ret )
CPU_BE( 1: )
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
CPU_BE( rev	tmp3, data1 )
CPU_BE( sub	tmp1, tmp3, zeroones )
CPU_BE( orr	tmp2, tmp3, #REP8_7f )
CPU_BE( bic	has_nul, tmp1, tmp2 )
CPU_BE( rev	has_nul, has_nul )
CPU_BE( orr	syndrome, diff, has_nul )
	/*
	 * The MS-non-zero bit of the syndrome marks either the first bit
	 * that is different, or the top bit of the first zero byte.
	 * Shifting left now will bring the critical information into the
	 * top bits.
	 */
	clz	pos, syndrome
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/*
	 * But we need to zero-extend (char is unsigned) the value and then
	 * subtract.  Both operands are in the range 0..255 after the shifts
	 * right by 56, so the difference is the signed result.
	 */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret

.Lremain8:
	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	.Lret0
.Ltiny8proc:
	/* Byte loop: stop at the limit, at a NUL in src1, or at a mismatch. */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	ccmp	data1w, #1, #0, ne	/* NZCV = 0b0000.  */
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	.Ltiny8proc
	sub	result, data1, data2
	ret

.Lret0:
	mov	result, #0
	ret
ENDPIPROC(strncmp)
EXPORT_SYMBOL_NOKASAN(strncmp)