/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2013-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * strncmp, AArch64 (ARMv8-A) implementation.
 *
 * In:   x0 = src1, x1 = src2, x2 = limit (maximum number of bytes compared)
 * Out:  x0 = 0 when limit is zero or no difference is found in the compared
 *       range; otherwise the byte from src1 minus the byte from src2 at the
 *       first differing position, both bytes taken as unsigned values.
 * Uses: x3-x15 as scratch registers, and the condition flags.
 */

/* Every label in this file is assembler-local (".L" prefix). */
#define L(label) .L ## label

/* One byte pattern replicated across all eight bytes of a dword. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Arguments and return value. */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		x0

/* Scratch registers. */
#define data1		x3
#define data1w		w3
#define data2		x4
#define data2w		w4
#define has_nul		x5
#define diff		x6
#define syndrome	x7
#define tmp1		x8
#define tmp2		x9
#define tmp3		x10
#define zeroones	x11
#define pos		x12
#define limit_wd	x13
#define mask		x14
#define endloop		x15
/* count lives in the same register as mask. */
#define count		mask

SYM_FUNC_START_WEAK_PI(strncmp)
	cbz	limit, L(ret0)			/* Nothing to compare. */
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7			/* Same offset within a dword? */
	and	count, src1, #7			/* src1 offset within its dword. */
	b.ne	L(misaligned8)
	cbnz	count, L(mutual_align)

	/* limit_wd = number of dwords (full or partial) to examine, less one. */
	sub	limit_wd, limit, #1		/* limit is non-zero: cannot wrap. */
	lsr	limit_wd, limit_wd, #3		/* Bytes -> dwords. */

	/*
	 * A zero byte is found with (X - 1) & ~X & 0x80 evaluated on all
	 * eight bytes at once, written here as (X - 1) & ~(X | 0x7f).  The
	 * result is non-zero exactly when X contains a zero byte.
	 */
	.p2align 4
L(loop_aligned):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned):
	subs	limit_wd, limit_wd, #1
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2		/* Non-zero when the dwords differ. */
	csinv	endloop, diff, xzr, pl		/* All-ones once the last dword is reached. */
	bics	has_nul, tmp1, tmp2		/* Non-zero when data1 holds a NUL. */
	ccmp	endloop, #0, #0, eq
	b.eq	L(loop_aligned)
	/* Main loop ends here. */

	/* limit_wd still non-negative: a NUL or a difference stopped the loop. */
	tbz	limit_wd, #63, L(not_limit)

	/* Limit is a multiple of 8: every byte of the dword counts. */
	ands	limit, limit, #7
	b.eq	L(not_limit)

	lsl	limit, limit, #3		/* Byte count -> bit count. */
	mov	mask, #~0
#ifdef __AARCH64EB__
	lsr	mask, mask, limit
#else
	lsl	mask, mask, limit
#endif
	/* Clear the bytes that lie past the limit in both dwords. */
	bic	data1, data1, mask
	bic	data2, data2, mask

	/* Flag those bytes in has_nul so the syndrome stops at the limit. */
	orr	has_nul, has_nul, mask

L(not_limit):
	orr	syndrome, diff, has_nul

#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	/*
	 * After the byte reversal, the highest set bit of the syndrome lies
	 * in the first byte that differs or is NUL.  Shifting both dwords
	 * left by the leading-zero count moves that byte to the top.
	 */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/*
	 * Bring the top byte of each down to the bottom, zero-extended
	 * (char is unsigned), and subtract.
	 */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/*
	 * Big-endian: the syndrome cannot be used as is, because borrow
	 * propagation in the NUL test can corrupt the upper bits when the
	 * trailing bytes of the string contain 0x01.
	 *
	 * With no NUL in the dword the answer comes from a plain unsigned
	 * comparison.  Subtracting the dwords would not do, as the most
	 * significant bit might be significant.
	 */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne
	cneg	result, result, lo
	ret
1:
	/* Redo the NUL detection on a byte-reversed copy of data1. */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/*
	 * The highest set bit of the syndrome lies in the first byte that
	 * differs or is NUL.  Shifting both dwords left by the leading-zero
	 * count moves that byte to the top.
	 */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/*
	 * Bring the top byte of each down to the bottom, zero-extended
	 * (char is unsigned), and subtract.
	 */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

L(mutual_align):
	/*
	 * Both pointers share the same non-zero offset within a dword.
	 * Round them down, load the enclosing dwords and force the bytes in
	 * front of the real start to 0xff in both, so they compare equal and
	 * are never taken for a NUL.  The limit bookkeeping is adjusted in
	 * pieces so that a limit close to ULONG_MAX does not overflow.
	 */
	bic	src1, src1, #7
	bic	src2, src2, #7
	ldr	data1, [src1], #8
	neg	tmp3, count, lsl #3		/* 64 - 8 * count, modulo 64. */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
	sub	limit_wd, limit, #1		/* limit is non-zero: cannot wrap. */
#ifdef __AARCH64EB__
	/* Big-endian: the earliest bytes sit at the most significant end. */
	lsl	tmp2, tmp2, tmp3		/* Shift amount is taken modulo 64. */
#else
	/* Little-endian: the earliest bytes sit at the least significant end. */
	lsr	tmp2, tmp2, tmp3		/* Shift amount is taken modulo 64. */
#endif
	and	tmp3, limit_wd, #7
	lsr	limit_wd, limit_wd, #3
	/* Only the low three bits of limit are read later; overflow is harmless. */
	add	limit, limit, count
	add	tmp3, tmp3, count
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	add	limit_wd, limit_wd, tmp3, lsr #3
	b	L(start_realigned)

	.p2align 4
	/* Fewer than 16 bytes to compare: stay with the byte loop. */
L(misaligned8):
	cmp	limit, #16
	b.hs	L(try_misaligned_words)

L(byte_loop):
	/* Simple byte-at-a-time comparison. */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	ccmp	data1w, #1, #0, hi		/* Limit used up: NZCV = 0b0000. */
	ccmp	data1w, data2w, #0, cs		/* NUL in src1: NZCV = 0b0000. */
	b.eq	L(byte_loop)
L(done):
	sub	result, data1, data2
	ret

	/*
	 * Compare bytes until src1 sits on a dword boundary, then continue
	 * one dword at a time.
	 */
L(try_misaligned_words):
	lsr	limit_wd, limit, #3
	cbz	count, L(do_misaligned)		/* src1 is already aligned. */

	neg	count, count
	and	count, count, #7		/* Bytes needed to align src1. */
	sub	limit, limit, count
	lsr	limit_wd, limit, #3

L(page_end_loop):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs		/* NUL in src1: NZCV = 0b0000. */
	b.ne	L(done)
	subs	count, count, #1
	b.hi	L(page_end_loop)

L(do_misaligned):
	/*
	 * Arm the byte loop for the next page crossing (eight bytes).  One
	 * dword fewer is fetched than in the aligned loop, because an 8-byte
	 * load from SRC2 could otherwise run past the permitted range.
	 */
	mov	count, #8
	subs	limit_wd, limit_wd, #1
	b.lo	L(done_loop)
L(loop_misaligned):
	/* src2 offset 0xff8 or above within 4 KiB: take the byte loop. */
	and	tmp2, src2, #0xff8
	eor	tmp2, tmp2, #0xff8
	cbz	tmp2, L(page_end_loop)

	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2		/* Non-zero when the dwords differ. */
	bics	has_nul, tmp1, tmp2		/* Non-zero when data1 holds a NUL. */
	ccmp	diff, #0, #0, eq
	b.ne	L(not_limit)
	subs	limit_wd, limit_wd, #1
	b.pl	L(loop_misaligned)

L(done_loop):
	/* Whole dwords are used up; limit % 8 bytes may still remain. */
	and	limit, limit, #7
	cbz	limit, L(not_limit)
	/* Reload the dword whose last byte is the final byte within the limit. */
	sub	src1, src1, #8
	sub	src2, src2, #8
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2		/* Non-zero when the dwords differ. */
	bics	has_nul, tmp1, tmp2		/* Non-zero when data1 holds a NUL. */
	ccmp	diff, #0, #0, eq
	b.ne	L(not_limit)

L(ret0):
	mov	result, #0
	ret

SYM_FUNC_END_PI(strncmp)
EXPORT_SYMBOL_NOKASAN(strncmp)