2019-06-03 07:44:50 +02:00
/* SPDX-License-Identifier: GPL-2.0-only */
2014-04-28 06:11:32 +01:00
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored by
 * Linaro. The original code can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */
# include < l i n u x / l i n k a g e . h >
# include < a s m / a s s e m b l e r . h >
/*
 * Compare two memory areas. When the two areas start at different offsets
 * within an 8-byte word, the resulting unaligned accesses are left to the
 * hardware.
 *
 * Parameters:
 *	x0 - const memory area 1 pointer
 *	x1 - const memory area 2 pointer
 *	x2 - the maximal compare byte length
 * Returns:
 *	x0 - a compare result, which may be less than, equal to, or greater
 *	     than ZERO
 */
/*
 * Symbolic register names used by the routine below.
 *
 * Arguments and return value. "result" names the same register as "src1",
 * so src1 is no longer usable once result has been written.
 */
src1		.req	x0	/* first memory area */
src2		.req	x1	/* second memory area */
limit		.req	x2	/* number of bytes to compare */
result		.req	x0	/* value handed back to the caller */

/*
 * Working registers. The "w" names are the 32-bit views of the same
 * registers as data1/data2 and are used for the byte loads.
 */
data1		.req	x3	/* data loaded from src1 */
data1w		.req	w3
data2		.req	x4	/* data loaded from src2 */
data2w		.req	w4
has_nul		.req	x5	/* not referenced by the code visible in this file */
diff		.req	x6	/* data1 ^ data2 */
endloop		.req	x7	/* non-zero when a dword loop has to stop */
tmp1		.req	x8
tmp2		.req	x9
tmp3		.req	x10
pos		.req	x11	/* byte counter, load offset or shift amount */
limit_wd	.req	x12	/* remaining length counted in 8-byte dwords */
mask		.req	x13	/* bit mask covering bytes beyond the limit */
2018-10-26 15:02:30 -07:00
/*
 * memcmp: compare up to "limit" bytes at src1 and src2.
 *
 * Three strategies are used, chosen from the low three address bits:
 *   - both pointers 8-byte aligned: dword loop (.Lloop_aligned);
 *   - same offset inside a dword: round both pointers down, neutralise the
 *     leading bytes and join the dword loop (.Lmutual_align);
 *   - different offsets: byte-compare a short head, then run a dword loop
 *     in which src1 is aligned (.Lmisaligned8).
 *
 * The value returned in "result" is the difference between the first pair
 * of bytes that differ (each taken as an unsigned byte), or zero.
 *
 * CPU_BE()/CPU_LE() are provided by the included kernel headers; each is
 * used here for the big-endian, respectively little-endian, variant of an
 * instruction.
 */
WEAK(memcmp)
	/* Nothing to compare: report equality. */
	cbz	limit, .Lret0

	/* Do the two addresses share the same offset within a dword? */
	eor	tmp1, src1, src2
	tst	tmp1, #7
	b.ne	.Lmisaligned8
	/* Same offset; tmp1 = that offset. Non-zero means not on a boundary. */
	ands	tmp1, src1, #7
	b.ne	.Lmutual_align

	/* limit_wd = (limit - 1) / 8; limit != 0 here, so no wrap-around. */
	sub	limit_wd, limit, #1
	lsr	limit_wd, limit_wd, #3

	/*
	 * Both sources sit on an 8-byte boundary: compare one dword per
	 * iteration.
	 */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	eor	diff, data1, data2	/* Non-zero when the dwords differ. */
	subs	limit_wd, limit_wd, #1
	/* Borrow (this was the last dword) forces endloop to all ones. */
	csinv	endloop, diff, xzr, cs
	cbz	endloop, .Lloop_aligned

	/* Counter did not go negative, so the loop stopped on a difference. */
	tbz	limit_wd, #63, .Lnot_limit

	/* Last dword. If limit % 8 == 0 all eight of its bytes are valid. */
	ands	limit, limit, #7
	b.eq	.Lnot_limit

	/*
	 * Fewer than eight valid bytes in the last dword: clear the bytes
	 * beyond the limit in both operands and mark them in diff, so the
	 * search below stops either at a real difference or at the end of
	 * the valid data.
	 */
	lsl	limit, limit, #3	/* Bytes -> bits. */
	mov	mask, #~0
CPU_BE( lsr	mask, mask, limit )
CPU_LE( lsl	mask, mask, limit )
	bic	data1, data1, mask
	bic	data2, data2, mask
	orr	diff, diff, mask
	b	.Lnot_limit

.Lmutual_align:
	/*
	 * Same offset (tmp1) in both sources, but not on a boundary. Round
	 * both addresses down and load the enclosing dwords; the bytes that
	 * precede the real start are made identical further down.
	 */
	bic	src1, src1, #7
	bic	src2, src2, #7
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8

	/*
	 * Compute the dword count for limit + tmp1 without forming that sum
	 * first, because the addition could overflow:
	 *   limit_wd = (limit - 1) / 8 + (((limit - 1) % 8) + tmp1) / 8
	 */
	sub	limit_wd, limit, #1	/* limit != 0, so no wrap-around. */
	and	tmp3, limit_wd, #7
	lsr	limit_wd, limit_wd, #3
	add	tmp3, tmp3, tmp1
	add	limit_wd, limit_wd, tmp3, lsr #3
	add	limit, limit, tmp1	/* Account for the extra leading bytes. */

	/*
	 * Build a mask covering the tmp1 bytes in front of the start address
	 * and OR it into both operands so that those bytes compare equal.
	 */
	lsl	tmp1, tmp1, #3		/* Offset in bytes -> bits. */
	neg	tmp1, tmp1		/* Shift amount, used modulo 64. */
	mov	tmp2, #~0
	/* Big-endian: the early bytes are at the MSB end. */
CPU_BE( lsl	tmp2, tmp2, tmp1 )
	/* Little-endian: the early bytes are at the LSB end. */
CPU_LE( lsr	tmp2, tmp2, tmp1 )
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	.Lstart_realigned

	/* src1 and src2 have different offsets within a dword. */
.Lmisaligned8:
	cmp	limit, #8
	b.lo	.Ltiny8proc		/* limit < 8: plain byte loop. */

	/* tmp1 / tmp2 = bytes from src1 / src2 up to the next boundary (1..8). */
	and	tmp1, src1, #7
	neg	tmp1, tmp1
	add	tmp1, tmp1, #8
	and	tmp2, src2, #7
	neg	tmp2, tmp2
	add	tmp2, tmp2, #8
	/* tmp3 = tmp1 - tmp2; pos = the larger of the two distances. */
	subs	tmp3, tmp1, tmp2
	csel	pos, tmp1, tmp2, hi
	/* limit >= 8 and pos <= 8, so this cannot go negative. */
	sub	limit, limit, pos

	/* Compare the leading "pos" bytes one at a time. */
.Ltinycmp:
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	pos, pos, #1
	/* pos != 0: compare the bytes; pos == 0: NZCV = 0b0000 ends the loop. */
	ccmp	data1w, data2w, #0, ne
	b.eq	.Ltinycmp
	cbnz	pos, .Ltiny_mismatch	/* Stopped early: the bytes differ. */
	/* The final byte was not compared by the ccmp above; do it here. */
	cmp	data1w, data2w
	b.eq	.Lstart_align
.Ltiny_mismatch:
	sub	result, data1, data2
	ret

.Lstart_align:
	lsr	limit_wd, limit, #3
	cbz	limit_wd, .Lremain8	/* Less than a dword left. */

	tst	src1, #7
	b.eq	.Lrecal_offset		/* src1 already on a boundary. */

	/*
	 * src1 is not aligned, so the head loop ran to src2's boundary and
	 * tmp3 (tmp1 - tmp2) is negative here. Adding it steps both pointers
	 * back so that src1 lands on its boundary; the length grows by the
	 * same amount.
	 */
	add	src1, src1, tmp3
	add	src2, src2, tmp3
	sub	limit, limit, tmp3
	lsr	limit_wd, limit, #3
	cbz	limit_wd, .Lremain8

	/* One dword compare starting at the now aligned src1. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	eor	diff, data1, data2	/* Non-zero when the dwords differ. */
	subs	limit_wd, limit_wd, #1
	/* Dword count reached zero: force endloop to all ones. */
	csinv	endloop, diff, xzr, ne
	cbnz	endloop, .Lunequal_proc

	/* tmp3 = distance of src2 past its own 8-byte boundary. */
	and	tmp3, tmp3, #7

.Lrecal_offset:
	/* src1 is aligned from here on; pos = -(src2's offset in its dword). */
	neg	pos, tmp3
.Lloopcmp_proc:
	/*
	 * Each iteration compares in two steps. First, eight bytes starting
	 * "pos" bytes back, which is where src2 sits on an alignment
	 * boundary. If those match, the next eight bytes at src1/src2 are
	 * compared and both pointers advance. According to the original
	 * authors' note, this split keeps all accesses inside the caller's
	 * memory and avoids reading past the end of the range.
	 */
	ldr	data1, [src1, pos]
	ldr	data2, [src2, pos]
	eor	diff, data1, data2	/* Non-zero when the dwords differ. */
	cbnz	diff, .Lnot_limit

	/* Second step: the dword at the current positions. */
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
	eor	diff, data1, data2	/* Non-zero when the dwords differ. */
	subs	limit_wd, limit_wd, #1
	/* limit_wd == 0 ends the loop even when the dwords are equal. */
	csinv	endloop, diff, xzr, ne
	cbz	endloop, .Lloopcmp_proc
.Lunequal_proc:
	/* No difference means the dwords ran out: finish the tail bytes. */
	cbz	diff, .Lremain8

	/* The most recent dword comparison found a difference. */
.Lnot_limit:
	/*
	 * On little-endian the first byte in memory is the least significant
	 * one. Byte-reverse everything so that the first differing byte
	 * becomes the most significant set part of diff and CLZ can count
	 * the equal bits in front of it.
	 */
CPU_LE( rev	diff, diff )
CPU_LE( rev	data1, data1 )
CPU_LE( rev	data2, data2 )

	/*
	 * The highest set bit of diff marks either the first differing bit
	 * or the end of the valid data. Shifting both operands left by that
	 * many bits moves the deciding bits to the top.
	 */
	clz	pos, diff
	lsl	data1, data1, pos
	lsl	data2, data2, pos

	/*
	 * Reduce each operand to its top eight bits, zero-extended, and
	 * return their difference.
	 */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret

.Lremain8:
	/* limit % 8 == 0: nothing left, everything compared equal. */
	ands	limit, limit, #7
	b.eq	.Lret0

	/* Byte-by-byte compare of the remaining "limit" bytes. */
.Ltiny8proc:
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	/* limit != 0: compare the bytes; limit == 0: NZCV = 0b0000 ends the loop. */
	ccmp	data1w, data2w, #0, ne
	b.eq	.Ltiny8proc
	/* Difference of the last pair loaded (zero if they were equal). */
	sub	result, data1, data2
	ret

.Lret0:
	mov	result, #0
	ret
ENDPIPROC(memcmp)
EXPORT_SYMBOL_NOKASAN(memcmp)