/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on the glibc cortex-strings work originally authored
 * by Linaro, which can be found at:
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */
/*
 * Copy a buffer from src to dest (alignment handled by the hardware)
 *
 * Parameters:
 *	x0 - dest
 *	x1 - src
 *	x2 - n
 * Returns:
 *	x0 - dest
 */
/*
 * Symbolic register names used by the copy code below.
 *
 * dstin/src/count are the three incoming arguments (x0, x1, x2).  The code
 * below only reads dstin (it is copied into dst once), and advances dst
 * and src instead.
 */
dstin	.req	x0		/* destination pointer as passed in */
src	.req	x1		/* source pointer */
count	.req	x2		/* number of bytes to copy */

/* Scratch registers, each with a 32-bit view for sub-8-byte accesses. */
tmp1	.req	x3
tmp1w	.req	w3
tmp2	.req	x4
tmp2w	.req	w4

dst	.req	x6		/* working copy of the destination pointer */

/* Four low/high register pairs, each carrying 16 bytes via ldp1/stp1. */
A_l	.req	x7
A_h	.req	x8
B_l	.req	x9
B_h	.req	x10
C_l	.req	x11
C_h	.req	x12
D_l	.req	x13
D_h	.req	x14
/*
 * Entry point of the copy body.
 *
 * The ldrb1/strb1/ldrh1/strh1/ldr1/str1 accessors are macros provided by
 * the file that includes this code; presumably each one transfers the
 * given number of bytes and post-increments its pointer operand by the
 * same amount -- confirm against the includer.
 */
	mov	dst, dstin		/* advance dst, leave dstin (x0) alone */
	cmp	count, #16
	/* Fewer than 16 bytes in total: skip the alignment step entirely. */
	b.lo	.Ltiny15

	neg	tmp2, src
	ands	tmp2, tmp2, #15		/* tmp2 = bytes until src is 16-byte aligned */
	b.eq	.LSrcAligned		/* src already aligned */
	sub	count, count, tmp2	/* the head bytes are accounted for here */
	/*
	 * Copy the 1..15 leading bytes in increasing address order, driven
	 * by the individual bits of tmp2.  Going upwards means that, even
	 * when src and dst are less than 16 bytes apart, source data is not
	 * overwritten before it has been read.  Because the pieces are
	 * copied smallest first, each source access is aligned to its own
	 * size (given post-incrementing accessors, see above).
	 */
	tbz	tmp2, #0, 1f		/* 1 byte? */
	ldrb1	tmp1w, src, #1
	strb1	tmp1w, dst, #1
1:
	tbz	tmp2, #1, 2f		/* 2 bytes? */
	ldrh1	tmp1w, src, #2
	strh1	tmp1w, dst, #2
2:
	tbz	tmp2, #2, 3f		/* 4 bytes? */
	ldr1	tmp1w, src, #4
	str1	tmp1w, dst, #4
3:
	tbz	tmp2, #3, .LSrcAligned	/* 8 bytes? */
	ldr1	tmp1, src, #8
	str1	tmp1, dst, #8

.LSrcAligned:
	/*
	 * Dispatch on the remaining length: 64 bytes or more go to the
	 * block-copy code.
	 * NOTE(review): b.ge is a signed comparison, so a count with bit 63
	 * set would take the short path below -- presumably such lengths
	 * never reach this code; confirm with the callers.
	 */
	cmp	count, #64
	b.ge	.Lcpy_over64
	/*
	 * Fewer than 64 bytes left: handle small copies quickly by falling
	 * straight through into the tail code.
	 */
.Ltail63:
	/*
	 * Copy up to 48 bytes of data, 16 bytes at a time.  From here on
	 * only the bottom 6 bits of count need to be accurate; bits [5:4]
	 * select how many 16-byte chunks are copied.
	 */
	ands	tmp1, count, #0x30
	b.eq	.Ltiny15		/* no 16-byte chunk to copy */
	cmp	tmp1w, #0x20
	b.eq	1f			/* 0x20: two chunks */
	b.lt	2f			/* 0x10: one chunk */
	/* 0x30: three chunks, falling through the two cases below. */
	ldp1	A_l, A_h, src, #16
	stp1	A_l, A_h, dst, #16
1:
	ldp1	A_l, A_h, src, #16
	stp1	A_l, A_h, dst, #16
2:
	ldp1	A_l, A_h, src, #16
	stp1	A_l, A_h, dst, #16
.Ltiny15:
	/*
	 * Copy the final 0..15 bytes, selected by bits [3:0] of count.
	 *
	 * The original cortex-strings memcpy finished with one 16-byte
	 * load/store from (src - 16) to (dst - 16), stepping src backwards
	 * to an aligned address.  Here the transfer is instead split into
	 * several loads/stores so that memory is still accessed in
	 * increasing address order.  Had the original approach been kept,
	 * memmove could only call memcpy directly when src is at least 16
	 * bytes above dst, otherwise some source data would be overwritten.
	 * The original sequence was dropped to keep memmove simpler and to
	 * decouple memcpy from memmove.
	 */
	tbz	count, #3, 1f		/* 8 bytes? */
	ldr1	tmp1, src, #8
	str1	tmp1, dst, #8
1:
	tbz	count, #2, 2f		/* 4 bytes? */
	ldr1	tmp1w, src, #4
	str1	tmp1w, dst, #4
2:
	tbz	count, #1, 3f		/* 2 bytes? */
	ldrh1	tmp1w, src, #2
	strh1	tmp1w, dst, #2
3:
	tbz	count, #0, .Lexitfunc	/* 1 byte? */
	ldrb1	tmp1w, src, #1
	strb1	tmp1w, dst, #1

	b	.Lexitfunc

.Lcpy_over64:
	/*
	 * At least 64 bytes remain.  Bias count by -128: a non-negative
	 * result means 128 bytes or more and selects the pipelined loop.
	 */
	subs	count, count, #128
	b.ge	.Lcpy_body_large
	/*
	 * Less than 128 bytes to copy, so move one 64-byte block here and
	 * then jump to the tail.
	 */
	ldp1	A_l, A_h, src, #16
	stp1	A_l, A_h, dst, #16
	ldp1	B_l, B_h, src, #16
	ldp1	C_l, C_h, src, #16
	stp1	B_l, B_h, dst, #16
	stp1	C_l, C_h, dst, #16
	ldp1	D_l, D_h, src, #16
	stp1	D_l, D_h, dst, #16

	/*
	 * 128 is a multiple of 64, so the low 6 bits of the biased count
	 * still equal the number of bytes left over.
	 */
	tst	count, #0x3f
	b.ne	.Ltail63
	b	.Lexitfunc

/*
 * Critical loop.  Start at a new cache line boundary.  Assuming
 * 64 bytes per line, this ensures the entire loop is in one line.
 */
	.p2align	L1_CACHE_SHIFT
.Lcpy_body_large:
	/* Prime the pipeline: load the first 64 bytes into A..D. */
	ldp1	A_l, A_h, src, #16
	ldp1	B_l, B_h, src, #16
	ldp1	C_l, C_h, src, #16
	ldp1	D_l, D_h, src, #16
1:
	/*
	 * Interleave the stores of the 64 bytes loaded previously with the
	 * loads of the next 64-byte block.
	 */
	stp1	A_l, A_h, dst, #16
	ldp1	A_l, A_h, src, #16
	stp1	B_l, B_h, dst, #16
	ldp1	B_l, B_h, src, #16
	stp1	C_l, C_h, dst, #16
	ldp1	C_l, C_h, src, #16
	stp1	D_l, D_h, dst, #16
	ldp1	D_l, D_h, src, #16
	subs	count, count, #64
	b.ge	1b			/* loop while the biased count is >= 0 */
	/* Drain the pipeline: store the last 64 bytes that were loaded. */
	stp1	A_l, A_h, dst, #16
	stp1	B_l, B_h, dst, #16
	stp1	C_l, C_h, dst, #16
	stp1	D_l, D_h, dst, #16

	/* Any leftover (< 64 bytes) is held in the low 6 bits of count. */
	tst	count, #0x3f
	b.ne	.Ltail63
	/* Nothing left over: fall through to the exit label. */
.Lexitfunc: