2013-03-21 20:16:43 +04:00
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on glibc cortex strings work originally authored by Linaro
 * and re-licensed under GPLv2 for the Linux kernel. The original code can
 * be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
# include < l i n u x / l i n k a g e . h >
# include < a s m / a s s e m b l e r . h >
2014-04-28 09:11:29 +04:00
# include < a s m / c a c h e . h >
2013-03-21 20:16:43 +04:00
/*
 * Copy a buffer from src to dest (alignment handled by the hardware)
 *
 * Parameters:
 *	x0 - dest
 *	x1 - src
 *	x2 - n
 * Returns:
 *	x0 - dest
 */
2014-04-28 09:11:29 +04:00
/*
 * Symbolic register names used by the copy routine below.
 * All of them map onto x0-x14, which are caller-saved under AAPCS64,
 * so nothing has to be spilled to the stack.
 */
dstin	.req	x0	/* dest as passed in; never written, so also the return value */
src	.req	x1	/* source cursor, advanced by post-indexed loads */
count	.req	x2	/* bytes remaining (biased by -128 on the large-copy path) */
tmp1	.req	x3	/* scratch / data for sub-16-byte transfers */
tmp1w	.req	w3	/* 32-bit view of tmp1 */
tmp2	.req	x4	/* bytes needed to bring src up to 16-byte alignment */
tmp2w	.req	w4	/* 32-bit view of tmp2 */
tmp3	.req	x5	/* spare scratch; not referenced by the code in this file */
tmp3w	.req	w5	/* 32-bit view of tmp3 */
dst	.req	x6	/* destination cursor, advanced by post-indexed stores */

/* Four 16-byte register pairs, together holding one 64-byte block. */
A_l	.req	x7
A_h	.req	x8
B_l	.req	x9
B_h	.req	x10
C_l	.req	x11
C_h	.req	x12
D_l	.req	x13
D_h	.req	x14
2013-03-21 20:16:43 +04:00
ENTRY(memcpy)
	/*
	 * Work on a copy of the destination pointer: x0 (dstin) is never
	 * modified, so it still holds dest when we return.
	 */
	mov	dst, dstin
	cmp	count, #16
	/* Fewer than 16 bytes: skip the source-alignment step entirely. */
	b.lo	.Ltiny15

	neg	tmp2, src
	ands	tmp2, tmp2, #15		/* Bytes to reach alignment. */
	b.eq	.LSrcAligned
	sub	count, count, tmp2
	/*
	 * Copy the leading bytes from src to dst in increasing address
	 * order. Doing it this way eliminates the risk of overwriting
	 * source data when the distance between src and dst is less than
	 * 16. Each bit of tmp2 selects one transfer (1, 2, 4, then 8
	 * bytes), so every load from src here is naturally aligned.
	 */
	tbz	tmp2, #0, 1f
	ldrb	tmp1w, [src], #1
	strb	tmp1w, [dst], #1
1:
	tbz	tmp2, #1, 2f
	ldrh	tmp1w, [src], #2
	strh	tmp1w, [dst], #2
2:
	tbz	tmp2, #2, 3f
	ldr	tmp1w, [src], #4
	str	tmp1w, [dst], #4
3:
	tbz	tmp2, #3, .LSrcAligned
	ldr	tmp1, [src], #8
	str	tmp1, [dst], #8

.LSrcAligned:
	/*
	 * NOTE: b.ge is a signed test, here and on the paths below; the
	 * code assumes count never has bit 63 set.
	 */
	cmp	count, #64
	b.ge	.Lcpy_over64
	/*
	 * Deal with small copies quickly by dropping straight into the
	 * exit block.
	 */
.Ltail63:
	/*
	 * Copy up to 48 bytes of data. At this point we only need the
	 * bottom 6 bits of count to be accurate.
	 *
	 * Bits [5:4] of count select how many 16-byte pairs to move:
	 * 0x30 falls through all three copies, 0x20 enters at 1:,
	 * 0x10 enters at 2:, and 0 skips straight to the sub-16 tail.
	 */
	ands	tmp1, count, #0x30
	b.eq	.Ltiny15
	cmp	tmp1w, #0x20
	b.eq	1f
	b.lt	2f
	ldp	A_l, A_h, [src], #16
	stp	A_l, A_h, [dst], #16
1:
	ldp	A_l, A_h, [src], #16
	stp	A_l, A_h, [dst], #16
2:
	ldp	A_l, A_h, [src], #16
	stp	A_l, A_h, [dst], #16
.Ltiny15:
	/*
	 * Copy the last 0-15 bytes, one transfer per set bit in count[3:0].
	 *
	 * The remainder is deliberately split into several small
	 * loads/stores that walk memory in increasing address order,
	 * instead of doing what the original cortex-strings memcpy does:
	 * step src back and move one 16-byte ldp/stp from (src-16) to
	 * (dst-16). Keeping that scheme would force memmove to guarantee
	 * that src is at least 16 bytes above dst before calling memcpy
	 * directly, otherwise some source data would be overwritten.
	 * Dropping it keeps memmove simple and decouples memcpy from
	 * memmove.
	 */
	tbz	count, #3, 1f
	ldr	tmp1, [src], #8
	str	tmp1, [dst], #8
1:
	tbz	count, #2, 2f
	ldr	tmp1w, [src], #4
	str	tmp1w, [dst], #4
2:
	tbz	count, #1, 3f
	ldrh	tmp1w, [src], #2
	strh	tmp1w, [dst], #2
3:
	tbz	count, #0, .Lexitfunc
	ldrb	tmp1w, [src]
	strb	tmp1w, [dst]

.Lexitfunc:
	ret

.Lcpy_over64:
	/*
	 * From here on count is biased by -128. The bias is a multiple of
	 * 64, so the low 6 bits that .Ltail63 relies on are unaffected.
	 */
	subs	count, count, #128
	b.ge	.Lcpy_body_large
	/*
	 * Less than 128 bytes to copy, so handle 64 here and then jump
	 * to the tail.
	 */
	ldp	A_l, A_h, [src], #16
	stp	A_l, A_h, [dst], #16
	ldp	B_l, B_h, [src], #16
	ldp	C_l, C_h, [src], #16
	stp	B_l, B_h, [dst], #16
	stp	C_l, C_h, [dst], #16
	ldp	D_l, D_h, [src], #16
	stp	D_l, D_h, [dst], #16

	tst	count, #0x3f
	b.ne	.Ltail63
	ret

	/*
	 * Critical loop.  Start at a new cache line boundary.  Assuming
	 * 64 bytes per line this ensures the entire loop is in one line.
	 */
	.p2align	L1_CACHE_SHIFT
.Lcpy_body_large:
	/* Pre-load the first 64 bytes of data. */
	ldp	A_l, A_h, [src], #16
	ldp	B_l, B_h, [src], #16
	ldp	C_l, C_h, [src], #16
	ldp	D_l, D_h, [src], #16
1:
	/*
	 * Interleave the load of the next 64-byte block with the store of
	 * the previously loaded 64 bytes.
	 */
	stp	A_l, A_h, [dst], #16
	ldp	A_l, A_h, [src], #16
	stp	B_l, B_h, [dst], #16
	ldp	B_l, B_h, [src], #16
	stp	C_l, C_h, [dst], #16
	ldp	C_l, C_h, [src], #16
	stp	D_l, D_h, [dst], #16
	ldp	D_l, D_h, [src], #16
	subs	count, count, #64
	b.ge	1b
	/* Drain: store the final 64-byte block that is still in registers. */
	stp	A_l, A_h, [dst], #16
	stp	B_l, B_h, [dst], #16
	stp	C_l, C_h, [dst], #16
	stp	D_l, D_h, [dst], #16

	tst	count, #0x3f
	b.ne	.Ltail63
	ret
ENDPROC(memcpy)