2019-06-03 07:44:50 +02:00
/* SPDX-License-Identifier: GPL-2.0-only */
2013-03-21 16:16:43 +00:00
/*
 * Copyright (c) 2012-2021, Arm Limited.
 *
 * Adapted from the original at:
 * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/memcpy.S
 */
# include < l i n u x / l i n k a g e . h >
# include < a s m / a s s e m b l e r . h >
2021-05-27 16:34:46 +01:00
/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */
2014-04-28 06:11:29 +01:00
2021-05-27 16:34:46 +01:00
/* L(foo) expands to the assembler-local label .Lfoo. */
#define L(label) .L ## label

/* Arguments and the pointers derived from them. */
#define dstin	x0	/* destination as passed in */
#define src	x1	/* source pointer */
#define count	x2	/* number of bytes to copy */
#define dst	x3	/* working destination pointer */
#define srcend	x4	/* src + count */
#define dstend	x5	/* dstin + count */

/* Data registers, used as 16-byte low/high pairs by ldp/stp. */
#define A_l	x6
#define A_h	x7
#define B_l	x8
#define B_h	x9
#define C_l	x10
#define C_h	x11
#define D_l	x12
#define D_h	x13
#define E_l	x14
#define E_h	x15
#define F_l	x16
#define F_h	x17

/* 32-bit views of A_l, B_l and C_l for the word and byte copies. */
#define A_lw	w6
#define B_lw	w8
#define C_lw	w10

/*
 * Two further data pairs that occupy the argument/pointer registers.
 * Loading G or H therefore overwrites count, dst, src or srcend.
 */
#define G_l	count
#define G_h	dst
#define H_l	src
#define H_h	srcend

/* Scratch register; the same physical register (x14) as E_l. */
#define tmp1	x14
2014-04-28 06:11:29 +01:00
2021-05-27 16:34:46 +01:00
/* This implementation handles overlaps and supports both memcpy and memmove
   from a single entry point.  It uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies.  The overhead of the overlap
   check is negligible since it is only required for large copies.

   Large copies use a software pipelined loop processing 64 bytes per iteration.
   The destination pointer is 16-byte aligned to minimize unaligned accesses.
   The loop tail is handled by always copying 64 bytes from the end.
*/
2014-04-28 06:11:29 +01:00
2021-05-27 16:34:46 +01:00
/*
 * memcpy / memmove, also exported as __memcpy / __memmove.
 * All four symbols are opened here, ahead of the same first instruction.
 *
 * In:    dstin (x0) = destination, src (x1) = source, count (x2) = bytes
 * Out:   x0 is never written, so it still holds the destination pointer
 * Uses:  x1-x17 and the condition flags; no stack access, no calls
 *
 * Dispatch on count:
 *   0..32    loads from both ends of the source, then stores to both ends
 *            of the destination (the two halves may overlap each other)
 *   33..128  same idea with 32, 64 or 96..128 bytes held in registers
 *   > 128    64-byte-per-iteration loop, forwards or backwards depending
 *            on how the buffers overlap
 *
 * For every size up to 128 all loads are issued before the first store,
 * which is what makes those paths safe for overlapping buffers.
 */
SYM_FUNC_START_ALIAS(__memmove)
SYM_FUNC_START_WEAK_ALIAS_PI(memmove)
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_WEAK_PI(memcpy)
	add	srcend, src, count
	add	dstend, dstin, count
	cmp	count, #128
	b.hi	L(copy_long)
	cmp	count, #32
	b.hi	L(copy32_128)

	/* 0..32 bytes.  Fall through for 16..32: first 16 + last 16. */
	cmp	count, #16
	b.lo	L(copy16)
	ldp	A_l, A_h, [src]
	ldp	D_l, D_h, [srcend, #-16]
	stp	A_l, A_h, [dstin]
	stp	D_l, D_h, [dstend, #-16]
	ret

	/* count < 16.  Bit 3 set means 8..15: first 8 + last 8 bytes. */
L(copy16):
	tbz	count, #3, L(copy8)
	ldr	A_l, [src]
	ldr	A_h, [srcend, #-8]
	str	A_l, [dstin]
	str	A_h, [dstend, #-8]
	ret

	.p2align 3
	/* count < 8.  Bit 2 set means 4..7: first 4 + last 4 bytes. */
L(copy8):
	tbz	count, #2, L(copy4)
	ldr	A_lw, [src]
	ldr	B_lw, [srcend, #-4]
	str	A_lw, [dstin]
	str	B_lw, [dstend, #-4]
	ret

	/*
	 * count is 0..3.  After the zero check, the bytes at offsets 0,
	 * count / 2 and count - 1 are copied; for 1, 2 and 3 bytes these
	 * three offsets cover the whole range without a further branch.
	 */
L(copy4):
	cbz	count, L(copy0)
	lsr	tmp1, count, #1
	ldrb	A_lw, [src]
	ldrb	C_lw, [srcend, #-1]
	ldrb	B_lw, [src, tmp1]
	strb	A_lw, [dstin]
	strb	B_lw, [dstin, tmp1]
	strb	C_lw, [dstend, #-1]
L(copy0):
	ret

	.p2align 4
	/* 33..128 bytes: always load the first 32 and the last 32. */
L(copy32_128):
	ldp	A_l, A_h, [src]
	ldp	B_l, B_h, [src, #16]
	ldp	C_l, C_h, [srcend, #-32]
	ldp	D_l, D_h, [srcend, #-16]
	cmp	count, #64
	b.hi	L(copy128)
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, #16]
	stp	C_l, C_h, [dstend, #-32]
	stp	D_l, D_h, [dstend, #-16]
	ret

	.p2align 4
	/* 65..128 bytes: add source bytes 32..63 in E and F. */
L(copy128):
	ldp	E_l, E_h, [src, #32]
	ldp	F_l, F_h, [src, #48]
	cmp	count, #96
	b.ls	L(copy96)
	/*
	 * 97..128 bytes: also take the 32 bytes at srcend - 64.  G and H
	 * live in count/dst/src/srcend, so those values are gone after
	 * these two loads; only dstin and dstend are used from here on.
	 */
	ldp	G_l, G_h, [srcend, #-64]
	ldp	H_l, H_h, [srcend, #-48]
	stp	G_l, G_h, [dstend, #-64]
	stp	H_l, H_h, [dstend, #-48]
L(copy96):
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, #16]
	stp	E_l, E_h, [dstin, #32]
	stp	F_l, F_h, [dstin, #48]
	stp	C_l, C_h, [dstend, #-32]
	stp	D_l, D_h, [dstend, #-16]
	ret

	.p2align 4
	/* More than 128 bytes. */
L(copy_long):
	/*
	 * tmp1 = dstin - src.  Zero means source and destination are the
	 * same address, so there is nothing to do.  An unsigned value
	 * below count means dstin lies inside [src, src + count), and the
	 * copy has to run from the end towards the start.
	 */
	sub	tmp1, dstin, src
	cbz	tmp1, L(copy0)
	cmp	tmp1, count
	b.lo	L(copy_long_backwards)

	/*
	 * Forward copy.  The first 16 bytes go to dstin as they are; dst is
	 * dstin rounded down to 16 bytes, and src is moved back by the same
	 * amount so that src and dst stay in step.
	 */
	ldp	D_l, D_h, [src]
	and	tmp1, dstin, #15
	bic	dst, dstin, #15
	sub	src, src, tmp1
	add	count, count, tmp1	/* Count is now 16 too large. */
	ldp	A_l, A_h, [src, #16]
	stp	D_l, D_h, [dstin]
	ldp	B_l, B_h, [src, #32]
	ldp	C_l, C_h, [src, #48]
	ldp	D_l, D_h, [src, #64]!
	subs	count, count, #(128 + 16)	/* Test and readjust count. */
	b.ls	L(copy64_from_end)

	/*
	 * Each pass stores the 64 bytes loaded on the previous pass and
	 * loads the next 64; the last store/load pair advances dst and src.
	 */
L(loop64):
	stp	A_l, A_h, [dst, #16]
	ldp	A_l, A_h, [src, #16]
	stp	B_l, B_h, [dst, #32]
	ldp	B_l, B_h, [src, #32]
	stp	C_l, C_h, [dst, #48]
	ldp	C_l, C_h, [src, #48]
	stp	D_l, D_h, [dst, #64]!
	ldp	D_l, D_h, [src, #64]!
	subs	count, count, #64
	b.hi	L(loop64)

	/*
	 * Store the 64 bytes still held in A..D, and finish with the last
	 * 64 bytes of the buffer addressed from srcend / dstend.
	 */
L(copy64_from_end):
	ldp	E_l, E_h, [srcend, #-64]
	stp	A_l, A_h, [dst, #16]
	ldp	A_l, A_h, [srcend, #-48]
	stp	B_l, B_h, [dst, #32]
	ldp	B_l, B_h, [srcend, #-32]
	stp	C_l, C_h, [dst, #48]
	ldp	C_l, C_h, [srcend, #-16]
	stp	D_l, D_h, [dst, #64]
	stp	E_l, E_h, [dstend, #-64]
	stp	A_l, A_h, [dstend, #-48]
	stp	B_l, B_h, [dstend, #-32]
	stp	C_l, C_h, [dstend, #-16]
	ret

	.p2align 4
	/*
	 * Backward copy for the overlapping case.  The last 16 bytes go to
	 * dstend - 16 as they are; srcend and dstend are then both moved
	 * down by dstend & 15, which leaves dstend 16-byte aligned.
	 */
L(copy_long_backwards):
	ldp	D_l, D_h, [srcend, #-16]
	and	tmp1, dstend, #15
	sub	srcend, srcend, tmp1
	sub	count, count, tmp1
	ldp	A_l, A_h, [srcend, #-16]
	stp	D_l, D_h, [dstend, #-16]
	ldp	B_l, B_h, [srcend, #-32]
	ldp	C_l, C_h, [srcend, #-48]
	ldp	D_l, D_h, [srcend, #-64]!
	sub	dstend, dstend, tmp1
	subs	count, count, #128
	b.ls	L(copy64_from_start)

	/* Same pipelined shape as the forward loop, walking downwards. */
L(loop64_backwards):
	stp	A_l, A_h, [dstend, #-16]
	ldp	A_l, A_h, [srcend, #-16]
	stp	B_l, B_h, [dstend, #-32]
	ldp	B_l, B_h, [srcend, #-32]
	stp	C_l, C_h, [dstend, #-48]
	ldp	C_l, C_h, [srcend, #-48]
	stp	D_l, D_h, [dstend, #-64]!
	ldp	D_l, D_h, [srcend, #-64]!
	subs	count, count, #64
	b.hi	L(loop64_backwards)

	/*
	 * Store the 64 bytes still held in A..D, and finish with the first
	 * 64 bytes of the buffer addressed from src / dstin.  G reuses the
	 * count and dst registers, whose values are not read again.
	 */
L(copy64_from_start):
	ldp	G_l, G_h, [src, #48]
	stp	A_l, A_h, [dstend, #-16]
	ldp	A_l, A_h, [src, #32]
	stp	B_l, B_h, [dstend, #-32]
	ldp	B_l, B_h, [src, #16]
	stp	C_l, C_h, [dstend, #-48]
	ldp	C_l, C_h, [src]
	stp	D_l, D_h, [dstend, #-64]
	stp	G_l, G_h, [dstin, #48]
	stp	A_l, A_h, [dstin, #32]
	stp	B_l, B_h, [dstin, #16]
	stp	C_l, C_h, [dstin]
	ret

SYM_FUNC_END_PI(memcpy)
EXPORT_SYMBOL(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_END_ALIAS_PI(memmove)
EXPORT_SYMBOL(memmove)
SYM_FUNC_END_ALIAS(__memmove)
EXPORT_SYMBOL(__memmove)