/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

2005-04-17 02:20:36 +04:00
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC			/* open user-space access window (SMAP) */
	cmpl $8,%edx
	jb 20f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx			/* edx = bytes left after 64-byte chunks */
	shrl $6,%ecx			/* ecx = number of 64-byte chunks */
	jz 17f				/* no full chunk: go to 8-byte loop */
	/* Unrolled main loop: 8 quadword loads + 8 stores = 64 bytes/iter */
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
	/* Copy remaining whole quadwords */
17:	movl %edx,%ecx
	andl $7,%edx			/* edx = trailing byte count */
	shrl $3,%ecx			/* ecx = quadword count */
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
	/* Copy trailing bytes, if any */
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax			/* success: 0 uncopied bytes */
	ASM_CLAC			/* close user-space access window */
	ret

	/*
	 * Fault fixups: recompute the number of not-yet-copied bytes into
	 * %edx, then let copy_user_handle_tail finish byte-wise.
	 */
	.section .fixup,"ax"
30:	shll $6,%ecx			/* faulted in 64-byte loop: ecx chunks left */
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx		/* faulted in 8-byte loop */
	jmp 60f
50:	movl %ecx,%edx			/* faulted in byte loop */
60:	jmp copy_user_handle_tail	/* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
2006-09-26 12:52:32 +04:00
2008-07-02 17:53:13 +04:00
/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	ASM_STAC			/* open user-space access window (SMAP) */
	cmpl $8,%edx
	jb 2f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx			/* ecx = quadword count */
	andl $7,%edx			/* edx = trailing byte count */
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax			/* success: 0 uncopied bytes */
	ASM_CLAC
	ret

	/* Fault fixups: compute bytes left, hand off to the tail handler */
	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx		/* faulted in rep movsq: ecx qwords left */
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
2011-05-18 02:29:15 +04:00
/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC			/* open user-space access window (SMAP) */
	movl %edx,%ecx			/* count in ecx: 32-bit count only */
1:	rep
	movsb
	xorl %eax,%eax			/* success: 0 uncopied bytes */
	ASM_CLAC
	ret

	/* Fault fixup: rep movsb leaves remaining byte count in ecx */
	.section .fixup,"ax"
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
2015-05-13 20:42:24 +03:00
/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination out of cache for more performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC			/* open user-space access window (SMAP) */

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy (movnti = non-temporal store) */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no byte left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax			/* success: 0 uncopied bytes */
	ASM_CLAC
	sfence				/* order pending non-temporal stores */
	ret

	/*
	 * Fault fixups: recompute remaining byte count into %rdx for the
	 * path that faulted, fence pending non-temporal stores, and hand
	 * off to the common byte-wise tail handler.
	 */
	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)