2008-07-02 17:53:13 +04:00
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */
2005-04-17 02:20:36 +04:00
2006-09-26 12:52:32 +04:00
# include < l i n u x / l i n k a g e . h >
# include < a s m / d w a r f2 . h >
2006-02-03 23:51:02 +03:00
/* Enables the destination-alignment prologue guarded by #ifdef in ALIGN_DESTINATION. */
#define FIX_ALIGNMENT 1
2006-09-26 12:52:39 +04:00
# include < a s m / c u r r e n t . h >
# include < a s m / a s m - o f f s e t s . h >
# include < a s m / t h r e a d _ i n f o . h >
# include < a s m / c p u f e a t u r e . h >
2011-05-18 02:29:15 +04:00
# include < a s m / a l t e r n a t i v e - a s m . h >
2012-04-20 23:19:51 +04:00
# include < a s m / a s m . h >
2006-09-26 12:52:39 +04:00
2011-05-18 02:29:15 +04:00
/*
 * ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
 *
 * Emits a 5-byte near jump (opcode 0xe9 + 32-bit displacement) to \orig
 * and registers two replacement jumps for it.  Because the feature2
 * entry is placed after the feature1 entry in .altinstructions, the
 * effective selection is:
 *
 *   CPU has feature2       -> jump to alt2
 *   else CPU has feature1  -> jump to alt1
 *   else                   -> jump to orig
 */
	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
	/* Patch site: the default jump, targeting \orig. */
0:
	.byte 0xe9			/* near jump, 32-bit displacement */
	.long \orig-1f			/* displacement counted from the end of this jump */
1:

	/*
	 * Replacement jumps.  Their displacements are computed against 1b,
	 * i.e. the end of the jump at the patch site above, not against
	 * their own location in this section.
	 */
	.section .altinstr_replacement,"ax"
2:
	.byte 0xe9			/* near jump, 32-bit displacement */
	.long \alt1-1b			/* target: alt1 */
3:
	.byte 0xe9			/* near jump, 32-bit displacement */
	.long \alt2-1b			/* target: alt2 */
	.previous

	/*
	 * One entry per feature, both describing patch site 0b.
	 * NOTE(review): the trailing "5,5" is presumably the length of the
	 * original and of the replacement instruction (1 opcode byte plus
	 * 4 displacement bytes) -- confirm against altinstruction_entry.
	 */
	.section .altinstructions,"a"
	altinstruction_entry 0b,2b,\feature1,5,5
	altinstruction_entry 0b,3b,\feature2,5,5
	.previous
	.endm
2005-04-17 02:20:36 +04:00
2008-07-02 17:53:13 +04:00
/*
 * ALIGN_DESTINATION
 *
 * When FIX_ALIGNMENT is defined, copies single bytes from (%rsi) to
 * (%rdi) until %rdi is 8-byte aligned.  Expands to nothing otherwise.
 *
 * In:     rdi = destination, rsi = source, edx = byte count
 * Out:    rdi/rsi advanced and edx reduced by the number of bytes needed
 *         to reach alignment
 * Clobb:  ecx, al, flags
 *
 * edx is reduced by up to 7 before any byte is copied; both users in
 * this file invoke the macro only after checking that edx >= 8.
 *
 * A fault on the load (100) or the store (101) is routed to 103, which
 * rebuilds the outstanding byte count in edx and jumps to
 * copy_user_handle_tail.
 */
	.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
	movl	%edi,%ecx
	andl	$7,%ecx			/* ecx = destination offset within 8 bytes */
	jz	102f			/* already aligned */

	subl	$8,%ecx
	negl	%ecx			/* ecx = 8 - offset = bytes up to the boundary */
	subl	%ecx,%edx		/* charge them against the count up front */

100:	movb	(%rsi),%al
101:	movb	%al,(%rdi)
	incq	%rsi
	incq	%rdi
	decl	%ecx
	jnz	100b
102:

	.section .fixup,"ax"
	/*
	 * ecx still counts the alignment bytes not yet copied (including
	 * the faulting one); adding it back to edx yields everything that
	 * remains to be copied.
	 */
103:	addl	%ecx,%edx		/* ecx is zerorest also */
	jmp	copy_user_handle_tail
	.previous

	_ASM_EXTABLE(100b,103b)
	_ASM_EXTABLE(101b,103b)
#endif
	.endm
/*
 * _copy_to_user - standard copy_to_user with segment limit checking.
 *
 * In:   rdi = destination (checked against the address limit)
 *       rsi = source
 *       rdx = byte count
 * Out:  on a failed range check, control goes to bad_to_user, which
 *       returns the full count in eax; otherwise the selected copy
 *       routine produces the return value.
 */
ENTRY(_copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)		/* rax is the base for TI_addr_limit below */

	/* rcx = destination + count; a carry means the range wraps around. */
	movq	%rdi,%rcx
	addq	%rdx,%rcx
	jc	bad_to_user

	/*
	 * Reject only when destination + count is strictly above the limit.
	 * An end address equal to the limit is fine: count includes the
	 * byte at the start address, so the copy never touches the limit
	 * address itself.  A ">=" test here would make the last valid
	 * user page unusable as a buffer.
	 */
	cmpq	TI_addr_limit(%rax),%rcx
	ja	bad_to_user

	/*
	 * ERMS        -> copy_user_enhanced_fast_string
	 * else REP_GOOD -> copy_user_generic_string
	 * else        -> copy_user_generic_unrolled
	 */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_to_user)
2006-02-03 23:51:02 +03:00
2008-07-02 17:53:13 +04:00
/*
 * _copy_from_user - standard copy_from_user with segment limit checking.
 *
 * In:   rdi = destination
 *       rsi = source (checked against the address limit)
 *       rdx = byte count
 * Out:  on a failed range check, control goes to bad_from_user, which
 *       zeroes the destination and returns the full count in eax;
 *       otherwise the selected copy routine produces the return value.
 */
ENTRY(_copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)		/* rax is the base for TI_addr_limit below */

	/* rcx = source + count; a carry means the range wraps around. */
	movq	%rsi,%rcx
	addq	%rdx,%rcx
	jc	bad_from_user

	/*
	 * Reject only when source + count is strictly above the limit.
	 * An end address equal to the limit is fine: count includes the
	 * byte at the start address, so the copy never touches the limit
	 * address itself.  A ">=" test here would make the last valid
	 * user page unusable as a buffer.
	 */
	cmpq	TI_addr_limit(%rax),%rcx
	ja	bad_from_user

	/*
	 * ERMS        -> copy_user_enhanced_fast_string
	 * else REP_GOOD -> copy_user_generic_string
	 * else        -> copy_user_generic_unrolled
	 */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_from_user)
2006-09-26 12:52:39 +04:00
2005-04-17 02:20:36 +04:00
/*
 * Failure exits for the range checks in _copy_from_user / _copy_to_user.
 *
 * bad_from_user: zero edx bytes at (%rdi), then fall through.
 * bad_to_user:   return edx in eax, i.e. report the whole count as
 *                uncopied.
 *
 * In:     rdi = destination, edx = byte count
 * Clobb:  ecx, eax, rdi (advanced by rep stosb), flags
 */
	.section .fixup,"ax"
ENTRY(bad_from_user)
	/*
	 * NOTE(review): ENTRY() presumably already defines this symbol; the
	 * explicit label is kept exactly as the original file has it.
	 */
bad_from_user:
	CFI_STARTPROC
	/* must zero dest */
	movl	%edx,%ecx		/* count for rep stosb */
	xorl	%eax,%eax		/* al = 0: the byte to store */
	rep stosb
bad_to_user:
	movl	%edx,%eax		/* nothing was copied */
	ret
	CFI_ENDPROC
ENDPROC(bad_from_user)
	.previous
2008-07-02 17:53:13 +04:00
2005-04-17 02:20:36 +04:00
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Scratch registers used: ecx, r8-r11, al.  The count is handled as a
 * 32-bit quantity (edx/ecx) throughout.
 *
 * Three phases, after an optional destination-alignment prologue:
 *   1b..16b  64-byte blocks (eight quadword loads/stores per pass)
 *   18b..19b remaining quadwords
 *   21b..22b remaining bytes
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	cmpl	$8,%edx
	jb	20f			/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION

	/* ecx = number of 64-byte blocks, edx = bytes left after them */
	movl	%edx,%ecx
	andl	$63,%edx
	shrl	$6,%ecx
	jz	17f

	/* Phase 1: one 64-byte block per pass, as two 32-byte halves. */
1:	movq	(%rsi),%r8
2:	movq	1*8(%rsi),%r9
3:	movq	2*8(%rsi),%r10
4:	movq	3*8(%rsi),%r11
5:	movq	%r8,(%rdi)
6:	movq	%r9,1*8(%rdi)
7:	movq	%r10,2*8(%rdi)
8:	movq	%r11,3*8(%rdi)
9:	movq	4*8(%rsi),%r8
10:	movq	5*8(%rsi),%r9
11:	movq	6*8(%rsi),%r10
12:	movq	7*8(%rsi),%r11
13:	movq	%r8,4*8(%rdi)
14:	movq	%r9,5*8(%rdi)
15:	movq	%r10,6*8(%rdi)
16:	movq	%r11,7*8(%rdi)
	leaq	64(%rsi),%rsi
	leaq	64(%rdi),%rdi
	decl	%ecx
	jnz	1b

	/* Phase 2: ecx = number of quadwords left, edx = trailing bytes */
17:	movl	%edx,%ecx
	andl	$7,%edx
	shrl	$3,%ecx
	jz	20f
18:	movq	(%rsi),%r8
19:	movq	%r8,(%rdi)
	leaq	8(%rsi),%rsi
	leaq	8(%rdi),%rdi
	decl	%ecx
	jnz	18b

	/* Phase 3: trailing bytes, one at a time */
20:	andl	%edx,%edx
	jz	23f
	movl	%edx,%ecx
21:	movb	(%rsi),%al
22:	movb	%al,(%rdi)
	incq	%rsi
	incq	%rdi
	decl	%ecx
	jnz	21b

23:	xorl	%eax,%eax		/* success: nothing left uncopied */
	ret

	/*
	 * Fault handlers.  Each one rebuilds in edx the number of bytes
	 * still outstanding for the phase that faulted, then hands over
	 * to copy_user_handle_tail.
	 */
	.section .fixup,"ax"
	/* Phase 1: ecx blocks of 64 bytes are pending, plus edx tail bytes. */
30:	shll	$6,%ecx
	addl	%ecx,%edx
	jmp	60f
	/* Phase 2: rcx quadwords are pending, plus rdx tail bytes. */
40:	leaq	(%rdx,%rcx,8),%rdx
	jmp	60f
	/* Phase 3: ecx bytes are pending. */
50:	movl	%ecx,%edx
60:	jmp	copy_user_handle_tail	/* ecx is zerorest also */
	.previous

	/* Every load/store of phase 1 -> 30, phase 2 -> 40, phase 3 -> 50 */
	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
2006-09-26 12:52:32 +04:00
2008-07-02 17:53:13 +04:00
/*
 * copy_user_generic_string
 *
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	andl	%edx,%edx
	jz	4f			/* nothing to copy */
	cmpl	$8,%edx
	jb	2f			/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION

	/* ecx = number of quadwords, edx = trailing bytes */
	movl	%edx,%ecx
	shrl	$3,%ecx
	andl	$7,%edx
1:	rep movsq

2:	movl	%edx,%ecx
3:	rep movsb

4:	xorl	%eax,%eax		/* success: nothing left uncopied */
	ret

	.section .fixup,"ax"
	/* Fault in rep movsq: rcx quadwords pending plus rdx trailing bytes. */
11:	leaq	(%rdx,%rcx,8),%rcx
	/* Fault in rep movsb joins here with ecx = bytes pending. */
12:	movl	%ecx,%edx		/* ecx is zerorest also */
	jmp	copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
	CFI_ENDPROC
ENDPROC(copy_user_generic_string)
2011-05-18 02:29:15 +04:00
/*
 * copy_user_enhanced_fast_string
 *
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	CFI_STARTPROC
	andl	%edx,%edx
	jz	2f			/* nothing to copy */

	movl	%edx,%ecx		/* whole count goes through rep movsb */
1:	rep movsb

2:	xorl	%eax,%eax		/* success: nothing left uncopied */
	ret

	.section .fixup,"ax"
	/* Fault in rep movsb: ecx holds the bytes still pending. */
12:	movl	%ecx,%edx		/* ecx is zerorest also */
	jmp	copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
	CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)