2005-04-17 02:20:36 +04:00
# ifndef __i386_UACCESS_H
# define __i386_UACCESS_H
/*
* User space memory access functions
*/
# include <linux/errno.h>
# include <linux/thread_info.h>
# include <linux/prefetch.h>
# include <linux/string.h>
# include <asm/page.h>
/* Values for the @type argument of access_ok(). */
# define VERIFY_READ 0
# define VERIFY_WRITE 1
/*
 * The fs value determines whether argument validity checking should be
 * performed or not.  If get_fs() == USER_DS, checking is performed, with
 * get_fs() == KERNEL_DS, checking is bypassed.
 *
 * For historical reasons, these macros are grossly misnamed.
 */
/* Wrap a raw limit value in an mm_segment_t compound literal. */
# define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
/* Limit value 0xFFFFFFFF. */
# define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFUL)
/* Limit value PAGE_OFFSET. */
# define USER_DS MAKE_MM_SEG(PAGE_OFFSET)
/* get_ds() always evaluates to KERNEL_DS. */
# define get_ds() (KERNEL_DS)
/* Read, respectively replace, the addr_limit of the current thread_info. */
# define get_fs() (current_thread_info()->addr_limit)
# define set_fs(x) (current_thread_info()->addr_limit = (x))
/* Two mm_segment_t values are equal when their .seg members are equal. */
# define segment_eq(a,b) ((a).seg == (b).seg)
/*
 * movsl can be slow when source and dest are not both 8-byte aligned
 */
/* Only declared when CONFIG_X86_INTEL_USERCOPY is set; the object itself is defined elsewhere. */
# ifdef CONFIG_X86_INTEL_USERCOPY
extern struct movsl_mask {
/* NOTE(review): presumably an alignment mask tested against source/destination addresses - confirm at the definition site */
int mask ;
} ____cacheline_aligned_in_smp movsl_mask ;
# endif
/* Nonzero when @addr, taken as an unsigned long, lies strictly below the current thread's addr_limit.seg. */
# define __addr_ok(addr) ((unsigned long __force)(addr) < (current_thread_info()->addr_limit.seg))
/*
 * Test whether a block of memory is a valid user space address.
 * Returns 0 if the range is valid, nonzero otherwise.
 *
 * This is equivalent to the following test:
 * (u33)addr + (u33)size >= (u33)current->addr_limit.seg
 *
 * This needs 33-bit arithmetic.  We have a carry...
 */
/*
 * __range_ok(addr, size): evaluates to 0 when the range passes the check
 * against current_thread_info()->addr_limit.seg, nonzero otherwise.
 *
 * The asm works as follows:
 *   addl  %3,%1   sum = addr + size, carry set if the add wraps
 *   sbbl  %0,%0   flag = -carry (0 or all ones)
 *   cmpl  %1,%4   carry set if the limit is below sum
 *   sbbl  $0,%0   flag -= carry
 * so flag stays 0 only when neither step produced a carry.
 *
 * @size is truncated to int before being handed to the asm.
 */
#define __range_ok(addr,size) ({ \
	unsigned long flag, sum; \
	__chk_user_ptr(addr); \
	asm("addl %3,%1 ; sbbl %0,%0; cmpl %1,%4; sbbl $0,%0" \
		:"=&r" (flag), "=r" (sum) \
		:"1" (addr), "g" ((int)(size)), \
		 "rm" (current_thread_info()->addr_limit.seg)); \
	flag; })
/**
 * access_ok: - Checks if a user space pointer is valid
 * @type: Type of access: %VERIFY_READ or %VERIFY_WRITE.  Note that
 *        %VERIFY_WRITE is a superset of %VERIFY_READ - if it is safe
 *        to write to a block, it is always safe to read from it.
 * @addr: User space pointer to start of block to check
 * @size: Size of block to check
 *
 * Context: User context only.  This function may sleep.
 *
 * Checks if a pointer to a block of memory in user space is valid.
 *
 * Returns true (nonzero) if the memory block may be valid, false (zero)
 * if it is definitely invalid.
 *
 * Note that, depending on architecture, this function probably just
 * checks that the pointer is in the user space range - after calling
 * this function, memory access functions may still return -EFAULT.
 */
/* @type is not used by this implementation; the result is simply whether __range_ok() evaluates to 0. */
# define access_ok(type,addr,size) (likely(__range_ok(addr,size) == 0))
/*
 * The exception table consists of pairs of addresses: the first is the
 * address of an instruction that is allowed to fault, and the second is
 * the address at which the program should continue.  No registers are
 * modified, so it is entirely up to the continuation code to figure out
 * what to do.
 *
 * All the routines below use bits of fixup code that are out of line
 * with the main instruction path.  This means when everything is well,
 * we don't even have to jump over them.  Further, they do not intrude
 * on our cache or tlb entries.
 */
/* One exception-table record: a pair of addresses. */
struct exception_table_entry {
	unsigned long insn;	/* address of the instruction allowed to fault */
	unsigned long fixup;	/* address at which execution continues */
};
/* Declaration only; implemented elsewhere. NOTE(review): presumably looks up @regs' faulting address in the exception table - confirm at the definition. */
extern int fixup_exception ( struct pt_regs * regs ) ;
/*
 * These are the main single-value transfer routines.  They automatically
 * use the right size if we just have the right pointer type.
 *
 * This gets kind of ugly.  We want to return _two_ values in "get_user()"
 * and yet we don't want to do any pointers, because that is too much
 * of a performance impact.  Thus we have a few rather ugly macros here,
 * and hide all the ugliness from the user.
 *
 * The "__xxx" versions of the user access functions are versions that
 * do not verify the address space, that must have been done previously
 * with a separate "access_ok()" call (this is used when we do multiple
 * accesses to the same area of user memory).
 */
/* Out-of-line helpers; this header only reaches them through the asm "call" below. */
extern void __get_user_1(void);
extern void __get_user_2(void);
extern void __get_user_4(void);

/*
 * Call the helper __get_user_<size>.
 *
 * @ptr goes in through %eax (it is tied to output operand 0).  On return
 * %eax is stored in @ret and %edx in @x.
 */
#define __get_user_x(size,ret,x,ptr) \
	__asm__ __volatile__("call __get_user_" #size \
		:"=a" (ret),"=d" (x) \
		:"0" (ptr))
/* Careful: we have to cast the result to the type of the pointer for sign reasons */
/**
 * get_user: - Get a simple variable from user space.
 * @x:   Variable to store result.
 * @ptr: Source address, in user space.
 *
 * Context: User context only.  This function may sleep.
 *
 * This macro copies a single simple variable from user space to kernel
 * space.  It supports simple types like char and int, but not larger
 * data types like structures or arrays.
 *
 * @ptr must have pointer-to-simple-variable type, and the result of
 * dereferencing @ptr must be assignable to @x without a cast.
 *
 * Returns zero on success, or -EFAULT on error.
 * On error, the variable @x is set to zero.
 */
/*
 * Dispatches on sizeof(*(ptr)) to __get_user_x().  The fetched value lands
 * in an unsigned long and is cast to the pointee type before being assigned
 * to @x; the macro evaluates to the status left in __ret_gu.
 *
 * Sizes other than 1, 2 and 4 expand to a call to "__get_user_X", for which
 * this header provides no declaration.
 */
#define get_user(x,ptr) \
({	int __ret_gu; \
	unsigned long __val_gu; \
	__chk_user_ptr(ptr); \
	switch (sizeof(*(ptr))) { \
	case 1: \
		__get_user_x(1, __ret_gu, __val_gu, ptr); \
		break; \
	case 2: \
		__get_user_x(2, __ret_gu, __val_gu, ptr); \
		break; \
	case 4: \
		__get_user_x(4, __ret_gu, __val_gu, ptr); \
		break; \
	default: \
		__get_user_x(X, __ret_gu, __val_gu, ptr); \
		break; \
	} \
	(x) = (__typeof__(*(ptr)))__val_gu; \
	__ret_gu; \
})
/* Called from the default (unsupported size) case of __put_user_size(). NOTE(review): presumably left undefined on purpose so that such uses fail at link time - confirm. */
extern void __put_user_bad ( void ) ;
/*
 * Strange magic calling convention: pointer in %ecx,
 * value in %eax(:%edx), return value in %eax, no clobbers.
 */
/* Out-of-line helpers reached through the asm "call" in the wrappers below. */
extern void __put_user_1(void);
extern void __put_user_2(void);
extern void __put_user_4(void);
extern void __put_user_8(void);

/*
 * Function-like wrappers that share the helpers' names.  Each one casts @x
 * to the pointee type of @ptr, passes it in %eax ("A" for the 8-byte case)
 * with @ptr in %ecx, and stores %eax into __ret_pu on return.
 *
 * __ret_pu is not a parameter: the expanding macro must have a variable of
 * that name in scope.
 */
#define __put_user_1(x, ptr) \
	__asm__ __volatile__("call __put_user_1" \
		:"=a" (__ret_pu) \
		:"0" ((__typeof__(*(ptr)))(x)), "c" (ptr))
#define __put_user_2(x, ptr) \
	__asm__ __volatile__("call __put_user_2" \
		:"=a" (__ret_pu) \
		:"0" ((__typeof__(*(ptr)))(x)), "c" (ptr))
#define __put_user_4(x, ptr) \
	__asm__ __volatile__("call __put_user_4" \
		:"=a" (__ret_pu) \
		:"0" ((__typeof__(*(ptr)))(x)), "c" (ptr))
#define __put_user_8(x, ptr) \
	__asm__ __volatile__("call __put_user_8" \
		:"=a" (__ret_pu) \
		:"A" ((__typeof__(*(ptr)))(x)), "c" (ptr))
/* Unsupported size: @x is ignored and the call names "__put_user_X". */
#define __put_user_X(x, ptr) \
	__asm__ __volatile__("call __put_user_X" \
		:"=a" (__ret_pu) \
		:"c" (ptr))
/**
 * put_user: - Write a simple value into user space.
 * @x:   Value to copy to user space.
 * @ptr: Destination address, in user space.
 *
 * Context: User context only.  This function may sleep.
 *
 * This macro copies a single simple value from kernel space to user
 * space.  It supports simple types like char and int, but not larger
 * data types like structures or arrays.
 *
 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
 * to the result of dereferencing @ptr.
 *
 * Returns zero on success, or -EFAULT on error.
 */
#ifdef CONFIG_X86_WP_WORKS_OK

/*
 * @x is first assigned to a temporary of the pointee type (so it is
 * evaluated once), then the store is dispatched on sizeof(*(ptr)) to the
 * __put_user_<n>() wrappers, which leave their status in __ret_pu.
 * Sizes other than 1, 2, 4 and 8 go to __put_user_X().
 */
#define put_user(x,ptr) \
({	int __ret_pu; \
	__typeof__(*(ptr)) __pu_val; \
	__chk_user_ptr(ptr); \
	__pu_val = (x); \
	switch (sizeof(*(ptr))) { \
	case 1: __put_user_1(__pu_val, ptr); break; \
	case 2: __put_user_2(__pu_val, ptr); break; \
	case 4: __put_user_4(__pu_val, ptr); break; \
	case 8: __put_user_8(__pu_val, ptr); break; \
	default: __put_user_X(__pu_val, ptr); break; \
	} \
	__ret_pu; \
})

#else

/*
 * Without CONFIG_X86_WP_WORKS_OK the value is copied out through
 * __copy_to_user_ll(); any nonzero return from it becomes -EFAULT.
 *
 * NOTE(review): no access_ok() check is visible on this path - confirm that
 * __copy_to_user_ll() validates the destination range.
 */
#define put_user(x,ptr) \
({ \
	int __ret_pu; \
	__typeof__(*(ptr)) __pus_tmp = (x); \
	__ret_pu = 0; \
	if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \
				       sizeof(*(ptr))) != 0)) \
		__ret_pu = -EFAULT; \
	__ret_pu; \
})

#endif
/**
 * __get_user: - Get a simple variable from user space, with less checking.
 * @x:   Variable to store result.
 * @ptr: Source address, in user space.
 *
 * Context: User context only.  This function may sleep.
 *
 * This macro copies a single simple variable from user space to kernel
 * space.  It supports simple types like char and int, but not larger
 * data types like structures or arrays.
 *
 * @ptr must have pointer-to-simple-variable type, and the result of
 * dereferencing @ptr must be assignable to @x without a cast.
 *
 * Caller must check the pointer with access_ok() before calling this
 * function.
 *
 * Returns zero on success, or -EFAULT on error.
 * On error, the variable @x is set to zero.
 */
/* Thin wrapper: forwards to __get_user_nocheck() with the pointee size. */
#define __get_user(x,ptr) \
	__get_user_nocheck((x), (ptr), sizeof(*(ptr)))
/**
 * __put_user: - Write a simple value into user space, with less checking.
 * @x:   Value to copy to user space.
 * @ptr: Destination address, in user space.
 *
 * Context: User context only.  This function may sleep.
 *
 * This macro copies a single simple value from kernel space to user
 * space.  It supports simple types like char and int, but not larger
 * data types like structures or arrays.
 *
 * @ptr must have pointer-to-simple-variable type, and @x must be assignable
 * to the result of dereferencing @ptr.
 *
 * Caller must check the pointer with access_ok() before calling this
 * function.
 *
 * Returns zero on success, or -EFAULT on error.
 */
/* Casts @x to the pointee type and forwards to __put_user_nocheck(). */
#define __put_user(x,ptr) \
	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))

/*
 * Runs __put_user_size() with -EFAULT as the error value and evaluates to
 * the resulting status.
 */
#define __put_user_nocheck(x,ptr,size) \
({ \
	long __pu_err; \
	__put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \
	__pu_err; \
})
/*
 * Store the 64-bit value @x at @addr as two 32-bit moves (%eax to offset 0,
 * %edx to offset 4).  Both moves have an exception-table entry pointing at
 * the fixup code, which loads -EFAULT into @err and resumes after the
 * stores.  @err is also an input ("0"), so its previous value survives when
 * no fault occurs.
 */
#define __put_user_u64(x, addr, err) \
	__asm__ __volatile__( \
		"1: movl %%eax,0(%2)\n" \
		"2: movl %%edx,4(%2)\n" \
		"3:\n" \
		".section .fixup,\"ax\"\n" \
		"4: movl %3,%0\n" \
		" jmp 3b\n" \
		".previous\n" \
		".section __ex_table,\"a\"\n" \
		" .align 4\n" \
		" .long 1b,4b\n" \
		" .long 2b,4b\n" \
		".previous" \
		: "=r" (err) \
		: "A" (x), "r" (addr), "i" (-EFAULT), "0" (err))
#ifdef CONFIG_X86_WP_WORKS_OK

/*
 * Store @x at user pointer @ptr with a single move of @size bytes.
 * @retval is set to 0 first and becomes @errret if the store faults
 * (the 8-byte case reports -EFAULT through __put_user_u64() instead).
 * Sizes other than 1, 2, 4 and 8 expand to a call to __put_user_bad().
 *
 * The string arguments of __put_user_asm() are the instruction suffix,
 * the register-size modifier and the input constraint for the value.
 */
#define __put_user_size(x,ptr,size,retval,errret) \
do { \
	retval = 0; \
	__chk_user_ptr(ptr); \
	switch (size) { \
	case 1: __put_user_asm(x, ptr, retval, "b", "b", "iq", errret); break; \
	case 2: __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); break; \
	case 4: __put_user_asm(x, ptr, retval, "l", "", "ir", errret); break; \
	case 8: __put_user_u64((__typeof__(*(ptr)))(x), ptr, retval); break; \
	default: __put_user_bad(); \
	} \
} while (0)

#else

/*
 * Without CONFIG_X86_WP_WORKS_OK the value is staged in a temporary of the
 * pointee type and copied out with __copy_to_user_ll(); any nonzero return
 * from it sets @retval to @errret.
 */
#define __put_user_size(x,ptr,size,retval,errret) \
do { \
	__typeof__(*(ptr)) __pus_tmp = x; \
	retval = 0; \
	\
	if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \
		retval = errret; \
} while (0)

#endif
/*
 * Dummy aggregate used by __m() to give the "m" asm operands a type to
 * refer to at the user address.
 */
struct __large_struct {
	unsigned long buf[100];
};
#define __m(x) (*(struct __large_struct __user *)(x))
/*
 * Tell gcc we read from memory instead of writing: this is because
 * we do not write to any memory gcc knows about, so there are no
 * aliasing issues.
 */
/*
 * Emit one "mov<itype>" of @x to the memory at @addr.
 *
 * @itype  instruction size suffix, pasted after "mov"
 * @rtype  operand-size modifier pasted between '%' and the operand number
 * @ltype  input constraint string for @x
 * @errret immediate loaded into @err by the fixup code when the store faults
 *
 * The store at label 1 has an exception-table entry pointing at label 3,
 * which sets @err and jumps back to label 2.  @err is also an input ("0"),
 * so it keeps its previous value when no fault occurs.
 */
#define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \
	__asm__ __volatile__( \
		"1: mov" itype " %" rtype "1,%2\n" \
		"2:\n" \
		".section .fixup,\"ax\"\n" \
		"3: movl %3,%0\n" \
		" jmp 2b\n" \
		".previous\n" \
		".section __ex_table,\"a\"\n" \
		" .align 4\n" \
		" .long 1b,3b\n" \
		".previous" \
		: "=r" (err) \
		: ltype (x), "m" (__m(addr)), "i" (errret), "0" (err))
/*
 * Fetch @size bytes from @ptr through __get_user_size() into an unsigned
 * long, cast that to the pointee type for assignment to @x, and evaluate
 * to the status (0, or -EFAULT when the load faulted).
 */
#define __get_user_nocheck(x,ptr,size) \
({ \
	long __gu_err; \
	unsigned long __gu_val; \
	__get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
	(x) = (__typeof__(*(ptr)))__gu_val; \
	__gu_err; \
})
/*
 * Used for unsupported sizes in __get_user_size().
 * NOTE(review): presumably left undefined so such uses fail at link time -
 * confirm.
 */
extern long __get_user_bad(void);

/*
 * Load @size bytes from user pointer @ptr into @x with a single move.
 * @retval is set to 0 first and becomes @errret if the load faults.
 * Sizes other than 1, 2 and 4 assign the result of __get_user_bad() to @x.
 *
 * The string arguments of __get_user_asm() are the instruction suffix,
 * the register-size modifier and the output constraint for @x.
 */
#define __get_user_size(x,ptr,size,retval,errret) \
do { \
	retval = 0; \
	__chk_user_ptr(ptr); \
	switch (size) { \
	case 1: __get_user_asm(x, ptr, retval, "b", "b", "=q", errret); break; \
	case 2: __get_user_asm(x, ptr, retval, "w", "w", "=r", errret); break; \
	case 4: __get_user_asm(x, ptr, retval, "l", "", "=r", errret); break; \
	default: (x) = __get_user_bad(); \
	} \
} while (0)
/*
 * Emit one "mov<itype>" from the memory at @addr into @x.
 *
 * @itype  instruction size suffix, pasted after "mov" and "xor"
 * @rtype  operand-size modifier pasted between '%' and the operand number
 * @ltype  output constraint string for @x
 * @errret immediate loaded into @err by the fixup code when the load faults
 *
 * The load at label 1 has an exception-table entry pointing at label 3.
 * The fixup sets @err, clears the destination register with xor (so @x
 * reads as zero after a fault) and jumps back to label 2.  @err is also an
 * input ("0"), so it keeps its previous value when no fault occurs.
 */
#define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \
	__asm__ __volatile__( \
		"1: mov" itype " %2,%" rtype "1\n" \
		"2:\n" \
		".section .fixup,\"ax\"\n" \
		"3: movl %3,%0\n" \
		" xor" itype " %" rtype "1,%" rtype "1\n" \
		" jmp 2b\n" \
		".previous\n" \
		".section __ex_table,\"a\"\n" \
		" .align 4\n" \
		" .long 1b,3b\n" \
		".previous" \
		: "=r" (err), ltype (x) \
		: "m" (__m(addr)), "i" (errret), "0" (err))
unsigned long __must_check __copy_to_user_ll ( void __user * to ,
const void * from , unsigned long n ) ;
unsigned long __must_check __copy_from_user_ll ( void * to ,
const void __user * from , unsigned long n ) ;
2006-06-23 13:04:16 +04:00
unsigned long __must_check __copy_from_user_ll_nocache ( void * to ,
const void __user * from , unsigned long n ) ;
/*
 * Here we special-case 1, 2 and 4-byte copy_*_user invocations.  On a fault
 * we return the initial request size (1, 2 or 4), as copy_*_user should do.
 * If a store crosses a page boundary and gets a fault, the x86 will not write
 * anything, so this is accurate.
 */
/**
 * __copy_to_user: - Copy a block of data into user space, with less checking.
 * @to:   Destination address, in user space.
 * @from: Source address, in kernel space.
 * @n:    Number of bytes to copy.
 *
 * Context: User context only.  This function may sleep.
 *
 * Copy data from kernel space to user space.  Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be copied.
 * On success, this will be zero.
 */
2006-01-15 00:21:30 +03:00
static __always_inline unsigned long __must_check
2005-04-17 02:20:36 +04:00
__copy_to_user_inatomic ( void __user * to , const void * from , unsigned long n )
{
if ( __builtin_constant_p ( n ) ) {
unsigned long ret ;
switch ( n ) {
case 1 :
__put_user_size ( * ( u8 * ) from , ( u8 __user * ) to , 1 , ret , 1 ) ;
return ret ;
case 2 :
__put_user_size ( * ( u16 * ) from , ( u16 __user * ) to , 2 , ret , 2 ) ;
return ret ;
case 4 :
__put_user_size ( * ( u32 * ) from , ( u32 __user * ) to , 4 , ret , 4 ) ;
return ret ;
}
}
return __copy_to_user_ll ( to , from , n ) ;
}
2006-01-15 00:21:30 +03:00
static __always_inline unsigned long __must_check
2005-04-17 02:20:36 +04:00
__copy_to_user ( void __user * to , const void * from , unsigned long n )
{
might_sleep ( ) ;
return __copy_to_user_inatomic ( to , from , n ) ;
}
/**
 * __copy_from_user: - Copy a block of data from user space, with less checking.
 * @to:   Destination address, in kernel space.
 * @from: Source address, in user space.
 * @n:    Number of bytes to copy.
 *
 * Context: User context only.  This function may sleep.
 *
 * Copy data from user space to kernel space.  Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be copied.
 * On success, this will be zero.
 *
 * If some data could not be copied, this function will pad the copied
 * data to the requested size using zero bytes.
 *
 * Background, from the change description "[PATCH] Prepare for
 * __copy_from_user_inatomic to not zero missed bytes" (Neil Brown):
 * when writing to a file, the copy from user space into the page cache is
 * first attempted with preemption disabled, so if the source address is
 * not immediately available the copy fails *and* zeros the destination.
 * A concurrent read could then see zeros rather than what was there before
 * the write, what was there after, or some mixture of the two.  A failed
 * atomic copy is retried immediately with preemption re-enabled, so a
 * transient problem accessing the source causes no error and the first
 * attempt does not need to zero the uncopied bytes.  The
 * *copy_from_user_iovec implementations in filemap and ntfs/file used to
 * assume the atomic copy zeroed the tail; that assumption was removed.
 * Architectures that might disable preemption when kmap_atomic is called
 * (powerpc, i386, mips, sparc) need their __copy_from_user_inatomic*
 * changed to not zero the tail.
 *
 * An alternate version - __copy_from_user_inatomic() - may be called from
 * atomic context and will fail rather than sleep.  In this case the
 * uncopied bytes will *NOT* be padded with zeros.  See fs/filemap.h
 * for explanation of why this is needed.
 * FIXME this isn't implemented yet EMXIF
*/
2006-01-15 00:21:30 +03:00
static __always_inline unsigned long
2005-04-17 02:20:36 +04:00
__copy_from_user_inatomic ( void * to , const void __user * from , unsigned long n )
{
if ( __builtin_constant_p ( n ) ) {
unsigned long ret ;
switch ( n ) {
case 1 :
__get_user_size ( * ( u8 * ) to , from , 1 , ret , 1 ) ;
return ret ;
case 2 :
__get_user_size ( * ( u16 * ) to , from , 2 , ret , 2 ) ;
return ret ;
case 4 :
__get_user_size ( * ( u32 * ) to , from , 4 , ret , 4 ) ;
return ret ;
}
}
return __copy_from_user_ll ( to , from , n ) ;
}
2006-06-23 13:04:16 +04:00
# define ARCH_HAS_NOCACHE_UACCESS
static __always_inline unsigned long __copy_from_user_inatomic_nocache ( void * to ,
const void __user * from , unsigned long n )
{
if ( __builtin_constant_p ( n ) ) {
unsigned long ret ;
switch ( n ) {
case 1 :
__get_user_size ( * ( u8 * ) to , from , 1 , ret , 1 ) ;
return ret ;
case 2 :
__get_user_size ( * ( u16 * ) to , from , 2 , ret , 2 ) ;
return ret ;
case 4 :
__get_user_size ( * ( u32 * ) to , from , 4 , ret , 4 ) ;
return ret ;
}
}
return __copy_from_user_ll_nocache ( to , from , n ) ;
}
2006-01-15 00:21:30 +03:00
static __always_inline unsigned long
2005-04-17 02:20:36 +04:00
__copy_from_user ( void * to , const void __user * from , unsigned long n )
{
might_sleep ( ) ;
return __copy_from_user_inatomic ( to , from , n ) ;
}
2006-06-23 13:04:16 +04:00
static __always_inline unsigned long
__copy_from_user_nocache ( void * to , const void __user * from , unsigned long n )
{
might_sleep ( ) ;
return __copy_from_user_inatomic_nocache ( to , from , n ) ;
}
2005-04-17 02:20:36 +04:00
unsigned long __must_check copy_to_user ( void __user * to ,
const void * from , unsigned long n ) ;
unsigned long __must_check copy_from_user ( void * to ,
const void __user * from , unsigned long n ) ;
long __must_check strncpy_from_user ( char * dst , const char __user * src ,
long count ) ;
long __must_check __strncpy_from_user ( char * dst ,
const char __user * src , long count ) ;
/**
 * strlen_user: - Get the size of a string in user space.
 * @str: The string to measure.
 *
 * Context: User context only.  This function may sleep.
 *
 * Get the size of a NUL-terminated string in user space.
 *
 * Returns the size of the string INCLUDING the terminating NUL.
 * On exception, returns 0.
 *
 * If there is a limit on the length of a valid string, you may wish to
 * consider using strnlen_user() instead.
 */
/* strnlen_user() with the largest positive long (~0UL >> 1) as the limit. */
#define strlen_user(str) strnlen_user(str, ~0UL >> 1)

/* Declarations only; implemented out of line. */
long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
# endif /* __i386_UACCESS_H */