2011-11-01 04:06:39 +04:00
/*
 * linux/mm/process_vm_access.c
 *
 * Copyright (C) 2010-2011 Christopher Yeoh <cyeoh@au1.ibm.com>, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
# include <linux/mm.h>
# include <linux/uio.h>
# include <linux/sched.h>
# include <linux/highmem.h>
# include <linux/ptrace.h>
# include <linux/slab.h>
# include <linux/syscalls.h>
# ifdef CONFIG_COMPAT
# include <linux/compat.h>
# endif
/**
* process_vm_rw_pages - read / write pages from task specified
2014-02-05 22:25:32 +04:00
* @ pages : array of pointers to pages we want to copy
2011-11-01 04:06:39 +04:00
* @ start_offset : offset in page to start copying from / to
* @ len : number of bytes to copy
2014-02-05 22:25:32 +04:00
* @ iter : where to copy to / from locally
2011-11-01 04:06:39 +04:00
* @ vm_write : 0 means copy from , 1 means copy to
* Returns 0 on success , error code otherwise
*/
2014-02-05 21:44:24 +04:00
static int process_vm_rw_pages ( struct page * * pages ,
unsigned offset ,
2014-02-05 21:55:11 +04:00
size_t len ,
2014-02-05 20:51:53 +04:00
struct iov_iter * iter ,
2014-02-05 22:15:28 +04:00
int vm_write )
2011-11-01 04:06:39 +04:00
{
/* Do the copy for each page */
2014-02-05 22:15:28 +04:00
while ( len & & iov_iter_count ( iter ) ) {
2014-02-05 21:44:24 +04:00
struct page * page = * pages + + ;
2014-02-05 21:55:11 +04:00
size_t copy = PAGE_SIZE - offset ;
2014-02-05 21:44:24 +04:00
size_t copied ;
2014-02-05 21:14:11 +04:00
2014-02-05 21:55:11 +04:00
if ( copy > len )
copy = len ;
2014-02-05 21:14:11 +04:00
if ( vm_write ) {
2014-04-11 04:54:51 +04:00
copied = copy_page_from_iter ( page , offset , copy , iter ) ;
2014-02-05 21:14:11 +04:00
set_page_dirty_lock ( page ) ;
2011-11-01 04:06:39 +04:00
} else {
2014-02-05 21:44:24 +04:00
copied = copy_page_to_iter ( page , offset , copy , iter ) ;
2011-11-01 04:06:39 +04:00
}
2014-02-05 21:44:24 +04:00
len - = copied ;
if ( copied < copy & & iov_iter_count ( iter ) )
return - EFAULT ;
offset = 0 ;
2011-11-01 04:06:39 +04:00
}
2014-02-05 21:44:24 +04:00
return 0 ;
2011-11-01 04:06:39 +04:00
}
/*
 * Upper bound, in bytes (two pages worth), on the kmalloc'd array that
 * holds struct page pointers during a copy. Dividing it by the size of a
 * page pointer gives the maximum number of pages handled per loop.
 */
# define PVM_MAX_KMALLOC_PAGES (PAGE_SIZE * 2)
/**
* process_vm_rw_single_vec - read / write pages from task specified
* @ addr : start memory address of target process
* @ len : size of area to copy to / from
2014-02-05 22:25:32 +04:00
* @ iter : where to copy to / from locally
2011-11-01 04:06:39 +04:00
* @ process_pages : struct pages area that can store at least
* nr_pages_to_copy struct page pointers
* @ mm : mm for task
* @ task : task to read / write from
* @ vm_write : 0 means copy from , 1 means copy to
* Returns 0 on success or on failure error code
*/
static int process_vm_rw_single_vec ( unsigned long addr ,
unsigned long len ,
2014-02-05 20:51:53 +04:00
struct iov_iter * iter ,
2011-11-01 04:06:39 +04:00
struct page * * process_pages ,
struct mm_struct * mm ,
struct task_struct * task ,
2014-02-05 22:15:28 +04:00
int vm_write )
2011-11-01 04:06:39 +04:00
{
unsigned long pa = addr & PAGE_MASK ;
unsigned long start_offset = addr - pa ;
unsigned long nr_pages ;
ssize_t rc = 0 ;
unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
/ sizeof ( struct pages * ) ;
2016-10-13 03:20:12 +03:00
unsigned int flags = FOLL_REMOTE ;
2011-11-01 04:06:39 +04:00
/* Work out address and page range required */
if ( len = = 0 )
return 0 ;
nr_pages = ( addr + len - 1 ) / PAGE_SIZE - addr / PAGE_SIZE + 1 ;
2016-10-13 03:20:12 +03:00
if ( vm_write )
flags | = FOLL_WRITE ;
2014-02-05 22:25:32 +04:00
while ( ! rc & & nr_pages & & iov_iter_count ( iter ) ) {
int pages = min ( nr_pages , max_pages_per_loop ) ;
size_t bytes ;
2011-11-01 04:06:39 +04:00
2016-02-13 00:01:54 +03:00
/*
* Get the pages we ' re interested in . We must
* add FOLL_REMOTE because task / mm might not
* current / current - > mm
*/
pages = __get_user_pages_unlocked ( task , mm , pa , pages ,
2016-10-13 03:20:12 +03:00
process_pages , flags ) ;
2014-02-05 22:25:32 +04:00
if ( pages < = 0 )
2014-02-05 21:44:24 +04:00
return - EFAULT ;
2014-02-05 22:25:32 +04:00
bytes = pages * PAGE_SIZE - start_offset ;
if ( bytes > len )
bytes = len ;
2014-02-05 21:55:11 +04:00
2014-02-05 21:44:24 +04:00
rc = process_vm_rw_pages ( process_pages ,
2014-02-05 22:25:32 +04:00
start_offset , bytes , iter ,
2014-02-05 22:15:28 +04:00
vm_write ) ;
2014-02-05 22:25:32 +04:00
len - = bytes ;
2011-11-01 04:06:39 +04:00
start_offset = 0 ;
2014-02-05 22:25:32 +04:00
nr_pages - = pages ;
pa + = pages * PAGE_SIZE ;
while ( pages )
put_page ( process_pages [ - - pages ] ) ;
2011-11-01 04:06:39 +04:00
}
return rc ;
}
/*
 * Maximum number of entries for the process pages array which lives on
 * the stack; requests needing more page pointers than this use a
 * kmalloc'd array instead.
 */
# define PVM_MAX_PP_ARRAY_COUNT 16
/**
* process_vm_rw_core - core of reading / writing pages from task specified
* @ pid : PID of process to read / write from / to
2014-02-05 22:25:32 +04:00
* @ iter : where to copy to / from locally
2011-11-01 04:06:39 +04:00
* @ rvec : iovec array specifying where to copy to / from in the other process
* @ riovcnt : size of rvec array
* @ flags : currently unused
* @ vm_write : 0 if reading from other process , 1 if writing to other process
* Returns the number of bytes read / written or error code . May
* return less bytes than expected if an error occurs during the copying
* process .
*/
2014-02-05 20:51:53 +04:00
static ssize_t process_vm_rw_core ( pid_t pid , struct iov_iter * iter ,
2011-11-01 04:06:39 +04:00
const struct iovec * rvec ,
unsigned long riovcnt ,
unsigned long flags , int vm_write )
{
struct task_struct * task ;
struct page * pp_stack [ PVM_MAX_PP_ARRAY_COUNT ] ;
struct page * * process_pages = pp_stack ;
struct mm_struct * mm ;
unsigned long i ;
ssize_t rc = 0 ;
unsigned long nr_pages = 0 ;
unsigned long nr_pages_iov ;
ssize_t iov_len ;
2014-02-05 22:15:28 +04:00
size_t total_len = iov_iter_count ( iter ) ;
2011-11-01 04:06:39 +04:00
/*
* Work out how many pages of struct pages we ' re going to need
* when eventually calling get_user_pages
*/
for ( i = 0 ; i < riovcnt ; i + + ) {
iov_len = rvec [ i ] . iov_len ;
if ( iov_len > 0 ) {
nr_pages_iov = ( ( unsigned long ) rvec [ i ] . iov_base
+ iov_len )
/ PAGE_SIZE - ( unsigned long ) rvec [ i ] . iov_base
/ PAGE_SIZE + 1 ;
nr_pages = max ( nr_pages , nr_pages_iov ) ;
}
}
if ( nr_pages = = 0 )
return 0 ;
if ( nr_pages > PVM_MAX_PP_ARRAY_COUNT ) {
/* For reliability don't try to kmalloc more than
2 pages worth */
process_pages = kmalloc ( min_t ( size_t , PVM_MAX_KMALLOC_PAGES ,
sizeof ( struct pages * ) * nr_pages ) ,
GFP_KERNEL ) ;
if ( ! process_pages )
return - ENOMEM ;
}
/* Get process information */
rcu_read_lock ( ) ;
task = find_task_by_vpid ( pid ) ;
if ( task )
get_task_struct ( task ) ;
rcu_read_unlock ( ) ;
if ( ! task ) {
rc = - ESRCH ;
goto free_proc_pages ;
}
ptrace: use fsuid, fsgid, effective creds for fs access checks
By checking the effective credentials instead of the real UID / permitted
capabilities, ensure that the calling process actually intended to use its
credentials.
To ensure that all ptrace checks use the correct caller credentials (e.g.
in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
flag), use two new flags and require one of them to be set.
The problem was that when a privileged task had temporarily dropped its
privileges, e.g. by calling setreuid(0, user_uid), with the intent to
perform following syscalls with the credentials of a user, it still passed
ptrace access checks that the user would not be able to pass.
While an attacker should not be able to convince the privileged task to
perform a ptrace() syscall, this is a problem because the ptrace access
check is reused for things in procfs.
In particular, the following somewhat interesting procfs entries only rely
on ptrace access checks:
/proc/$pid/stat - uses the check for determining whether pointers
should be visible, useful for bypassing ASLR
/proc/$pid/maps - also useful for bypassing ASLR
/proc/$pid/cwd - useful for gaining access to restricted
directories that contain files with lax permissions, e.g. in
this scenario:
lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
drwx------ root root /root
drwxr-xr-x root root /root/foobar
-rw-r--r-- root root /root/foobar/secret
Therefore, on a system where a root-owned mode 6755 binary changes its
effective credentials as described and then dumps a user-specified file,
this could be used by an attacker to reveal the memory layout of root's
processes or reveal the contents of files he is not allowed to access
(through /proc/$pid/cwd).
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-21 02:00:04 +03:00
mm = mm_access ( task , PTRACE_MODE_ATTACH_REALCREDS ) ;
2012-02-02 05:04:09 +04:00
if ( ! mm | | IS_ERR ( mm ) ) {
rc = IS_ERR ( mm ) ? PTR_ERR ( mm ) : - ESRCH ;
/*
* Explicitly map EACCES to EPERM as EPERM is a more a
* appropriate error code for process_vw_readv / writev
*/
if ( rc = = - EACCES )
rc = - EPERM ;
2011-11-01 04:06:39 +04:00
goto put_task_struct ;
}
2014-02-05 22:15:28 +04:00
for ( i = 0 ; i < riovcnt & & iov_iter_count ( iter ) & & ! rc ; i + + )
2011-11-01 04:06:39 +04:00
rc = process_vm_rw_single_vec (
( unsigned long ) rvec [ i ] . iov_base , rvec [ i ] . iov_len ,
2014-02-05 22:15:28 +04:00
iter , process_pages , mm , task , vm_write ) ;
/* copied = space before - space after */
total_len - = iov_iter_count ( iter ) ;
/* If we have managed to copy any data at all then
we return the number of bytes copied . Otherwise
we return the error code */
if ( total_len )
rc = total_len ;
2011-11-01 04:06:39 +04:00
mmput ( mm ) ;
put_task_struct :
put_task_struct ( task ) ;
free_proc_pages :
if ( process_pages ! = pp_stack )
kfree ( process_pages ) ;
return rc ;
}
/**
 * process_vm_rw - check iovecs before calling core routine
 * @pid: PID of process to read/write from/to
 * @lvec: iovec array specifying where to copy to/from locally
 * @liovcnt: size of lvec array
 * @rvec: iovec array specifying where to copy to/from in the other process
 * @riovcnt: size of rvec array
 * @flags: currently unused; must be 0, anything else yields -EINVAL
 * @vm_write: 0 if reading from other process, 1 if writing to other process
 *
 * Returns the number of bytes read/written or error code. May
 * return less bytes than expected if an error occurs during the copying
 * process.
 */
static ssize_t process_vm_rw(pid_t pid,
			     const struct iovec __user *lvec,
			     unsigned long liovcnt,
			     const struct iovec __user *rvec,
			     unsigned long riovcnt,
			     unsigned long flags, int vm_write)
{
	struct iovec local_stack[UIO_FASTIOV];
	struct iovec remote_stack[UIO_FASTIOV];
	struct iovec *local_iov = local_stack;
	struct iovec *remote_iov = remote_stack;
	struct iov_iter iter;
	ssize_t rc;

	if (flags != 0)
		return -EINVAL;

	/* Check the local iovecs and set up the iterator over them */
	rc = import_iovec(vm_write ? WRITE : READ, lvec, liovcnt,
			  UIO_FASTIOV, &local_iov, &iter);
	if (rc < 0)
		return rc;

	/* Nothing to do locally: skip the remote side, keep rc as is */
	if (iov_iter_count(&iter)) {
		rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
					   UIO_FASTIOV, remote_stack,
					   &remote_iov);
		if (rc > 0)
			rc = process_vm_rw_core(pid, &iter, remote_iov,
						riovcnt, flags, vm_write);
	}

	/* The remote array is freed only if it is not the on-stack one */
	if (remote_iov != remote_stack)
		kfree(remote_iov);
	/*
	 * NOTE(review): freed unconditionally; presumably import_iovec()
	 * leaves this either NULL or heap-allocated - confirm against
	 * its documentation.
	 */
	kfree(local_iov);

	return rc;
}
/*
 * process_vm_readv system call entry: forwards its arguments to
 * process_vm_rw() with the direction set to "read from other process".
 */
SYSCALL_DEFINE6(process_vm_readv, pid_t, pid, const struct iovec __user *, lvec,
		unsigned long, liovcnt, const struct iovec __user *, rvec,
		unsigned long, riovcnt, unsigned long, flags)
{
	const int vm_write = 0;	/* reading from the other process */

	return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags,
			     vm_write);
}
/*
 * process_vm_writev system call entry: forwards its arguments to
 * process_vm_rw() with the direction set to "write to other process".
 */
SYSCALL_DEFINE6(process_vm_writev, pid_t, pid,
		const struct iovec __user *, lvec,
		unsigned long, liovcnt, const struct iovec __user *, rvec,
		unsigned long, riovcnt, unsigned long, flags)
{
	const int vm_write = 1;	/* writing to the other process */

	return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags,
			     vm_write);
}
# ifdef CONFIG_COMPAT
2014-04-04 01:48:04 +04:00
static ssize_t
2011-11-01 04:06:39 +04:00
compat_process_vm_rw ( compat_pid_t pid ,
const struct compat_iovec __user * lvec ,
unsigned long liovcnt ,
const struct compat_iovec __user * rvec ,
unsigned long riovcnt ,
unsigned long flags , int vm_write )
{
struct iovec iovstack_l [ UIO_FASTIOV ] ;
struct iovec iovstack_r [ UIO_FASTIOV ] ;
struct iovec * iov_l = iovstack_l ;
struct iovec * iov_r = iovstack_r ;
2014-02-05 20:51:53 +04:00
struct iov_iter iter ;
2011-11-01 04:06:39 +04:00
ssize_t rc = - EFAULT ;
2015-03-21 21:47:11 +03:00
int dir = vm_write ? WRITE : READ ;
2011-11-01 04:06:39 +04:00
if ( flags ! = 0 )
return - EINVAL ;
2015-03-21 21:47:11 +03:00
rc = compat_import_iovec ( dir , lvec , liovcnt , UIO_FASTIOV , & iov_l , & iter ) ;
if ( rc < 0 )
return rc ;
if ( ! iov_iter_count ( & iter ) )
2011-11-01 04:06:39 +04:00
goto free_iovecs ;
2012-06-01 03:26:42 +04:00
rc = compat_rw_copy_check_uvector ( CHECK_IOVEC_ONLY , rvec , riovcnt ,
2011-11-01 04:06:39 +04:00
UIO_FASTIOV , iovstack_r ,
2012-06-01 03:26:42 +04:00
& iov_r ) ;
2011-11-01 04:06:39 +04:00
if ( rc < = 0 )
goto free_iovecs ;
2014-02-05 20:51:53 +04:00
rc = process_vm_rw_core ( pid , & iter , iov_r , riovcnt , flags , vm_write ) ;
2011-11-01 04:06:39 +04:00
free_iovecs :
if ( iov_r ! = iovstack_r )
kfree ( iov_r ) ;
2015-03-21 21:47:11 +03:00
kfree ( iov_l ) ;
2011-11-01 04:06:39 +04:00
return rc ;
}
2014-03-04 20:18:23 +04:00
/*
 * Compat process_vm_readv system call entry: forwards its arguments to
 * compat_process_vm_rw() with the direction set to "read from other process".
 */
COMPAT_SYSCALL_DEFINE6(process_vm_readv, compat_pid_t, pid,
		       const struct compat_iovec __user *, lvec,
		       compat_ulong_t, liovcnt,
		       const struct compat_iovec __user *, rvec,
		       compat_ulong_t, riovcnt,
		       compat_ulong_t, flags)
{
	const int vm_write = 0;	/* reading from the other process */

	return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
				    riovcnt, flags, vm_write);
}
2014-03-04 20:18:23 +04:00
/*
 * Compat process_vm_writev system call entry: forwards its arguments to
 * compat_process_vm_rw() with the direction set to "write to other process".
 */
COMPAT_SYSCALL_DEFINE6(process_vm_writev, compat_pid_t, pid,
		       const struct compat_iovec __user *, lvec,
		       compat_ulong_t, liovcnt,
		       const struct compat_iovec __user *, rvec,
		       compat_ulong_t, riovcnt,
		       compat_ulong_t, flags)
{
	const int vm_write = 1;	/* writing to the other process */

	return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
				    riovcnt, flags, vm_write);
}
# endif