2010-03-07 16:41:34 -08:00
# include <linux/proc_fs.h>
# include <linux/nsproxy.h>
# include <linux/ptrace.h>
# include <linux/namei.h>
# include <linux/file.h>
# include <linux/utsname.h>
# include <net/net_namespace.h>
# include <linux/ipc_namespace.h>
# include <linux/pid_namespace.h>
2012-07-26 06:24:06 -07:00
# include <linux/user_namespace.h>
2010-03-07 16:41:34 -08:00
# include "internal.h"
static const struct proc_ns_operations * ns_entries [ ] = {
2010-03-07 18:14:23 -08:00
# ifdef CONFIG_NET_NS
& netns_operations ,
# endif
2010-03-07 18:43:27 -08:00
# ifdef CONFIG_UTS_NS
& utsns_operations ,
# endif
2010-03-07 18:48:39 -08:00
# ifdef CONFIG_IPC_NS
& ipcns_operations ,
# endif
2010-03-07 18:17:03 -08:00
# ifdef CONFIG_PID_NS
& pidns_operations ,
2017-05-08 15:56:41 -07:00
& pidns_for_children_operations ,
2012-07-26 06:24:06 -07:00
# endif
# ifdef CONFIG_USER_NS
& userns_operations ,
2010-03-07 18:17:03 -08:00
# endif
2010-03-07 18:49:36 -08:00
& mntns_operations ,
2016-01-29 02:54:06 -06:00
# ifdef CONFIG_CGROUPS
& cgroupns_operations ,
# endif
2010-03-07 16:41:34 -08:00
} ;
2015-11-17 10:20:54 -05:00
static const char * proc_ns_get_link ( struct dentry * dentry ,
2015-12-29 15:58:39 -05:00
struct inode * inode ,
struct delayed_call * done )
2011-06-18 17:48:18 -07:00
{
2014-11-01 11:10:28 -04:00
const struct proc_ns_operations * ns_ops = PROC_I ( inode ) - > ns_ops ;
2011-06-18 17:48:18 -07:00
struct task_struct * task ;
2013-03-09 00:14:45 -08:00
struct path ns_path ;
2011-06-18 17:48:18 -07:00
void * error = ERR_PTR ( - EACCES ) ;
2015-11-17 10:20:54 -05:00
if ( ! dentry )
return ERR_PTR ( - ECHILD ) ;
2011-06-18 17:48:18 -07:00
task = get_proc_task ( inode ) ;
if ( ! task )
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 10:57:28 -04:00
return error ;
2011-06-18 17:48:18 -07:00
ptrace: use fsuid, fsgid, effective creds for fs access checks
By checking the effective credentials instead of the real UID / permitted
capabilities, ensure that the calling process actually intended to use its
credentials.
To ensure that all ptrace checks use the correct caller credentials (e.g.
in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
flag), use two new flags and require one of them to be set.
The problem was that when a privileged task had temporarily dropped its
privileges, e.g. by calling setreuid(0, user_uid), with the intent to
perform following syscalls with the credentials of a user, it still passed
ptrace access checks that the user would not be able to pass.
While an attacker should not be able to convince the privileged task to
perform a ptrace() syscall, this is a problem because the ptrace access
check is reused for things in procfs.
In particular, the following somewhat interesting procfs entries only rely
on ptrace access checks:
/proc/$pid/stat - uses the check for determining whether pointers
should be visible, useful for bypassing ASLR
/proc/$pid/maps - also useful for bypassing ASLR
/proc/$pid/cwd - useful for gaining access to restricted
directories that contain files with lax permissions, e.g. in
this scenario:
lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
drwx------ root root /root
drwxr-xr-x root root /root/foobar
-rw-r--r-- root root /root/foobar/secret
Therefore, on a system where a root-owned mode 6755 binary changes its
effective credentials as described and then dumps a user-specified file,
this could be used by an attacker to reveal the memory layout of root's
processes or reveal the contents of files he is not allowed to access
(through /proc/$pid/cwd).
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-20 15:00:04 -08:00
if ( ptrace_may_access ( task , PTRACE_MODE_READ_FSCREDS ) ) {
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 10:57:28 -04:00
error = ns_get_path ( & ns_path , task , ns_ops ) ;
if ( ! error )
2015-05-02 13:37:52 -04:00
nd_jump_link ( & ns_path ) ;
2011-06-18 17:48:18 -07:00
}
put_task_struct ( task ) ;
return error ;
}
/*
 * ->readlink() for a namespace symlink under /proc/<pid>/ns/.
 *
 * @dentry: dentry of the symlink.
 * @buffer: user-space buffer receiving the link text.
 * @buflen: size of @buffer.
 *
 * The link text is produced by ns_get_name() into a local buffer and then
 * copied out with readlink_copy().  Returns the result of readlink_copy()
 * on success, a negative value from ns_get_name() if it fails, or -EACCES
 * when the task is gone or the ptrace check fails.
 */
static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	struct inode *inode = d_inode(dentry);
	const struct proc_ns_operations *ops = PROC_I(inode)->ns_ops;
	struct task_struct *tsk;
	char link_text[50];
	int ret = -EACCES;

	tsk = get_proc_task(inode);
	if (!tsk)
		return ret;

	/*
	 * Checked against the caller's filesystem credentials (fsuid/fsgid,
	 * effective capabilities) rather than its real UID / permitted
	 * capabilities.
	 */
	if (!ptrace_may_access(tsk, PTRACE_MODE_READ_FSCREDS))
		goto out_put;

	ret = ns_get_name(link_text, sizeof(link_text), tsk, ops);
	if (ret < 0)
		goto out_put;

	ret = readlink_copy(buffer, buflen, link_text);

out_put:
	put_task_struct(tsk);
	return ret;
}
static const struct inode_operations proc_ns_link_inode_operations = {
. readlink = proc_ns_readlink ,
2015-11-17 10:20:54 -05:00
. get_link = proc_ns_get_link ,
2011-06-18 17:48:18 -07:00
. setattr = proc_setattr ,
} ;
2013-06-15 11:15:20 +04:00
static int proc_ns_instantiate ( struct inode * dir ,
2010-03-07 16:41:34 -08:00
struct dentry * dentry , struct task_struct * task , const void * ptr )
{
const struct proc_ns_operations * ns_ops = ptr ;
struct inode * inode ;
struct proc_inode * ei ;
2016-11-10 22:18:28 +01:00
inode = proc_pid_make_inode ( dir - > i_sb , task , S_IFLNK | S_IRWXUGO ) ;
2010-03-07 16:41:34 -08:00
if ( ! inode )
goto out ;
ei = PROC_I ( inode ) ;
2011-06-18 17:48:18 -07:00
inode - > i_op = & proc_ns_link_inode_operations ;
2014-11-01 11:10:28 -04:00
ei - > ns_ops = ns_ops ;
2010-03-07 16:41:34 -08:00
2012-03-23 15:02:55 -07:00
d_set_d_op ( dentry , & pid_dentry_operations ) ;
2010-03-07 16:41:34 -08:00
d_add ( dentry , inode ) ;
/* Close the race of the process dying before we return the dentry */
2012-06-10 16:03:43 -04:00
if ( pid_revalidate ( dentry , 0 ) )
2013-06-15 11:15:20 +04:00
return 0 ;
2010-03-07 16:41:34 -08:00
out :
2013-06-15 11:15:20 +04:00
return - ENOENT ;
2010-03-07 16:41:34 -08:00
}
2013-05-16 12:07:31 -04:00
static int proc_ns_dir_readdir ( struct file * file , struct dir_context * ctx )
2010-03-07 16:41:34 -08:00
{
2013-05-16 12:07:31 -04:00
struct task_struct * task = get_proc_task ( file_inode ( file ) ) ;
2010-03-07 16:41:34 -08:00
const struct proc_ns_operations * * entry , * * last ;
if ( ! task )
2013-05-16 12:07:31 -04:00
return - ENOENT ;
2010-03-07 16:41:34 -08:00
2013-05-16 12:07:31 -04:00
if ( ! dir_emit_dots ( file , ctx ) )
goto out ;
if ( ctx - > pos > = 2 + ARRAY_SIZE ( ns_entries ) )
goto out ;
entry = ns_entries + ( ctx - > pos - 2 ) ;
last = & ns_entries [ ARRAY_SIZE ( ns_entries ) - 1 ] ;
while ( entry < = last ) {
const struct proc_ns_operations * ops = * entry ;
if ( ! proc_fill_cache ( file , ctx , ops - > name , strlen ( ops - > name ) ,
proc_ns_instantiate , task , ops ) )
break ;
ctx - > pos + + ;
entry + + ;
}
2010-03-07 16:41:34 -08:00
out :
put_task_struct ( task ) ;
2013-05-16 12:07:31 -04:00
return 0 ;
2010-03-07 16:41:34 -08:00
}
const struct file_operations proc_ns_dir_operations = {
. read = generic_read_dir ,
2016-04-20 17:13:54 -04:00
. iterate_shared = proc_ns_dir_readdir ,
. llseek = generic_file_llseek ,
2010-03-07 16:41:34 -08:00
} ;
static struct dentry * proc_ns_dir_lookup ( struct inode * dir ,
2012-06-10 17:13:09 -04:00
struct dentry * dentry , unsigned int flags )
2010-03-07 16:41:34 -08:00
{
2013-06-15 11:15:20 +04:00
int error ;
2010-03-07 16:41:34 -08:00
struct task_struct * task = get_proc_task ( dir ) ;
const struct proc_ns_operations * * entry , * * last ;
unsigned int len = dentry - > d_name . len ;
2013-06-15 11:15:20 +04:00
error = - ENOENT ;
2010-03-07 16:41:34 -08:00
if ( ! task )
goto out_no_task ;
2012-03-28 14:42:52 -07:00
last = & ns_entries [ ARRAY_SIZE ( ns_entries ) ] ;
for ( entry = ns_entries ; entry < last ; entry + + ) {
2010-03-07 16:41:34 -08:00
if ( strlen ( ( * entry ) - > name ) ! = len )
continue ;
if ( ! memcmp ( dentry - > d_name . name , ( * entry ) - > name , len ) )
break ;
}
2012-03-28 14:42:52 -07:00
if ( entry = = last )
2010-03-07 16:41:34 -08:00
goto out ;
error = proc_ns_instantiate ( dir , dentry , task , * entry ) ;
out :
put_task_struct ( task ) ;
out_no_task :
2013-06-15 11:15:20 +04:00
return ERR_PTR ( error ) ;
2010-03-07 16:41:34 -08:00
}
/*
 * Inode operations for the directory holding the namespace symlinks;
 * name resolution is done by proc_ns_dir_lookup().
 */
const struct inode_operations proc_ns_dir_inode_operations = {
	.lookup		= proc_ns_dir_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
};