2005-04-16 15:20:36 -07:00
/*
* linux / kernel / ptrace . c
*
* ( C ) Copyright 1999 Linus Torvalds
*
* Common interfaces for " ptrace() " which we do not want
* to continually duplicate across every architecture .
*/
2006-01-11 12:17:46 -08:00
# include <linux/capability.h>
2005-04-16 15:20:36 -07:00
# include <linux/module.h>
# include <linux/sched.h>
# include <linux/errno.h>
# include <linux/mm.h>
# include <linux/highmem.h>
# include <linux/pagemap.h>
# include <linux/ptrace.h>
# include <linux/security.h>
2005-05-01 08:59:14 -07:00
# include <linux/signal.h>
2007-03-20 13:58:35 -04:00
# include <linux/audit.h>
2007-10-18 23:40:14 -07:00
# include <linux/pid_namespace.h>
2008-02-06 01:36:44 -08:00
# include <linux/syscalls.h>
2009-04-07 23:21:06 -07:00
# include <linux/uaccess.h>
2010-02-11 11:51:00 -08:00
# include <linux/regset.h>
2011-04-07 16:53:20 +02:00
# include <linux/hw_breakpoint.h>
2005-04-16 15:20:36 -07:00
2008-12-19 15:10:24 +01:00
2005-04-16 15:20:36 -07:00
/*
* ptrace a task : make the debugger its new parent and
* move it to the ptrace list .
*
* Must be called with the tasklist lock write - held .
*/
2006-07-03 00:25:41 -07:00
void __ptrace_link ( struct task_struct * child , struct task_struct * new_parent )
2005-04-16 15:20:36 -07:00
{
2008-03-24 18:36:23 -07:00
BUG_ON ( ! list_empty ( & child - > ptrace_entry ) ) ;
list_add ( & child - > ptrace_entry , & new_parent - > ptraced ) ;
2005-04-16 15:20:36 -07:00
child - > parent = new_parent ;
}
2009-04-07 23:21:06 -07:00
2011-03-23 10:37:01 +01:00
/**
* __ptrace_unlink - unlink ptracee and restore its execution state
* @ child : ptracee to be unlinked
2005-04-16 15:20:36 -07:00
*
ptrace: Always put ptracee into appropriate execution state
Currently, __ptrace_unlink() wakes up the tracee iff it's in
TASK_TRACED. For unlinking from PTRACE_DETACH, this is correct as the
tracee is guaranteed to be in TASK_TRACED or dead; however, unlinking
also happens when the ptracer exits and in this case the ptracee can
be in any state and ptracee might be left running even if the group it
belongs to is stopped.
This patch updates __ptrace_unlink() such that GROUP_STOP_PENDING is
reinstated regardless of the ptracee's current state as long as it's
alive and makes sure that signal_wake_up() is called if execution
state transition is necessary.
Test case follows.
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
static const struct timespec ts1s = { .tv_sec = 1 };
int main(void)
{
pid_t tracee;
siginfo_t si;
tracee = fork();
if (tracee == 0) {
while (1) {
nanosleep(&ts1s, NULL);
write(1, ".", 1);
}
}
ptrace(PTRACE_ATTACH, tracee, NULL, NULL);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
write(1, "exiting", 7);
return 0;
}
Before the patch, after the parent process exits, the child is left
running and prints out "." every second.
exiting..... (continues)
After the patch, the group stop initiated by the implied SIGSTOP from
PTRACE_ATTACH is re-established when the parent exits.
exiting
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
2011-03-23 10:37:01 +01:00
* Remove @ child from the ptrace list , move it back to the original parent ,
* and restore the execution state so that it conforms to the group stop
* state .
*
* Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer
* exiting . For PTRACE_DETACH , unless the ptracee has been killed between
* ptrace_check_attach ( ) and here , it ' s guaranteed to be in TASK_TRACED .
* If the ptracer is exiting , the ptracee can be in any state .
*
* After detach , the ptracee should be in a state which conforms to the
* group stop . If the group is stopped or in the process of stopping , the
* ptracee should be put into TASK_STOPPED ; otherwise , it should be woken
* up from TASK_TRACED .
*
* If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED ,
* it goes through TRACED - > RUNNING - > STOPPED transition which is similar
* to but in the opposite direction of what happens while attaching to a
* stopped task . However , in this direction , the intermediate RUNNING
* state is not hidden even from the current ptracer and if it immediately
* re - attaches and performs a WNOHANG wait ( 2 ) , it may fail .
2011-03-23 10:37:01 +01:00
*
* CONTEXT :
* write_lock_irq ( tasklist_lock )
2005-04-16 15:20:36 -07:00
*/
2006-07-03 00:25:41 -07:00
void __ptrace_unlink ( struct task_struct * child )
2005-04-16 15:20:36 -07:00
{
2006-02-15 22:50:10 +03:00
BUG_ON ( ! child - > ptrace ) ;
2005-04-16 15:20:36 -07:00
child - > ptrace = 0 ;
2008-03-24 18:36:23 -07:00
child - > parent = child - > real_parent ;
list_del_init ( & child - > ptrace_entry ) ;
2005-04-16 15:20:36 -07:00
spin_lock ( & child - > sighand - > siglock ) ;
ptrace: Always put ptracee into appropriate execution state
Currently, __ptrace_unlink() wakes up the tracee iff it's in
TASK_TRACED. For unlinking from PTRACE_DETACH, this is correct as the
tracee is guaranteed to be in TASK_TRACED or dead; however, unlinking
also happens when the ptracer exits and in this case the ptracee can
be in any state and ptrace might be left running even if the group it
belongs to is stopped.
This patch updates __ptrace_unlink() such that GROUP_STOP_PENDING is
reinstated regardless of the ptracee's current state as long as it's
alive and makes sure that signal_wake_up() is called if execution
state transition is necessary.
Test case follows.
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
static const struct timespec ts1s = { .tv_sec = 1 };
int main(void)
{
pid_t tracee;
siginfo_t si;
tracee = fork();
if (tracee == 0) {
while (1) {
nanosleep(&ts1s, NULL);
write(1, ".", 1);
}
}
ptrace(PTRACE_ATTACH, tracee, NULL, NULL);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
write(1, "exiting", 7);
return 0;
}
Before the patch, after the parent process exits, the child is left
running and prints out "." every second.
exiting..... (continues)
After the patch, the group stop initiated by the implied SIGSTOP from
PTRACE_ATTACH is re-established when the parent exits.
exiting
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
2011-03-23 10:37:01 +01:00
/*
* Reinstate GROUP_STOP_PENDING if group stop is in effect and
* @ child isn ' t dead .
*/
if ( ! ( child - > flags & PF_EXITING ) & &
( child - > signal - > flags & SIGNAL_STOP_STOPPED | |
child - > signal - > group_stop_count ) )
child - > group_stop | = GROUP_STOP_PENDING ;
/*
* If transition to TASK_STOPPED is pending or in TASK_TRACED , kick
* @ child in the butt . Note that @ resume should be used iff @ child
* is in TASK_TRACED ; otherwise , we might unduly disrupt
* TASK_KILLABLE sleeps .
*/
if ( child - > group_stop & GROUP_STOP_PENDING | | task_is_traced ( child ) )
signal_wake_up ( child , task_is_traced ( child ) ) ;
2005-04-16 15:20:36 -07:00
spin_unlock ( & child - > sighand - > siglock ) ;
}
/*
* Check that we have indeed attached to the thing . .
*/
int ptrace_check_attach ( struct task_struct * child , int kill )
{
int ret = - ESRCH ;
/*
* We take the read lock around doing both checks to close a
* possible race where someone else was tracing our child and
* detached between these two checks . After this locked check ,
* we are sure that this is our traced child and that can only
* be changed by us so it ' s not changing right after this .
*/
read_lock ( & tasklist_lock ) ;
2008-02-08 04:19:00 -08:00
if ( ( child - > ptrace & PT_PTRACED ) & & child - > parent = = current ) {
/*
* child - > sighand can ' t be NULL , release_task ( )
* does ptrace_unlink ( ) before __exit_signal ( ) .
*/
2005-04-16 15:20:36 -07:00
spin_lock_irq ( & child - > sighand - > siglock ) ;
2011-04-01 20:13:01 +02:00
WARN_ON_ONCE ( task_is_stopped ( child ) ) ;
if ( task_is_traced ( child ) | | kill )
ret = 0 ;
2005-04-16 15:20:36 -07:00
spin_unlock_irq ( & child - > sighand - > siglock ) ;
}
read_unlock ( & tasklist_lock ) ;
2008-02-06 01:36:13 -08:00
if ( ! ret & & ! kill )
2008-07-25 19:45:58 -07:00
ret = wait_task_inactive ( child , TASK_TRACED ) ? 0 : - ESRCH ;
2005-04-16 15:20:36 -07:00
/* All systems go.. */
return ret ;
}
Security: split proc ptrace checking into read vs. attach
Enable security modules to distinguish reading of process state via
proc from full ptrace access by renaming ptrace_may_attach to
ptrace_may_access and adding a mode argument indicating whether only
read access or full attach access is requested. This allows security
modules to permit access to reading process state without granting
full ptrace access. The base DAC/capability checking remains unchanged.
Read access to /proc/pid/mem continues to apply a full ptrace attach
check since check_mem_permission() already requires the current task
to already be ptracing the target. The other ptrace checks within
proc for elements like environ, maps, and fds are changed to pass the
read mode instead of attach.
In the SELinux case, we model such reading of process state as a
reading of a proc file labeled with the target process' label. This
enables SELinux policy to permit such reading of process state without
permitting control or manipulation of the target process, as there are
a number of cases where programs probe for such information via proc
but do not need to be able to control the target (e.g. procps,
lsof, PolicyKit, ConsoleKit). At present we have to choose between
allowing full ptrace in policy (more permissive than required/desired)
or breaking functionality (or in some cases just silencing the denials
via dontaudit rules but this can hide genuine attacks).
This version of the patch incorporates comments from Casey Schaufler
(change/replace existing ptrace_may_attach interface, pass access
mode), and Chris Wright (provide greater consistency in the checking).
Note that like their predecessors __ptrace_may_attach and
ptrace_may_attach, the __ptrace_may_access and ptrace_may_access
interfaces use different return value conventions from each other (0
or -errno vs. 1 or 0). I retained this difference to avoid any
changes to the caller logic but made the difference clearer by
changing the latter interface to return a bool rather than an int and
by adding a comment about it to ptrace.h for any future callers.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-05-19 08:32:49 -04:00
int __ptrace_may_access ( struct task_struct * task , unsigned int mode )
2005-09-06 15:18:24 -07:00
{
2008-11-14 10:39:19 +11:00
const struct cred * cred = current_cred ( ) , * tcred ;
2008-11-14 10:39:16 +11:00
2006-06-26 00:25:59 -07:00
/* May we inspect the given task?
* This check is used both for attaching with ptrace
* and for allowing access to sensitive information in / proc .
*
* ptrace_attach denies several cases that / proc allows
* because setting up the necessary parent / child relationship
* or halting the specified task is impossible .
*/
int dumpable = 0 ;
/* Don't let security modules deny introspection */
if ( task = = current )
return 0 ;
2008-11-14 10:39:19 +11:00
rcu_read_lock ( ) ;
tcred = __task_cred ( task ) ;
2011-03-23 16:43:20 -07:00
if ( cred - > user - > user_ns = = tcred - > user - > user_ns & &
( cred - > uid = = tcred - > euid & &
cred - > uid = = tcred - > suid & &
cred - > uid = = tcred - > uid & &
cred - > gid = = tcred - > egid & &
cred - > gid = = tcred - > sgid & &
cred - > gid = = tcred - > gid ) )
goto ok ;
if ( ns_capable ( tcred - > user - > user_ns , CAP_SYS_PTRACE ) )
goto ok ;
rcu_read_unlock ( ) ;
return - EPERM ;
ok :
2008-11-14 10:39:19 +11:00
rcu_read_unlock ( ) ;
2005-09-06 15:18:24 -07:00
smp_rmb ( ) ;
2006-06-26 00:25:59 -07:00
if ( task - > mm )
2007-07-19 01:48:27 -07:00
dumpable = get_dumpable ( task - > mm ) ;
2011-03-23 16:43:20 -07:00
if ( ! dumpable & & ! task_ns_capable ( task , CAP_SYS_PTRACE ) )
2005-09-06 15:18:24 -07:00
return - EPERM ;
2009-05-07 19:26:19 +10:00
return security_ptrace_access_check ( task , mode ) ;
2005-09-06 15:18:24 -07:00
}
Security: split proc ptrace checking into read vs. attach
Enable security modules to distinguish reading of process state via
proc from full ptrace access by renaming ptrace_may_attach to
ptrace_may_access and adding a mode argument indicating whether only
read access or full attach access is requested. This allows security
modules to permit access to reading process state without granting
full ptrace access. The base DAC/capability checking remains unchanged.
Read access to /proc/pid/mem continues to apply a full ptrace attach
check since check_mem_permission() already requires the current task
to already be ptracing the target. The other ptrace checks within
proc for elements like environ, maps, and fds are changed to pass the
read mode instead of attach.
In the SELinux case, we model such reading of process state as a
reading of a proc file labeled with the target process' label. This
enables SELinux policy to permit such reading of process state without
permitting control or manipulation of the target process, as there are
a number of cases where programs probe for such information via proc
but do not need to be able to control the target (e.g. procps,
lsof, PolicyKit, ConsoleKit). At present we have to choose between
allowing full ptrace in policy (more permissive than required/desired)
or breaking functionality (or in some cases just silencing the denials
via dontaudit rules but this can hide genuine attacks).
This version of the patch incorporates comments from Casey Schaufler
(change/replace existing ptrace_may_attach interface, pass access
mode), and Chris Wright (provide greater consistency in the checking).
Note that like their predecessors __ptrace_may_attach and
ptrace_may_attach, the __ptrace_may_access and ptrace_may_access
interfaces use different return value conventions from each other (0
or -errno vs. 1 or 0). I retained this difference to avoid any
changes to the caller logic but made the difference clearer by
changing the latter interface to return a bool rather than an int and
by adding a comment about it to ptrace.h for any future callers.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-05-19 08:32:49 -04:00
/*
 * Boolean wrapper around __ptrace_may_access(): runs the check with
 * @task locked and returns true when the check returned 0, false when
 * it returned an error.  Note the inverted convention relative to
 * __ptrace_may_access(), which returns 0 or -errno.
 */
bool ptrace_may_access(struct task_struct *task, unsigned int mode)
{
	int err;

	task_lock(task);
	err = __ptrace_may_access(task, mode);
	task_unlock(task);

	return !err;
}
2011-03-04 09:23:30 -08:00
static int ptrace_attach ( struct task_struct * task )
2005-04-16 15:20:36 -07:00
{
2011-03-23 10:37:00 +01:00
bool wait_trap = false ;
2005-04-16 15:20:36 -07:00
int retval ;
2006-05-07 10:49:33 -07:00
2007-03-20 13:58:35 -04:00
audit_ptrace ( task ) ;
2005-04-16 15:20:36 -07:00
retval = - EPERM ;
2009-06-17 16:27:31 -07:00
if ( unlikely ( task - > flags & PF_KTHREAD ) )
goto out ;
2007-10-18 23:40:18 -07:00
if ( same_thread_group ( task , current ) )
2006-05-07 10:49:33 -07:00
goto out ;
2009-06-17 16:27:32 -07:00
/*
* Protect exec ' s credential calculations against our interference ;
2009-05-08 13:55:22 +01:00
* SUID , SGID and LSM creds get determined differently
* under ptrace .
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
*/
2009-07-05 12:08:26 -07:00
retval = - ERESTARTNOINTR ;
2010-10-27 15:34:08 -07:00
if ( mutex_lock_interruptible ( & task - > signal - > cred_guard_mutex ) )
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 10:39:23 +11:00
goto out ;
2006-05-07 10:49:33 -07:00
2009-06-17 16:27:33 -07:00
task_lock ( task ) ;
Security: split proc ptrace checking into read vs. attach
Enable security modules to distinguish reading of process state via
proc from full ptrace access by renaming ptrace_may_attach to
ptrace_may_access and adding a mode argument indicating whether only
read access or full attach access is requested. This allows security
modules to permit access to reading process state without granting
full ptrace access. The base DAC/capability checking remains unchanged.
Read access to /proc/pid/mem continues to apply a full ptrace attach
check since check_mem_permission() requires the current task
to already be ptracing the target. The other ptrace checks within
proc for elements like environ, maps, and fds are changed to pass the
read mode instead of attach.
In the SELinux case, we model such reading of process state as a
reading of a proc file labeled with the target process' label. This
enables SELinux policy to permit such reading of process state without
permitting control or manipulation of the target process, as there are
a number of cases where programs probe for such information via proc
but do not need to be able to control the target (e.g. procps,
lsof, PolicyKit, ConsoleKit). At present we have to choose between
allowing full ptrace in policy (more permissive than required/desired)
or breaking functionality (or in some cases just silencing the denials
via dontaudit rules but this can hide genuine attacks).
This version of the patch incorporates comments from Casey Schaufler
(change/replace existing ptrace_may_attach interface, pass access
mode), and Chris Wright (provide greater consistency in the checking).
Note that like their predecessors __ptrace_may_attach and
ptrace_may_attach, the __ptrace_may_access and ptrace_may_access
interfaces use different return value conventions from each other (0
or -errno vs. 1 or 0). I retained this difference to avoid any
changes to the caller logic but made the difference clearer by
changing the latter interface to return a bool rather than an int and
by adding a comment about it to ptrace.h for any future callers.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-05-19 08:32:49 -04:00
retval = __ptrace_may_access ( task , PTRACE_MODE_ATTACH ) ;
2009-06-17 16:27:33 -07:00
task_unlock ( task ) ;
2005-04-16 15:20:36 -07:00
if ( retval )
2009-06-17 16:27:33 -07:00
goto unlock_creds ;
2005-04-16 15:20:36 -07:00
2009-06-17 16:27:33 -07:00
write_lock_irq ( & tasklist_lock ) ;
2009-06-17 16:27:31 -07:00
retval = - EPERM ;
if ( unlikely ( task - > exit_state ) )
2009-06-17 16:27:33 -07:00
goto unlock_tasklist ;
2009-06-17 16:27:32 -07:00
if ( task - > ptrace )
2009-06-17 16:27:33 -07:00
goto unlock_tasklist ;
2009-06-17 16:27:31 -07:00
2009-06-17 16:27:32 -07:00
task - > ptrace = PT_PTRACED ;
2011-03-23 16:43:20 -07:00
if ( task_ns_capable ( task , CAP_SYS_PTRACE ) )
2005-04-16 15:20:36 -07:00
task - > ptrace | = PT_PTRACE_CAP ;
__ptrace_link ( task , current ) ;
2008-04-30 00:53:14 -07:00
send_sig_info ( SIGSTOP , SEND_SIG_FORCED , task ) ;
2009-06-17 16:27:31 -07:00
2011-03-23 10:37:00 +01:00
spin_lock ( & task - > sighand - > siglock ) ;
/*
* If the task is already STOPPED , set GROUP_STOP_PENDING and
* TRAPPING , and kick it so that it transits to TRACED . TRAPPING
* will be cleared if the child completes the transition or any
* event which clears the group stop states happens . We ' ll wait
* for the transition to complete before returning from this
* function .
*
* This hides STOPPED - > RUNNING - > TRACED transition from the
* attaching thread but a different thread in the same group can
* still observe the transient RUNNING state . IOW , if another
* thread ' s WNOHANG wait ( 2 ) on the stopped tracee races against
* ATTACH , the wait ( 2 ) may fail due to the transient RUNNING .
*
* The following task_is_stopped ( ) test is safe as both transitions
* in and out of STOPPED are protected by siglock .
*/
if ( task_is_stopped ( task ) ) {
task - > group_stop | = GROUP_STOP_PENDING | GROUP_STOP_TRAPPING ;
signal_wake_up ( task , 1 ) ;
wait_trap = true ;
}
spin_unlock ( & task - > sighand - > siglock ) ;
2009-06-17 16:27:31 -07:00
retval = 0 ;
2009-06-17 16:27:33 -07:00
unlock_tasklist :
write_unlock_irq ( & tasklist_lock ) ;
unlock_creds :
2010-10-27 15:34:08 -07:00
mutex_unlock ( & task - > signal - > cred_guard_mutex ) ;
2006-05-07 10:49:33 -07:00
out :
2011-03-23 10:37:00 +01:00
if ( wait_trap )
wait_event ( current - > signal - > wait_chldexit ,
! ( task - > group_stop & GROUP_STOP_TRAPPING ) ) ;
2005-04-16 15:20:36 -07:00
return retval ;
}
2009-06-17 16:27:32 -07:00
/**
* ptrace_traceme - - helper for PTRACE_TRACEME
*
* Performs checks and sets PT_PTRACED .
* Should be used by all ptrace implementations for PTRACE_TRACEME .
*/
2011-03-04 09:23:30 -08:00
static int ptrace_traceme ( void )
2009-06-17 16:27:32 -07:00
{
int ret = - EPERM ;
2009-06-17 16:27:33 -07:00
write_lock_irq ( & tasklist_lock ) ;
/* Are we already being traced? */
2009-06-17 16:27:32 -07:00
if ( ! current - > ptrace ) {
ret = security_ptrace_traceme ( current - > parent ) ;
/*
* Check PF_EXITING to ensure - > real_parent has not passed
* exit_ptrace ( ) . Otherwise we don ' t report the error but
* pretend - > real_parent untraces us right after return .
*/
if ( ! ret & & ! ( current - > real_parent - > flags & PF_EXITING ) ) {
current - > ptrace = PT_PTRACED ;
__ptrace_link ( current , current - > real_parent ) ;
}
}
2009-06-17 16:27:33 -07:00
write_unlock_irq ( & tasklist_lock ) ;
2009-06-17 16:27:32 -07:00
return ret ;
}
2009-04-02 16:58:18 -07:00
/*
* Called with irqs disabled , returns true if childs should reap themselves .
*/
static int ignoring_children ( struct sighand_struct * sigh )
{
int ret ;
spin_lock ( & sigh - > siglock ) ;
ret = ( sigh - > action [ SIGCHLD - 1 ] . sa . sa_handler = = SIG_IGN ) | |
( sigh - > action [ SIGCHLD - 1 ] . sa . sa_flags & SA_NOCLDWAIT ) ;
spin_unlock ( & sigh - > siglock ) ;
return ret ;
}
/*
* Called with tasklist_lock held for writing .
* Unlink a traced task , and clean it up if it was a traced zombie .
* Return true if it needs to be reaped with release_task ( ) .
* ( We can ' t call release_task ( ) here because we already hold tasklist_lock . )
*
* If it ' s a zombie , our attachedness prevented normal parent notification
* or self - reaping . Do notification now if it would have happened earlier .
* If it should reap itself , return true .
*
ptrace: __ptrace_detach: do __wake_up_parent() if we reap the tracee
The bug is old; it wasn't caused by recent changes.
Test case:
static void *tfunc(void *arg)
{
int pid = (long)arg;
assert(ptrace(PTRACE_ATTACH, pid, NULL, NULL) == 0);
kill(pid, SIGKILL);
sleep(1);
return NULL;
}
int main(void)
{
pthread_t th;
long pid = fork();
if (!pid)
pause();
signal(SIGCHLD, SIG_IGN);
assert(pthread_create(&th, NULL, tfunc, (void*)pid) == 0);
int r = waitpid(-1, NULL, __WNOTHREAD);
printf("waitpid: %d %m\n", r);
return 0;
}
Before the patch this program hangs, after this patch waitpid() correctly
fails with errno == -ECHILD.
The problem is, __ptrace_detach() reaps the EXIT_ZOMBIE tracee if its
->real_parent is our sub-thread and we ignore SIGCHLD. But in this case
we should wake up other threads which can sleep in do_wait().
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Vitaly Mayatskikh <vmayatsk@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-09-23 15:56:44 -07:00
* If it ' s our own child , there is no notification to do . But if our normal
* children self - reap , then this child was prevented by ptrace and we must
* reap it now , in that case we must also wake up sub - threads sleeping in
* do_wait ( ) .
2009-04-02 16:58:18 -07:00
*/
static bool __ptrace_detach ( struct task_struct * tracer , struct task_struct * p )
{
__ptrace_unlink ( p ) ;
if ( p - > exit_state = = EXIT_ZOMBIE ) {
if ( ! task_detached ( p ) & & thread_group_empty ( p ) ) {
if ( ! same_thread_group ( p - > real_parent , tracer ) )
do_notify_parent ( p , p - > exit_signal ) ;
ptrace: __ptrace_detach: do __wake_up_parent() if we reap the tracee
The bug is old, it wasn't cause by recent changes.
Test case:
static void *tfunc(void *arg)
{
int pid = (long)arg;
assert(ptrace(PTRACE_ATTACH, pid, NULL, NULL) == 0);
kill(pid, SIGKILL);
sleep(1);
return NULL;
}
int main(void)
{
pthread_t th;
long pid = fork();
if (!pid)
pause();
signal(SIGCHLD, SIG_IGN);
assert(pthread_create(&th, NULL, tfunc, (void*)pid) == 0);
int r = waitpid(-1, NULL, __WNOTHREAD);
printf("waitpid: %d %m\n", r);
return 0;
}
Before the patch this program hangs, after this patch waitpid() correctly
fails with errno == -ECHILD.
The problem is, __ptrace_detach() reaps the EXIT_ZOMBIE tracee if its
->real_parent is our sub-thread and we ignore SIGCHLD. But in this case
we should wake up other threads which can sleep in do_wait().
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Vitaly Mayatskikh <vmayatsk@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-09-23 15:56:44 -07:00
else if ( ignoring_children ( tracer - > sighand ) ) {
__wake_up_parent ( p , tracer ) ;
2009-04-02 16:58:18 -07:00
p - > exit_signal = - 1 ;
ptrace: __ptrace_detach: do __wake_up_parent() if we reap the tracee
The bug is old, it wasn't cause by recent changes.
Test case:
static void *tfunc(void *arg)
{
int pid = (long)arg;
assert(ptrace(PTRACE_ATTACH, pid, NULL, NULL) == 0);
kill(pid, SIGKILL);
sleep(1);
return NULL;
}
int main(void)
{
pthread_t th;
long pid = fork();
if (!pid)
pause();
signal(SIGCHLD, SIG_IGN);
assert(pthread_create(&th, NULL, tfunc, (void*)pid) == 0);
int r = waitpid(-1, NULL, __WNOTHREAD);
printf("waitpid: %d %m\n", r);
return 0;
}
Before the patch this program hangs, after this patch waitpid() correctly
fails with errno == -ECHILD.
The problem is, __ptrace_detach() reaps the EXIT_ZOMBIE tracee if its
->real_parent is our sub-thread and we ignore SIGCHLD. But in this case
we should wake up other threads which can sleep in do_wait().
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Vitaly Mayatskikh <vmayatsk@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-09-23 15:56:44 -07:00
}
2009-04-02 16:58:18 -07:00
}
if ( task_detached ( p ) ) {
/* Mark it as in the process of being reaped. */
p - > exit_state = EXIT_DEAD ;
return true ;
}
}
return false ;
}
2011-03-04 09:23:30 -08:00
static int ptrace_detach ( struct task_struct * child , unsigned int data )
2005-04-16 15:20:36 -07:00
{
2009-04-02 16:58:18 -07:00
bool dead = false ;
2009-04-02 16:58:14 -07:00
2005-05-01 08:59:14 -07:00
if ( ! valid_signal ( data ) )
2006-02-15 22:50:10 +03:00
return - EIO ;
2005-04-16 15:20:36 -07:00
/* Architecture-specific hardware disable .. */
ptrace_disable ( child ) ;
2007-09-05 03:05:56 -07:00
clear_tsk_thread_flag ( child , TIF_SYSCALL_TRACE ) ;
2005-04-16 15:20:36 -07:00
2009-04-02 16:58:11 -07:00
write_lock_irq ( & tasklist_lock ) ;
2009-04-02 16:58:18 -07:00
/*
* This child can be already killed . Make sure de_thread ( ) or
* our sub - thread doing do_wait ( ) didn ' t do release_task ( ) yet .
*/
2009-04-02 16:58:11 -07:00
if ( child - > ptrace ) {
child - > exit_code = data ;
2009-04-02 16:58:14 -07:00
dead = __ptrace_detach ( current , child ) ;
2009-04-02 16:58:11 -07:00
}
2005-04-16 15:20:36 -07:00
write_unlock_irq ( & tasklist_lock ) ;
2009-04-02 16:58:14 -07:00
if ( unlikely ( dead ) )
release_task ( child ) ;
2005-04-16 15:20:36 -07:00
return 0 ;
}
2009-04-02 16:58:18 -07:00
/*
2010-08-10 18:03:07 -07:00
* Detach all tasks we were using ptrace on . Called with tasklist held
* for writing , and returns with it held too . But note it can release
* and reacquire the lock .
2009-04-02 16:58:18 -07:00
*/
void exit_ptrace ( struct task_struct * tracer )
2010-10-27 15:33:44 -07:00
__releases ( & tasklist_lock )
__acquires ( & tasklist_lock )
2009-04-02 16:58:18 -07:00
{
struct task_struct * p , * n ;
LIST_HEAD ( ptrace_dead ) ;
2010-08-10 18:03:07 -07:00
if ( likely ( list_empty ( & tracer - > ptraced ) ) )
return ;
2009-04-02 16:58:18 -07:00
list_for_each_entry_safe ( p , n , & tracer - > ptraced , ptrace_entry ) {
if ( __ptrace_detach ( tracer , p ) )
list_add ( & p - > ptrace_entry , & ptrace_dead ) ;
}
2010-08-10 18:03:07 -07:00
write_unlock_irq ( & tasklist_lock ) ;
2009-04-02 16:58:18 -07:00
BUG_ON ( ! list_empty ( & tracer - > ptraced ) ) ;
list_for_each_entry_safe ( p , n , & ptrace_dead , ptrace_entry ) {
list_del_init ( & p - > ptrace_entry ) ;
release_task ( p ) ;
}
2010-08-10 18:03:07 -07:00
write_lock_irq ( & tasklist_lock ) ;
2009-04-02 16:58:18 -07:00
}
2005-04-16 15:20:36 -07:00
/*
 * Copy up to @len bytes from address @src in @tsk's address space to the
 * user buffer @dst, going through a 128-byte bounce buffer.
 *
 * Returns the number of bytes copied, -EIO if nothing at all could be
 * read from @tsk, or -EFAULT if writing to @dst faulted.
 */
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
{
	int total = 0;

	while (len > 0) {
		char buf[128];
		int chunk = len;
		int got;

		if (chunk > sizeof(buf))
			chunk = sizeof(buf);

		got = access_process_vm(tsk, src, buf, chunk, 0);
		if (!got) {
			/* A short read after some progress is not an error. */
			if (!total)
				return -EIO;
			break;
		}
		if (copy_to_user(dst, buf, got))
			return -EFAULT;

		total += got;
		src += got;
		dst += got;
		len -= got;
	}

	return total;
}
/*
 * Copy up to @len bytes from the user buffer @src to address @dst in
 * @tsk's address space, going through a 128-byte bounce buffer.
 *
 * Returns the number of bytes written, -EIO if nothing at all could be
 * written to @tsk, or -EFAULT if reading from @src faulted.
 */
int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len)
{
	int total = 0;

	while (len > 0) {
		char buf[128];
		int chunk = len;
		int put;

		if (chunk > sizeof(buf))
			chunk = sizeof(buf);

		if (copy_from_user(buf, src, chunk))
			return -EFAULT;

		put = access_process_vm(tsk, dst, buf, chunk, 1);
		if (!put) {
			/* A short write after some progress is not an error. */
			if (!total)
				return -EIO;
			break;
		}

		total += put;
		src += put;
		dst += put;
		len -= put;
	}

	return total;
}
2010-10-27 15:33:45 -07:00
static int ptrace_setoptions ( struct task_struct * child , unsigned long data )
2005-04-16 15:20:36 -07:00
{
child - > ptrace & = ~ PT_TRACE_MASK ;
if ( data & PTRACE_O_TRACESYSGOOD )
child - > ptrace | = PT_TRACESYSGOOD ;
if ( data & PTRACE_O_TRACEFORK )
child - > ptrace | = PT_TRACE_FORK ;
if ( data & PTRACE_O_TRACEVFORK )
child - > ptrace | = PT_TRACE_VFORK ;
if ( data & PTRACE_O_TRACECLONE )
child - > ptrace | = PT_TRACE_CLONE ;
if ( data & PTRACE_O_TRACEEXEC )
child - > ptrace | = PT_TRACE_EXEC ;
if ( data & PTRACE_O_TRACEVFORKDONE )
child - > ptrace | = PT_TRACE_VFORK_DONE ;
if ( data & PTRACE_O_TRACEEXIT )
child - > ptrace | = PT_TRACE_EXIT ;
return ( data & ~ PTRACE_O_MASK ) ? - EINVAL : 0 ;
}
2008-04-20 13:10:12 -07:00
/*
 * Copy @child's last_siginfo into @info under the child's sighand lock.
 *
 * Returns -ESRCH if the sighand lock cannot be taken, -EINVAL if the
 * child has no last_siginfo, and 0 on success.
 */
static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
{
	unsigned long flags;
	int error;

	if (!lock_task_sighand(child, &flags))
		return -ESRCH;

	if (likely(child->last_siginfo != NULL)) {
		*info = *child->last_siginfo;
		error = 0;
	} else {
		error = -EINVAL;
	}
	unlock_task_sighand(child, &flags);

	return error;
}
2008-04-20 13:10:12 -07:00
static int ptrace_setsiginfo ( struct task_struct * child , const siginfo_t * info )
2005-04-16 15:20:36 -07:00
{
2009-06-17 16:27:36 -07:00
unsigned long flags ;
2005-04-16 15:20:36 -07:00
int error = - ESRCH ;
2009-06-17 16:27:36 -07:00
if ( lock_task_sighand ( child , & flags ) ) {
2005-04-16 15:20:36 -07:00
error = - EINVAL ;
if ( likely ( child - > last_siginfo ! = NULL ) ) {
2008-04-20 13:10:12 -07:00
* child - > last_siginfo = * info ;
2005-04-16 15:20:36 -07:00
error = 0 ;
}
2009-06-17 16:27:36 -07:00
unlock_task_sighand ( child , & flags ) ;
2005-04-16 15:20:36 -07:00
}
return error ;
}
2008-01-30 13:30:51 +01:00
/*
 * Request classifiers used by ptrace_resume(). Each one compares the
 * request against a PTRACE_* constant when the guarding macro is
 * defined, and otherwise expands to the constant 0.
 */
# ifdef PTRACE_SINGLESTEP
# define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
# else
# define is_singlestep(request) 0
# endif
2008-01-30 13:30:53 +01:00
# ifdef PTRACE_SINGLEBLOCK
# define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK)
# else
# define is_singleblock(request) 0
# endif
2008-01-30 13:30:51 +01:00
/* Note: guarded by PTRACE_SYSEMU, but compares against PTRACE_SYSEMU_SINGLESTEP. */
# ifdef PTRACE_SYSEMU
# define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP)
# else
# define is_sysemu_singlestep(request) 0
# endif
2010-10-27 15:33:45 -07:00
static int ptrace_resume ( struct task_struct * child , long request ,
unsigned long data )
2008-01-30 13:30:51 +01:00
{
if ( ! valid_signal ( data ) )
return - EIO ;
if ( request = = PTRACE_SYSCALL )
set_tsk_thread_flag ( child , TIF_SYSCALL_TRACE ) ;
else
clear_tsk_thread_flag ( child , TIF_SYSCALL_TRACE ) ;
# ifdef TIF_SYSCALL_EMU
if ( request = = PTRACE_SYSEMU | | request = = PTRACE_SYSEMU_SINGLESTEP )
set_tsk_thread_flag ( child , TIF_SYSCALL_EMU ) ;
else
clear_tsk_thread_flag ( child , TIF_SYSCALL_EMU ) ;
# endif
2008-01-30 13:30:53 +01:00
if ( is_singleblock ( request ) ) {
if ( unlikely ( ! arch_has_block_step ( ) ) )
return - EIO ;
user_enable_block_step ( child ) ;
} else if ( is_singlestep ( request ) | | is_sysemu_singlestep ( request ) ) {
2008-01-30 13:30:51 +01:00
if ( unlikely ( ! arch_has_single_step ( ) ) )
return - EIO ;
user_enable_single_step ( child ) ;
2009-04-07 23:21:06 -07:00
} else {
2008-01-30 13:30:51 +01:00
user_disable_single_step ( child ) ;
2009-04-07 23:21:06 -07:00
}
2008-01-30 13:30:51 +01:00
child - > exit_code = data ;
wake_up_process ( child ) ;
return 0 ;
}
2010-02-11 11:51:00 -08:00
# ifdef CONFIG_HAVE_ARCH_TRACEHOOK
static const struct user_regset *
find_regset ( const struct user_regset_view * view , unsigned int type )
{
const struct user_regset * regset ;
int n ;
for ( n = 0 ; n < view - > n ; + + n ) {
regset = view - > regsets + n ;
if ( regset - > core_note_type = = type )
return regset ;
}
return NULL ;
}
static int ptrace_regset ( struct task_struct * task , int req , unsigned int type ,
struct iovec * kiov )
{
const struct user_regset_view * view = task_user_regset_view ( task ) ;
const struct user_regset * regset = find_regset ( view , type ) ;
int regset_no ;
if ( ! regset | | ( kiov - > iov_len % regset - > size ) ! = 0 )
2010-02-22 14:51:32 -08:00
return - EINVAL ;
2010-02-11 11:51:00 -08:00
regset_no = regset - view - > regsets ;
kiov - > iov_len = min ( kiov - > iov_len ,
( __kernel_size_t ) ( regset - > n * regset - > size ) ) ;
if ( req = = PTRACE_GETREGSET )
return copy_regset_to_user ( task , view , regset_no , 0 ,
kiov - > iov_len , kiov - > iov_base ) ;
else
return copy_regset_from_user ( task , view , regset_no , 0 ,
kiov - > iov_len , kiov - > iov_base ) ;
}
# endif
2005-04-16 15:20:36 -07:00
int ptrace_request ( struct task_struct * child , long request ,
2010-10-27 15:33:45 -07:00
unsigned long addr , unsigned long data )
2005-04-16 15:20:36 -07:00
{
int ret = - EIO ;
2008-04-20 13:10:12 -07:00
siginfo_t siginfo ;
2010-10-27 15:33:46 -07:00
void __user * datavp = ( void __user * ) data ;
unsigned long __user * datalp = datavp ;
2005-04-16 15:20:36 -07:00
switch ( request ) {
2008-01-30 13:31:47 +01:00
case PTRACE_PEEKTEXT :
case PTRACE_PEEKDATA :
return generic_ptrace_peekdata ( child , addr , data ) ;
case PTRACE_POKETEXT :
case PTRACE_POKEDATA :
return generic_ptrace_pokedata ( child , addr , data ) ;
2005-04-16 15:20:36 -07:00
# ifdef PTRACE_OLDSETOPTIONS
case PTRACE_OLDSETOPTIONS :
# endif
case PTRACE_SETOPTIONS :
ret = ptrace_setoptions ( child , data ) ;
break ;
case PTRACE_GETEVENTMSG :
2010-10-27 15:33:46 -07:00
ret = put_user ( child - > ptrace_message , datalp ) ;
2005-04-16 15:20:36 -07:00
break ;
2008-04-20 13:10:12 -07:00
2005-04-16 15:20:36 -07:00
case PTRACE_GETSIGINFO :
2008-04-20 13:10:12 -07:00
ret = ptrace_getsiginfo ( child , & siginfo ) ;
if ( ! ret )
2010-10-27 15:33:46 -07:00
ret = copy_siginfo_to_user ( datavp , & siginfo ) ;
2005-04-16 15:20:36 -07:00
break ;
2008-04-20 13:10:12 -07:00
2005-04-16 15:20:36 -07:00
case PTRACE_SETSIGINFO :
2010-10-27 15:33:46 -07:00
if ( copy_from_user ( & siginfo , datavp , sizeof siginfo ) )
2008-04-20 13:10:12 -07:00
ret = - EFAULT ;
else
ret = ptrace_setsiginfo ( child , & siginfo ) ;
2005-04-16 15:20:36 -07:00
break ;
2008-04-20 13:10:12 -07:00
2007-10-16 01:23:45 -07:00
case PTRACE_DETACH : /* detach a process that was attached. */
ret = ptrace_detach ( child , data ) ;
break ;
2008-01-30 13:30:51 +01:00
2010-05-26 14:42:52 -07:00
# ifdef CONFIG_BINFMT_ELF_FDPIC
case PTRACE_GETFDPIC : {
2010-05-26 14:42:53 -07:00
struct mm_struct * mm = get_task_mm ( child ) ;
2010-05-26 14:42:52 -07:00
unsigned long tmp = 0 ;
2010-05-26 14:42:53 -07:00
ret = - ESRCH ;
if ( ! mm )
break ;
2010-05-26 14:42:52 -07:00
switch ( addr ) {
case PTRACE_GETFDPIC_EXEC :
2010-05-26 14:42:53 -07:00
tmp = mm - > context . exec_fdpic_loadmap ;
2010-05-26 14:42:52 -07:00
break ;
case PTRACE_GETFDPIC_INTERP :
2010-05-26 14:42:53 -07:00
tmp = mm - > context . interp_fdpic_loadmap ;
2010-05-26 14:42:52 -07:00
break ;
default :
break ;
}
2010-05-26 14:42:53 -07:00
mmput ( mm ) ;
2010-05-26 14:42:52 -07:00
2010-10-27 15:33:46 -07:00
ret = put_user ( tmp , datalp ) ;
2010-05-26 14:42:52 -07:00
break ;
}
# endif
2008-01-30 13:30:51 +01:00
# ifdef PTRACE_SINGLESTEP
case PTRACE_SINGLESTEP :
# endif
2008-01-30 13:30:53 +01:00
# ifdef PTRACE_SINGLEBLOCK
case PTRACE_SINGLEBLOCK :
# endif
2008-01-30 13:30:51 +01:00
# ifdef PTRACE_SYSEMU
case PTRACE_SYSEMU :
case PTRACE_SYSEMU_SINGLESTEP :
# endif
case PTRACE_SYSCALL :
case PTRACE_CONT :
return ptrace_resume ( child , request , data ) ;
case PTRACE_KILL :
if ( child - > exit_state ) /* already dead */
return 0 ;
return ptrace_resume ( child , request , SIGKILL ) ;
2010-02-11 11:51:00 -08:00
# ifdef CONFIG_HAVE_ARCH_TRACEHOOK
case PTRACE_GETREGSET :
case PTRACE_SETREGSET :
{
struct iovec kiov ;
2010-10-27 15:33:46 -07:00
struct iovec __user * uiov = datavp ;
2010-02-11 11:51:00 -08:00
if ( ! access_ok ( VERIFY_WRITE , uiov , sizeof ( * uiov ) ) )
return - EFAULT ;
if ( __get_user ( kiov . iov_base , & uiov - > iov_base ) | |
__get_user ( kiov . iov_len , & uiov - > iov_len ) )
return - EFAULT ;
ret = ptrace_regset ( child , request , addr , & kiov ) ;
if ( ! ret )
ret = __put_user ( kiov . iov_len , & uiov - > iov_len ) ;
break ;
}
# endif
2005-04-16 15:20:36 -07:00
default :
break ;
}
return ret ;
}
2005-11-07 00:59:47 -08:00
2009-06-17 16:27:34 -07:00
/*
 * Look up the task with virtual pid @pid and take a reference on it.
 *
 * Returns the task with its reference count raised (the caller must
 * drop it with put_task_struct()), or ERR_PTR(-ESRCH) if no such task
 * exists.
 */
static struct task_struct *ptrace_get_task_struct(pid_t pid)
{
	struct task_struct *child;

	/* The lookup and the reference grab both happen under RCU. */
	rcu_read_lock();
	child = find_task_by_vpid(pid);
	if (child)
		get_task_struct(child);
	rcu_read_unlock();

	return child ? child : ERR_PTR(-ESRCH);
}
2007-10-16 01:26:37 -07:00
/*
 * Default no-op for when arch_ptrace_attach has not already been
 * defined as a macro.
 */
# ifndef arch_ptrace_attach
# define arch_ptrace_attach(child) do { } while (0)
# endif
2010-10-27 15:33:45 -07:00
/*
 * ptrace(2) system call entry point.
 *
 * PTRACE_TRACEME and PTRACE_ATTACH are handled here; every other request
 * is passed to arch_ptrace() once ptrace_check_attach() has accepted the
 * target task.
 */
SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
		unsigned long, data)
{
	struct task_struct *child;
	long ret;

	if (request == PTRACE_TRACEME) {
		ret = ptrace_traceme();
		if (ret == 0)
			arch_ptrace_attach(current);
		return ret;
	}

	child = ptrace_get_task_struct(pid);
	if (IS_ERR(child))
		return PTR_ERR(child);

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		/*
		 * Some architectures need to do book-keeping after
		 * a ptrace attach.
		 */
		if (ret == 0)
			arch_ptrace_attach(child);
	} else {
		ret = ptrace_check_attach(child, request == PTRACE_KILL);
		if (ret >= 0)
			ret = arch_ptrace(child, request, addr, data);
	}

	/* Drop the reference taken by ptrace_get_task_struct(). */
	put_task_struct(child);
	return ret;
}
2007-07-17 04:03:43 -07:00
2010-10-27 15:33:45 -07:00
int generic_ptrace_peekdata ( struct task_struct * tsk , unsigned long addr ,
unsigned long data )
2007-07-17 04:03:43 -07:00
{
unsigned long tmp ;
int copied ;
copied = access_process_vm ( tsk , addr , & tmp , sizeof ( tmp ) , 0 ) ;
if ( copied ! = sizeof ( tmp ) )
return - EIO ;
return put_user ( tmp , ( unsigned long __user * ) data ) ;
}
2007-07-17 04:03:44 -07:00
2010-10-27 15:33:45 -07:00
int generic_ptrace_pokedata ( struct task_struct * tsk , unsigned long addr ,
unsigned long data )
2007-07-17 04:03:44 -07:00
{
int copied ;
copied = access_process_vm ( tsk , addr , & data , sizeof ( data ) , 1 ) ;
return ( copied = = sizeof ( data ) ) ? 0 : - EIO ;
}
2008-01-30 13:31:47 +01:00
2008-11-25 08:10:03 +01:00
# if defined CONFIG_COMPAT
2008-01-30 13:31:47 +01:00
# include <linux/compat.h>
/*
 * Compat counterpart of ptrace_request(): handles the requests whose
 * user-space data layout differs for compat callers (word-sized
 * peek/poke, event message, siginfo, regset iovec) and passes everything
 * else on to ptrace_request().
 */
int compat_ptrace_request(struct task_struct *child, compat_long_t request,
			  compat_ulong_t addr, compat_ulong_t data)
{
	compat_ulong_t __user *datap = compat_ptr(data);
	compat_ulong_t word;
	siginfo_t siginfo;
	int ret;

	switch (request) {
	case PTRACE_PEEKTEXT:
	case PTRACE_PEEKDATA:
		ret = access_process_vm(child, addr, &word, sizeof(word), 0);
		if (ret != sizeof(word))
			return -EIO;
		return put_user(word, datap);

	case PTRACE_POKETEXT:
	case PTRACE_POKEDATA:
		ret = access_process_vm(child, addr, &data, sizeof(data), 1);
		if (ret != sizeof(data))
			return -EIO;
		return 0;

	case PTRACE_GETEVENTMSG:
		return put_user((compat_ulong_t) child->ptrace_message, datap);

	case PTRACE_GETSIGINFO:
		ret = ptrace_getsiginfo(child, &siginfo);
		if (ret)
			return ret;
		return copy_siginfo_to_user32(
			(struct compat_siginfo __user *) datap,
			&siginfo);

	case PTRACE_SETSIGINFO:
		/* Zero the native siginfo before the compat conversion fills it in. */
		memset(&siginfo, 0, sizeof siginfo);
		if (copy_siginfo_from_user32(
			    &siginfo, (struct compat_siginfo __user *) datap))
			return -EFAULT;
		return ptrace_setsiginfo(child, &siginfo);

#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
	case PTRACE_GETREGSET:
	case PTRACE_SETREGSET:
	{
		struct iovec kiov;
		struct compat_iovec __user *uiov =
			(struct compat_iovec __user *) datap;
		compat_uptr_t ptr;
		compat_size_t len;

		if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
			return -EFAULT;

		if (__get_user(ptr, &uiov->iov_base) ||
		    __get_user(len, &uiov->iov_len))
			return -EFAULT;

		/* Widen the compat iovec into a native one. */
		kiov.iov_base = compat_ptr(ptr);
		kiov.iov_len = len;

		ret = ptrace_regset(child, request, addr, &kiov);
		if (ret)
			return ret;
		/* Report the (possibly clamped) length back to user space. */
		return __put_user(kiov.iov_len, &uiov->iov_len);
	}
#endif

	default:
		return ptrace_request(child, request, addr, data);
	}
}
2008-01-30 13:31:48 +01:00
/*
 * Compat ptrace(2) system call entry point.
 *
 * Mirrors the native ptrace syscall: PTRACE_TRACEME and PTRACE_ATTACH
 * are handled here (including the arch_ptrace_attach() book-keeping hook
 * on success), every other request is passed to compat_arch_ptrace()
 * once ptrace_check_attach() has accepted the target task.
 */
asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
				  compat_long_t addr, compat_long_t data)
{
	struct task_struct *child;
	long ret;

	if (request == PTRACE_TRACEME) {
		ret = ptrace_traceme();
		/* Same post-attach hook as the native syscall path. */
		if (!ret)
			arch_ptrace_attach(current);
		goto out;
	}

	child = ptrace_get_task_struct(pid);
	if (IS_ERR(child)) {
		ret = PTR_ERR(child);
		goto out;
	}

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		/*
		 * Some architectures need to do book-keeping after
		 * a ptrace attach.
		 */
		if (!ret)
			arch_ptrace_attach(child);
		goto out_put_task_struct;
	}

	ret = ptrace_check_attach(child, request == PTRACE_KILL);
	if (!ret)
		ret = compat_arch_ptrace(child, request, addr, data);

 out_put_task_struct:
	/* Drop the reference taken by ptrace_get_task_struct(). */
	put_task_struct(child);
 out:
	return ret;
}
2008-11-25 08:10:03 +01:00
# endif /* CONFIG_COMPAT */
2011-04-07 16:53:20 +02:00
# ifdef CONFIG_HAVE_HW_BREAKPOINT
int ptrace_get_breakpoints ( struct task_struct * tsk )
{
if ( atomic_inc_not_zero ( & tsk - > ptrace_bp_refcnt ) )
return 0 ;
return - 1 ;
}
void ptrace_put_breakpoints ( struct task_struct * tsk )
{
if ( atomic_dec_and_test ( & tsk - > ptrace_bp_refcnt ) )
flush_ptrace_hw_breakpoint ( tsk ) ;
}
# endif /* CONFIG_HAVE_HW_BREAKPOINT */