2019-05-19 15:08:55 +03:00
// SPDX-License-Identifier: GPL-2.0-only
2005-04-17 02:20:36 +04:00
/*
 * linux/kernel/ptrace.c
 *
 * (C) Copyright 1999 Linus Torvalds
 *
 * Common interfaces for "ptrace()" which we do not want
 * to continually duplicate across every architecture.
 */
2006-01-11 23:17:46 +03:00
# include <linux/capability.h>
2011-05-23 22:51:41 +04:00
# include <linux/export.h>
2005-04-17 02:20:36 +04:00
# include <linux/sched.h>
2017-02-08 20:51:29 +03:00
# include <linux/sched/mm.h>
2017-02-08 20:51:30 +03:00
# include <linux/sched/coredump.h>
2017-02-08 20:51:36 +03:00
# include <linux/sched/task.h>
2005-04-17 02:20:36 +04:00
# include <linux/errno.h>
# include <linux/mm.h>
# include <linux/highmem.h>
# include <linux/pagemap.h>
# include <linux/ptrace.h>
# include <linux/security.h>
2005-05-01 19:59:14 +04:00
# include <linux/signal.h>
2013-05-08 03:19:08 +04:00
# include <linux/uio.h>
2007-03-20 20:58:35 +03:00
# include <linux/audit.h>
2007-10-19 10:40:14 +04:00
# include <linux/pid_namespace.h>
2008-02-06 12:36:44 +03:00
# include <linux/syscalls.h>
2009-04-08 10:21:06 +04:00
# include <linux/uaccess.h>
2010-02-11 22:51:00 +03:00
# include <linux/regset.h>
2011-04-07 18:53:20 +04:00
# include <linux/hw_breakpoint.h>
2011-07-15 21:45:18 +04:00
# include <linux/cn_proc.h>
2013-05-01 02:27:59 +04:00
# include <linux/compat.h>
2019-03-29 06:44:13 +03:00
# include <linux/sched/signal.h>
2005-04-17 02:20:36 +04:00
ptrace: add PTRACE_GET_SYSCALL_INFO request
PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain
details of the syscall the tracee is blocked in.
There are two reasons for a special syscall-related ptrace request.
Firstly, with the current ptrace API there are cases when ptracer cannot
retrieve necessary information about syscalls. Some examples include:
* The notorious int-0x80-from-64-bit-task issue. See [1] for details.
In short, if a 64-bit task performs a syscall through int 0x80, its
tracer has no reliable means to find out that the syscall was, in
fact, a compat syscall, and misidentifies it.
* Syscall-enter-stop and syscall-exit-stop look the same for the
tracer. Common practice is to keep track of the sequence of
ptrace-stops in order not to mix the two syscall-stops up. But it is
not as simple as it looks; for example, strace had a (just recently
fixed) long-standing bug where attaching strace to a tracee that is
performing the execve system call led to the tracer identifying the
following syscall-exit-stop as syscall-enter-stop, which messed up
all the state tracking.
* Since the introduction of commit 84d77d3f06e7 ("ptrace: Don't allow
accessing an undumpable mm"), both PTRACE_PEEKDATA and
process_vm_readv become unavailable when the process dumpable flag is
cleared. On such architectures as ia64 this results in all syscall
arguments being unavailable for the tracer.
Secondly, ptracers also have to support a lot of arch-specific code for
obtaining information about the tracee. For some architectures, this
requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall
argument and return value.
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid,
void *addr, void *data);
...
PTRACE_GET_SYSCALL_INFO
Retrieve information about the syscall that caused the stop.
The information is placed into the buffer pointed by "data"
argument, which should be a pointer to a buffer of type
"struct ptrace_syscall_info".
The "addr" argument contains the size of the buffer pointed to
by "data" argument (i.e., sizeof(struct ptrace_syscall_info)).
The return value contains the number of bytes available
to be written by the kernel.
If the size of data to be written by the kernel exceeds the size
specified by "addr" argument, the output is truncated.
[ldv@altlinux.org: selftests/seccomp/seccomp_bpf: update for PTRACE_GET_SYSCALL_INFO]
Link: http://lkml.kernel.org/r/20190708182904.GA12332@altlinux.org
Link: http://lkml.kernel.org/r/20190510152842.GF28558@altlinux.org
Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Co-developed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Eugene Syromyatnikov <esyr@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greentime Hu <greentime@andestech.com>
Cc: Helge Deller <deller@gmx.de> [parisc]
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: kbuild test robot <lkp@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vincent Chen <deanbo422@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 02:29:42 +03:00
# include <asm/syscall.h> /* for syscall_get_* */
2016-11-22 21:06:50 +03:00
/*
* Access another process ' address space via ptrace .
* Source / target buffer must be kernel space ,
* Do not walk the page table directly , use get_user_pages
*/
int ptrace_access_vm ( struct task_struct * tsk , unsigned long addr ,
void * buf , int len , unsigned int gup_flags )
{
struct mm_struct * mm ;
int ret ;
mm = get_task_mm ( tsk ) ;
if ( ! mm )
return 0 ;
if ( ! tsk - > ptrace | |
( current ! = tsk - > parent ) | |
( ( get_dumpable ( mm ) ! = SUID_DUMP_USER ) & &
! ptracer_capable ( tsk , mm - > user_ns ) ) ) {
mmput ( mm ) ;
return 0 ;
}
ret = __access_remote_vm ( tsk , mm , addr , buf , len , gup_flags ) ;
mmput ( mm ) ;
return ret ;
}
2008-12-19 17:10:24 +03:00
2017-05-22 23:40:12 +03:00
void __ptrace_link ( struct task_struct * child , struct task_struct * new_parent ,
const struct cred * ptracer_cred )
{
BUG_ON ( ! list_empty ( & child - > ptrace_entry ) ) ;
list_add ( & child - > ptrace_entry , & new_parent - > ptraced ) ;
child - > parent = new_parent ;
child - > ptracer_cred = get_cred ( ptracer_cred ) ;
}
2005-04-17 02:20:36 +04:00
/*
 * ptrace a task: make the debugger its new parent and
 * move it to the ptrace list.
 *
 * Must be called with the tasklist lock write-held.
 */
static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
	/*
	 * Always record the credentials of the task that is requesting the
	 * ptrace relationship (current), PTRACE_TRACEME included:
	 * current_cred() can't change under us, and current is the proper
	 * subject for access control.
	 */
	const struct cred *requester_cred = current_cred();

	__ptrace_link(child, new_parent, requester_cred);
}
2009-04-08 10:21:06 +04:00
2011-03-23 12:37:01 +03:00
/**
* __ptrace_unlink - unlink ptracee and restore its execution state
* @ child : ptracee to be unlinked
2005-04-17 02:20:36 +04:00
*
ptrace: Always put ptracee into appropriate execution state
Currently, __ptrace_unlink() wakes up the tracee iff it's in
TASK_TRACED. For unlinking from PTRACE_DETACH, this is correct as the
tracee is guaranteed to be in TASK_TRACED or dead; however, unlinking
also happens when the ptracer exits and in this case the ptracee can
be in any state and ptrace might be left running even if the group it
belongs to is stopped.
This patch updates __ptrace_unlink() such that GROUP_STOP_PENDING is
reinstated regardless of the ptracee's current state as long as it's
alive and makes sure that signal_wake_up() is called if execution
state transition is necessary.
Test case follows.
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
static const struct timespec ts1s = { .tv_sec = 1 };
int main(void)
{
pid_t tracee;
siginfo_t si;
tracee = fork();
if (tracee == 0) {
while (1) {
nanosleep(&ts1s, NULL);
write(1, ".", 1);
}
}
ptrace(PTRACE_ATTACH, tracee, NULL, NULL);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
write(1, "exiting", 7);
return 0;
}
Before the patch, after the parent process exits, the child is left
running and prints out "." every second.
exiting..... (continues)
After the patch, the group stop initiated by the implied SIGSTOP from
PTRACE_ATTACH is re-established when the parent exits.
exiting
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
2011-03-23 12:37:01 +03:00
* Remove @ child from the ptrace list , move it back to the original parent ,
* and restore the execution state so that it conforms to the group stop
* state .
*
* Unlinking can happen via two paths - explicit PTRACE_DETACH or ptracer
* exiting . For PTRACE_DETACH , unless the ptracee has been killed between
* ptrace_check_attach ( ) and here , it ' s guaranteed to be in TASK_TRACED .
* If the ptracer is exiting , the ptracee can be in any state .
*
* After detach , the ptracee should be in a state which conforms to the
* group stop . If the group is stopped or in the process of stopping , the
* ptracee should be put into TASK_STOPPED ; otherwise , it should be woken
* up from TASK_TRACED .
*
* If the ptracee is in TASK_TRACED and needs to be moved to TASK_STOPPED ,
* it goes through TRACED - > RUNNING - > STOPPED transition which is similar
* to but in the opposite direction of what happens while attaching to a
* stopped task . However , in this direction , the intermediate RUNNING
* state is not hidden even from the current ptracer and if it immediately
* re - attaches and performs a WNOHANG wait ( 2 ) , it may fail .
2011-03-23 12:37:01 +03:00
*
* CONTEXT :
* write_lock_irq ( tasklist_lock )
2005-04-17 02:20:36 +04:00
*/
2006-07-03 11:25:41 +04:00
void __ptrace_unlink ( struct task_struct * child )
2005-04-17 02:20:36 +04:00
{
2016-11-15 03:48:07 +03:00
const struct cred * old_cred ;
2006-02-15 22:50:10 +03:00
BUG_ON ( ! child - > ptrace ) ;
2016-10-11 23:53:46 +03:00
clear_tsk_thread_flag ( child , TIF_SYSCALL_TRACE ) ;
2019-05-23 12:06:15 +03:00
# ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag ( child , TIF_SYSCALL_EMU ) ;
# endif
2016-10-11 23:53:46 +03:00
2008-03-25 04:36:23 +03:00
child - > parent = child - > real_parent ;
list_del_init ( & child - > ptrace_entry ) ;
2016-11-15 03:48:07 +03:00
old_cred = child - > ptracer_cred ;
child - > ptracer_cred = NULL ;
put_cred ( old_cred ) ;
2005-04-17 02:20:36 +04:00
spin_lock ( & child - > sighand - > siglock ) ;
2016-03-23 00:25:33 +03:00
child - > ptrace = 0 ;
2011-06-14 13:20:14 +04:00
/*
* Clear all pending traps and TRAPPING . TRAPPING should be
* cleared regardless of JOBCTL_STOP_PENDING . Do it explicitly .
*/
task_clear_jobctl_pending ( child , JOBCTL_TRAP_MASK ) ;
task_clear_jobctl_trapping ( child ) ;
ptrace: Always put ptracee into appropriate execution state
Currently, __ptrace_unlink() wakes up the tracee iff it's in
TASK_TRACED. For unlinking from PTRACE_DETACH, this is correct as the
tracee is guaranteed to be in TASK_TRACED or dead; however, unlinking
also happens when the ptracer exits and in this case the ptracee can
be in any state and ptrace might be left running even if the group it
belongs to is stopped.
This patch updates __ptrace_unlink() such that GROUP_STOP_PENDING is
reinstated regardless of the ptracee's current state as long as it's
alive and makes sure that signal_wake_up() is called if execution
state transition is necessary.
Test case follows.
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
static const struct timespec ts1s = { .tv_sec = 1 };
int main(void)
{
pid_t tracee;
siginfo_t si;
tracee = fork();
if (tracee == 0) {
while (1) {
nanosleep(&ts1s, NULL);
write(1, ".", 1);
}
}
ptrace(PTRACE_ATTACH, tracee, NULL, NULL);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
write(1, "exiting", 7);
return 0;
}
Before the patch, after the parent process exits, the child is left
running and prints out "." every second.
exiting..... (continues)
After the patch, the group stop initiated by the implied SIGSTOP from
PTRACE_ATTACH is re-established when the parent exits.
exiting
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
2011-03-23 12:37:01 +03:00
/*
2011-06-02 13:13:59 +04:00
* Reinstate JOBCTL_STOP_PENDING if group stop is in effect and
ptrace: Always put ptracee into appropriate execution state
Currently, __ptrace_unlink() wakes up the tracee iff it's in
TASK_TRACED. For unlinking from PTRACE_DETACH, this is correct as the
tracee is guaranteed to be in TASK_TRACED or dead; however, unlinking
also happens when the ptracer exits and in this case the ptracee can
be in any state and ptrace might be left running even if the group it
belongs to is stopped.
This patch updates __ptrace_unlink() such that GROUP_STOP_PENDING is
reinstated regardless of the ptracee's current state as long as it's
alive and makes sure that signal_wake_up() is called if execution
state transition is necessary.
Test case follows.
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
static const struct timespec ts1s = { .tv_sec = 1 };
int main(void)
{
pid_t tracee;
siginfo_t si;
tracee = fork();
if (tracee == 0) {
while (1) {
nanosleep(&ts1s, NULL);
write(1, ".", 1);
}
}
ptrace(PTRACE_ATTACH, tracee, NULL, NULL);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
write(1, "exiting", 7);
return 0;
}
Before the patch, after the parent process exits, the child is left
running and prints out "." every second.
exiting..... (continues)
After the patch, the group stop initiated by the implied SIGSTOP from
PTRACE_ATTACH is re-established when the parent exits.
exiting
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
2011-03-23 12:37:01 +03:00
* @ child isn ' t dead .
*/
if ( ! ( child - > flags & PF_EXITING ) & &
( child - > signal - > flags & SIGNAL_STOP_STOPPED | |
2012-01-04 20:29:20 +04:00
child - > signal - > group_stop_count ) ) {
2011-06-02 13:13:59 +04:00
child - > jobctl | = JOBCTL_STOP_PENDING ;
ptrace: Always put ptracee into appropriate execution state
Currently, __ptrace_unlink() wakes up the tracee iff it's in
TASK_TRACED. For unlinking from PTRACE_DETACH, this is correct as the
tracee is guaranteed to be in TASK_TRACED or dead; however, unlinking
also happens when the ptracer exits and in this case the ptracee can
be in any state and ptrace might be left running even if the group it
belongs to is stopped.
This patch updates __ptrace_unlink() such that GROUP_STOP_PENDING is
reinstated regardless of the ptracee's current state as long as it's
alive and makes sure that signal_wake_up() is called if execution
state transition is necessary.
Test case follows.
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
static const struct timespec ts1s = { .tv_sec = 1 };
int main(void)
{
pid_t tracee;
siginfo_t si;
tracee = fork();
if (tracee == 0) {
while (1) {
nanosleep(&ts1s, NULL);
write(1, ".", 1);
}
}
ptrace(PTRACE_ATTACH, tracee, NULL, NULL);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
write(1, "exiting", 7);
return 0;
}
Before the patch, after the parent process exits, the child is left
running and prints out "." every second.
exiting..... (continues)
After the patch, the group stop initiated by the implied SIGSTOP from
PTRACE_ATTACH is re-established when the parent exits.
exiting
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
2011-03-23 12:37:01 +03:00
2012-01-04 20:29:20 +04:00
/*
* This is only possible if this thread was cloned by the
* traced task running in the stopped group , set the signal
* for the future reports .
* FIXME : we should change ptrace_init_task ( ) to handle this
* case .
*/
if ( ! ( child - > jobctl & JOBCTL_STOP_SIGMASK ) )
child - > jobctl | = SIGSTOP ;
}
ptrace: Always put ptracee into appropriate execution state
Currently, __ptrace_unlink() wakes up the tracee iff it's in
TASK_TRACED. For unlinking from PTRACE_DETACH, this is correct as the
tracee is guaranteed to be in TASK_TRACED or dead; however, unlinking
also happens when the ptracer exits and in this case the ptracee can
be in any state and ptrace might be left running even if the group it
belongs to is stopped.
This patch updates __ptrace_unlink() such that GROUP_STOP_PENDING is
reinstated regardless of the ptracee's current state as long as it's
alive and makes sure that signal_wake_up() is called if execution
state transition is necessary.
Test case follows.
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
static const struct timespec ts1s = { .tv_sec = 1 };
int main(void)
{
pid_t tracee;
siginfo_t si;
tracee = fork();
if (tracee == 0) {
while (1) {
nanosleep(&ts1s, NULL);
write(1, ".", 1);
}
}
ptrace(PTRACE_ATTACH, tracee, NULL, NULL);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
write(1, "exiting", 7);
return 0;
}
Before the patch, after the parent process exits, the child is left
running and prints out "." every second.
exiting..... (continues)
After the patch, the group stop initiated by the implied SIGSTOP from
PTRACE_ATTACH is re-established when the parent exits.
exiting
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
2011-03-23 12:37:01 +03:00
/*
* If transition to TASK_STOPPED is pending or in TASK_TRACED , kick
* @ child in the butt . Note that @ resume should be used iff @ child
* is in TASK_TRACED ; otherwise , we might unduly disrupt
* TASK_KILLABLE sleeps .
*/
2011-06-02 13:13:59 +04:00
if ( child - > jobctl & JOBCTL_STOP_PENDING | | task_is_traced ( child ) )
2013-01-21 23:47:41 +04:00
ptrace_signal_wake_up ( child , true ) ;
ptrace: Always put ptracee into appropriate execution state
Currently, __ptrace_unlink() wakes up the tracee iff it's in
TASK_TRACED. For unlinking from PTRACE_DETACH, this is correct as the
tracee is guaranteed to be in TASK_TRACED or dead; however, unlinking
also happens when the ptracer exits and in this case the ptracee can
be in any state and ptrace might be left running even if the group it
belongs to is stopped.
This patch updates __ptrace_unlink() such that GROUP_STOP_PENDING is
reinstated regardless of the ptracee's current state as long as it's
alive and makes sure that signal_wake_up() is called if execution
state transition is necessary.
Test case follows.
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
static const struct timespec ts1s = { .tv_sec = 1 };
int main(void)
{
pid_t tracee;
siginfo_t si;
tracee = fork();
if (tracee == 0) {
while (1) {
nanosleep(&ts1s, NULL);
write(1, ".", 1);
}
}
ptrace(PTRACE_ATTACH, tracee, NULL, NULL);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
waitid(P_PID, tracee, &si, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status);
write(1, "exiting", 7);
return 0;
}
Before the patch, after the parent process exits, the child is left
running and prints out "." every second.
exiting..... (continues)
After the patch, the group stop initiated by the implied SIGSTOP from
PTRACE_ATTACH is re-established when the parent exits.
exiting
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
2011-03-23 12:37:01 +03:00
2005-04-17 02:20:36 +04:00
spin_unlock ( & child - > sighand - > siglock ) ;
}
2013-01-21 23:48:00 +04:00
/* Ensure that nothing can wake it up, even SIGKILL */
static bool ptrace_freeze_traced ( struct task_struct * task )
{
bool ret = false ;
/* Lockless, nobody but us can set this flag */
if ( task - > jobctl & JOBCTL_LISTENING )
return ret ;
spin_lock_irq ( & task - > sighand - > siglock ) ;
if ( task_is_traced ( task ) & & ! __fatal_signal_pending ( task ) ) {
task - > state = __TASK_TRACED ;
ret = true ;
}
spin_unlock_irq ( & task - > sighand - > siglock ) ;
return ret ;
}
static void ptrace_unfreeze_traced ( struct task_struct * task )
{
if ( task - > state ! = __TASK_TRACED )
return ;
WARN_ON ( ! task - > ptrace | | task - > parent ! = current ) ;
2017-04-08 02:04:51 +03:00
/*
* PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely .
* Recheck state under the lock to close this race .
*/
2013-01-21 23:48:00 +04:00
spin_lock_irq ( & task - > sighand - > siglock ) ;
2017-04-08 02:04:51 +03:00
if ( task - > state = = __TASK_TRACED ) {
if ( __fatal_signal_pending ( task ) )
wake_up_state ( task , __TASK_TRACED ) ;
else
task - > state = TASK_TRACED ;
}
2013-01-21 23:48:00 +04:00
spin_unlock_irq ( & task - > sighand - > siglock ) ;
}
2011-06-02 13:13:59 +04:00
/**
* ptrace_check_attach - check whether ptracee is ready for ptrace operation
* @ child : ptracee to check for
* @ ignore_state : don ' t check whether @ child is currently % TASK_TRACED
*
* Check whether @ child is being ptraced by % current and ready for further
* ptrace operations . If @ ignore_state is % false , @ child also should be in
* % TASK_TRACED state and on return the child is guaranteed to be traced
* and not executing . If @ ignore_state is % true , @ child can be in any
* state .
*
* CONTEXT :
* Grabs and releases tasklist_lock and @ child - > sighand - > siglock .
*
* RETURNS :
* 0 on success , - ESRCH if % child is not ready .
2005-04-17 02:20:36 +04:00
*/
2013-01-20 23:25:47 +04:00
static int ptrace_check_attach ( struct task_struct * child , bool ignore_state )
2005-04-17 02:20:36 +04:00
{
int ret = - ESRCH ;
/*
* We take the read lock around doing both checks to close a
* possible race where someone else was tracing our child and
* detached between these two checks . After this locked check ,
* we are sure that this is our traced child and that can only
* be changed by us so it ' s not changing right after this .
*/
read_lock ( & tasklist_lock ) ;
2013-01-21 23:48:00 +04:00
if ( child - > ptrace & & child - > parent = = current ) {
WARN_ON ( child - > state = = __TASK_TRACED ) ;
2008-02-08 15:19:00 +03:00
/*
* child - > sighand can ' t be NULL , release_task ( )
* does ptrace_unlink ( ) before __exit_signal ( ) .
*/
2013-01-21 23:48:00 +04:00
if ( ignore_state | | ptrace_freeze_traced ( child ) )
2011-04-01 22:13:01 +04:00
ret = 0 ;
2005-04-17 02:20:36 +04:00
}
read_unlock ( & tasklist_lock ) ;
2013-01-21 23:48:00 +04:00
if ( ! ret & & ! ignore_state ) {
if ( ! wait_task_inactive ( child , __TASK_TRACED ) ) {
/*
* This can only happen if may_ptrace_stop ( ) fails and
* ptrace_stop ( ) changes - > state back to TASK_RUNNING ,
* so we should not worry about leaking __TASK_TRACED .
*/
WARN_ON ( child - > state = = __TASK_TRACED ) ;
ret = - ESRCH ;
}
}
2005-04-17 02:20:36 +04:00
return ret ;
}
2012-01-03 21:25:15 +04:00
static int ptrace_has_cap ( struct user_namespace * ns , unsigned int mode )
{
if ( mode & PTRACE_MODE_NOAUDIT )
return has_ns_capability_noaudit ( current , ns , CAP_SYS_PTRACE ) ;
else
return has_ns_capability ( current , ns , CAP_SYS_PTRACE ) ;
}
2012-07-31 15:37:00 +04:00
/* Returns 0 on success, -errno on denial. */
static int __ptrace_may_access ( struct task_struct * task , unsigned int mode )
2005-09-07 02:18:24 +04:00
{
2008-11-14 02:39:19 +03:00
const struct cred * cred = current_cred ( ) , * tcred ;
2016-10-14 05:23:16 +03:00
struct mm_struct * mm ;
ptrace: use fsuid, fsgid, effective creds for fs access checks
By checking the effective credentials instead of the real UID / permitted
capabilities, ensure that the calling process actually intended to use its
credentials.
To ensure that all ptrace checks use the correct caller credentials (e.g.
in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
flag), use two new flags and require one of them to be set.
The problem was that when a privileged task had temporarily dropped its
privileges, e.g. by calling setreuid(0, user_uid), with the intent to
perform following syscalls with the credentials of a user, it still passed
ptrace access checks that the user would not be able to pass.
While an attacker should not be able to convince the privileged task to
perform a ptrace() syscall, this is a problem because the ptrace access
check is reused for things in procfs.
In particular, the following somewhat interesting procfs entries only rely
on ptrace access checks:
/proc/$pid/stat - uses the check for determining whether pointers
should be visible, useful for bypassing ASLR
/proc/$pid/maps - also useful for bypassing ASLR
/proc/$pid/cwd - useful for gaining access to restricted
directories that contain files with lax permissions, e.g. in
this scenario:
lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
drwx------ root root /root
drwxr-xr-x root root /root/foobar
-rw-r--r-- root root /root/foobar/secret
Therefore, on a system where a root-owned mode 6755 binary changes its
effective credentials as described and then dumps a user-specified file,
this could be used by an attacker to reveal the memory layout of root's
processes or reveal the contents of files he is not allowed to access
(through /proc/$pid/cwd).
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-21 02:00:04 +03:00
kuid_t caller_uid ;
kgid_t caller_gid ;
if ( ! ( mode & PTRACE_MODE_FSCREDS ) = = ! ( mode & PTRACE_MODE_REALCREDS ) ) {
WARN ( 1 , " denying ptrace access check without PTRACE_MODE_*CREDS \n " ) ;
return - EPERM ;
}
2008-11-14 02:39:16 +03:00
2006-06-26 11:25:59 +04:00
/* May we inspect the given task?
* This check is used both for attaching with ptrace
* and for allowing access to sensitive information in / proc .
*
* ptrace_attach denies several cases that / proc allows
* because setting up the necessary parent / child relationship
* or halting the specified task is impossible .
*/
ptrace: use fsuid, fsgid, effective creds for fs access checks
By checking the effective credentials instead of the real UID / permitted
capabilities, ensure that the calling process actually intended to use its
credentials.
To ensure that all ptrace checks use the correct caller credentials (e.g.
in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
flag), use two new flags and require one of them to be set.
The problem was that when a privileged task had temporarily dropped its
privileges, e.g. by calling setreuid(0, user_uid), with the intent to
perform following syscalls with the credentials of a user, it still passed
ptrace access checks that the user would not be able to pass.
While an attacker should not be able to convince the privileged task to
perform a ptrace() syscall, this is a problem because the ptrace access
check is reused for things in procfs.
In particular, the following somewhat interesting procfs entries only rely
on ptrace access checks:
/proc/$pid/stat - uses the check for determining whether pointers
should be visible, useful for bypassing ASLR
/proc/$pid/maps - also useful for bypassing ASLR
/proc/$pid/cwd - useful for gaining access to restricted
directories that contain files with lax permissions, e.g. in
this scenario:
lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
drwx------ root root /root
drwxr-xr-x root root /root/foobar
-rw-r--r-- root root /root/foobar/secret
Therefore, on a system where a root-owned mode 6755 binary changes its
effective credentials as described and then dumps a user-specified file,
this could be used by an attacker to reveal the memory layout of root's
processes or reveal the contents of files he is not allowed to access
(through /proc/$pid/cwd).
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-21 02:00:04 +03:00
2006-06-26 11:25:59 +04:00
/* Don't let security modules deny introspection */
2013-09-12 01:24:31 +04:00
if ( same_thread_group ( task , current ) )
2006-06-26 11:25:59 +04:00
return 0 ;
2008-11-14 02:39:19 +03:00
rcu_read_lock ( ) ;
ptrace: use fsuid, fsgid, effective creds for fs access checks
By checking the effective credentials instead of the real UID / permitted
capabilities, ensure that the calling process actually intended to use its
credentials.
To ensure that all ptrace checks use the correct caller credentials (e.g.
in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
flag), use two new flags and require one of them to be set.
The problem was that when a privileged task had temporarily dropped its
privileges, e.g. by calling setreuid(0, user_uid), with the intent to
perform following syscalls with the credentials of a user, it still passed
ptrace access checks that the user would not be able to pass.
While an attacker should not be able to convince the privileged task to
perform a ptrace() syscall, this is a problem because the ptrace access
check is reused for things in procfs.
In particular, the following somewhat interesting procfs entries only rely
on ptrace access checks:
/proc/$pid/stat - uses the check for determining whether pointers
should be visible, useful for bypassing ASLR
/proc/$pid/maps - also useful for bypassing ASLR
/proc/$pid/cwd - useful for gaining access to restricted
directories that contain files with lax permissions, e.g. in
this scenario:
lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
drwx------ root root /root
drwxr-xr-x root root /root/foobar
-rw-r--r-- root root /root/foobar/secret
Therefore, on a system where a root-owned mode 6755 binary changes its
effective credentials as described and then dumps a user-specified file,
this could be used by an attacker to reveal the memory layout of root's
processes or reveal the contents of files he is not allowed to access
(through /proc/$pid/cwd).
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-21 02:00:04 +03:00
if ( mode & PTRACE_MODE_FSCREDS ) {
caller_uid = cred - > fsuid ;
caller_gid = cred - > fsgid ;
} else {
/*
* Using the euid would make more sense here , but something
* in userland might rely on the old behavior , and this
* shouldn ' t be a security problem since
* PTRACE_MODE_REALCREDS implies that the caller explicitly
* used a syscall that requests access to another process
* ( and not a filesystem syscall to procfs ) .
*/
caller_uid = cred - > uid ;
caller_gid = cred - > gid ;
}
2008-11-14 02:39:19 +03:00
tcred = __task_cred ( task ) ;
ptrace: use fsuid, fsgid, effective creds for fs access checks
By checking the effective credentials instead of the real UID / permitted
capabilities, ensure that the calling process actually intended to use its
credentials.
To ensure that all ptrace checks use the correct caller credentials (e.g.
in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
flag), use two new flags and require one of them to be set.
The problem was that when a privileged task had temporarily dropped its
privileges, e.g. by calling setreuid(0, user_uid), with the intent to
perform following syscalls with the credentials of a user, it still passed
ptrace access checks that the user would not be able to pass.
While an attacker should not be able to convince the privileged task to
perform a ptrace() syscall, this is a problem because the ptrace access
check is reused for things in procfs.
In particular, the following somewhat interesting procfs entries only rely
on ptrace access checks:
/proc/$pid/stat - uses the check for determining whether pointers
should be visible, useful for bypassing ASLR
/proc/$pid/maps - also useful for bypassing ASLR
/proc/$pid/cwd - useful for gaining access to restricted
directories that contain files with lax permissions, e.g. in
this scenario:
lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
drwx------ root root /root
drwxr-xr-x root root /root/foobar
-rw-r--r-- root root /root/foobar/secret
Therefore, on a system where a root-owned mode 6755 binary changes its
effective credentials as described and then dumps a user-specified file,
this could be used by an attacker to reveal the memory layout of root's
processes or reveal the contents of files he is not allowed to access
(through /proc/$pid/cwd).
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-21 02:00:04 +03:00
if ( uid_eq ( caller_uid , tcred - > euid ) & &
uid_eq ( caller_uid , tcred - > suid ) & &
uid_eq ( caller_uid , tcred - > uid ) & &
gid_eq ( caller_gid , tcred - > egid ) & &
gid_eq ( caller_gid , tcred - > sgid ) & &
gid_eq ( caller_gid , tcred - > gid ) )
2011-03-24 02:43:20 +03:00
goto ok ;
2011-11-17 11:15:31 +04:00
if ( ptrace_has_cap ( tcred - > user_ns , mode ) )
2011-03-24 02:43:20 +03:00
goto ok ;
rcu_read_unlock ( ) ;
return - EPERM ;
ok :
2008-11-14 02:39:19 +03:00
rcu_read_unlock ( ) ;
2019-05-29 14:31:57 +03:00
/*
* If a task drops privileges and becomes nondumpable ( through a syscall
* like setresuid ( ) ) while we are trying to access it , we must ensure
* that the dumpability is read after the credentials ; otherwise ,
* we may be able to attach to a task that we shouldn ' t be able to
* attach to ( as if the task had dropped privileges without becoming
* nondumpable ) .
* Pairs with a write barrier in commit_creds ( ) .
*/
smp_rmb ( ) ;
2016-10-14 05:23:16 +03:00
mm = task - > mm ;
if ( mm & &
( ( get_dumpable ( mm ) ! = SUID_DUMP_USER ) & &
! ptrace_has_cap ( mm - > user_ns , mode ) ) )
return - EPERM ;
2005-09-07 02:18:24 +04:00
2009-05-07 13:26:19 +04:00
return security_ptrace_access_check ( task , mode ) ;
2005-09-07 02:18:24 +04:00
}
Security: split proc ptrace checking into read vs. attach
Enable security modules to distinguish reading of process state via
proc from full ptrace access by renaming ptrace_may_attach to
ptrace_may_access and adding a mode argument indicating whether only
read access or full attach access is requested. This allows security
modules to permit access to reading process state without granting
full ptrace access. The base DAC/capability checking remains unchanged.
Read access to /proc/pid/mem continues to apply a full ptrace attach
check since check_mem_permission() already requires the current task
to already be ptracing the target. The other ptrace checks within
proc for elements like environ, maps, and fds are changed to pass the
read mode instead of attach.
In the SELinux case, we model such reading of process state as a
reading of a proc file labeled with the target process' label. This
enables SELinux policy to permit such reading of process state without
permitting control or manipulation of the target process, as there are
a number of cases where programs probe for such information via proc
but do not need to be able to control the target (e.g. procps,
lsof, PolicyKit, ConsoleKit). At present we have to choose between
allowing full ptrace in policy (more permissive than required/desired)
or breaking functionality (or in some cases just silencing the denials
via dontaudit rules but this can hide genuine attacks).
This version of the patch incorporates comments from Casey Schaufler
(change/replace existing ptrace_may_attach interface, pass access
mode), and Chris Wright (provide greater consistency in the checking).
Note that like their predecessors __ptrace_may_attach and
ptrace_may_attach, the __ptrace_may_access and ptrace_may_access
interfaces use different return value conventions from each other (0
or -errno vs. 1 or 0). I retained this difference to avoid any
changes to the caller logic but made the difference clearer by
changing the latter interface to return a bool rather than an int and
by adding a comment about it to ptrace.h for any future callers.
Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-05-19 16:32:49 +04:00
/*
 * Boolean wrapper around __ptrace_may_access().
 *
 * Runs the check with @task locked via task_lock()/task_unlock() and
 * inverts the 0 / -errno result: returns true when access is permitted,
 * false when it is denied.
 */
bool ptrace_may_access(struct task_struct *task, unsigned int mode)
{
	int err;

	task_lock(task);
	err = __ptrace_may_access(task, mode);
	task_unlock(task);

	return !err;
}
ptrace: implement PTRACE_SEIZE
PTRACE_ATTACH implicitly issues SIGSTOP on attach which has side
effects on tracee signal and job control states. This patch
implements a new ptrace request PTRACE_SEIZE which attaches a tracee
without trapping it or affecting its signal and job control states.
The usage is the same with PTRACE_ATTACH but it takes PTRACE_SEIZE_*
flags in @data. Currently, the only defined flag is
PTRACE_SEIZE_DEVEL which is a temporary flag to enable PTRACE_SEIZE.
PTRACE_SEIZE will change ptrace behaviors outside of attach itself.
The changes will be implemented gradually and the DEVEL flag is to
prevent programs which expect full SEIZE behavior from using it before
all the behavior modifications are complete while allowing unit
testing. The flag will be removed once SEIZE behaviors are completely
implemented.
* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
attaching tracee continues to run unless a trap condition occurs.
* PTRACE_SEIZE doesn't affect signal or group stop state.
* If PTRACE_SEIZE'd, group stop uses PTRACE_EVENT_STOP trap which uses
exit_code of (signr | PTRACE_EVENT_STOP << 8) where signr is one of
the stopping signals if group stop is in effect or SIGTRAP
otherwise, and returns usual trap siginfo on PTRACE_GETSIGINFO
instead of NULL.
Seizing sets PT_SEIZED in ->ptrace of the tracee. This flag will be
used to determine whether new SEIZE behaviors should be enabled.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive\n");
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
return 0;
}
When the above program is called w/o argument, tracee is seized while
running and remains running. When tracer exits, tracee continues to
run and print out messages.
# ./test-seize-simple
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
tracee: alive
tracee: alive
When called with an argument, tracee is seized from stopped state and
continued, and returns to stopped state when tracer exits.
# ./test-seize
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
# ps -el|grep test-seize
1 T 0 4720 1 0 80 0 - 941 signal ttyS0 00:00:00 test-seize
-v2: SEIZE doesn't schedule TRAP_STOP and leaves tracee running as Jan
suggested.
-v3: PTRACE_EVENT_STOP traps now report group stop state by signr. If
group stop is in effect the stop signal number is returned as
part of exit_code; otherwise, SIGTRAP. This was suggested by
Denys and Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:15 +04:00
static int ptrace_attach ( struct task_struct * task , long request ,
2012-03-24 02:02:42 +04:00
unsigned long addr ,
ptrace: implement PTRACE_SEIZE
PTRACE_ATTACH implicitly issues SIGSTOP on attach which has side
effects on tracee signal and job control states. This patch
implements a new ptrace request PTRACE_SEIZE which attaches a tracee
without trapping it or affecting its signal and job control states.
The usage is the same with PTRACE_ATTACH but it takes PTRACE_SEIZE_*
flags in @data. Currently, the only defined flag is
PTRACE_SEIZE_DEVEL which is a temporary flag to enable PTRACE_SEIZE.
PTRACE_SEIZE will change ptrace behaviors outside of attach itself.
The changes will be implemented gradually and the DEVEL flag is to
prevent programs which expect full SEIZE behavior from using it before
all the behavior modifications are complete while allowing unit
testing. The flag will be removed once SEIZE behaviors are completely
implemented.
* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
attaching tracee continues to run unless a trap condition occurs.
* PTRACE_SEIZE doesn't affect signal or group stop state.
* If PTRACE_SEIZE'd, group stop uses PTRACE_EVENT_STOP trap which uses
exit_code of (signr | PTRACE_EVENT_STOP << 8) where signr is one of
the stopping signals if group stop is in effect or SIGTRAP
otherwise, and returns usual trap siginfo on PTRACE_GETSIGINFO
instead of NULL.
Seizing sets PT_SEIZED in ->ptrace of the tracee. This flag will be
used to determine whether new SEIZE behaviors should be enabled.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive\n");
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
return 0;
}
When the above program is called w/o argument, tracee is seized while
running and remains running. When tracer exits, tracee continues to
run and print out messages.
# ./test-seize-simple
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
tracee: alive
tracee: alive
When called with an argument, tracee is seized from stopped state and
continued, and returns to stopped state when tracer exits.
# ./test-seize
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
# ps -el|grep test-seize
1 T 0 4720 1 0 80 0 - 941 signal ttyS0 00:00:00 test-seize
-v2: SEIZE doesn't schedule TRAP_STOP and leaves tracee running as Jan
suggested.
-v3: PTRACE_EVENT_STOP traps now report group stop state by signr. If
group stop is in effect the stop signal number is returned as
part of exit_code; otherwise, SIGTRAP. This was suggested by
Denys and Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:15 +04:00
unsigned long flags )
2005-04-17 02:20:36 +04:00
{
ptrace: implement PTRACE_SEIZE
PTRACE_ATTACH implicitly issues SIGSTOP on attach which has side
effects on tracee signal and job control states. This patch
implements a new ptrace request PTRACE_SEIZE which attaches a tracee
without trapping it or affecting its signal and job control states.
The usage is the same with PTRACE_ATTACH but it takes PTRACE_SEIZE_*
flags in @data. Currently, the only defined flag is
PTRACE_SEIZE_DEVEL which is a temporary flag to enable PTRACE_SEIZE.
PTRACE_SEIZE will change ptrace behaviors outside of attach itself.
The changes will be implemented gradually and the DEVEL flag is to
prevent programs which expect full SEIZE behavior from using it before
all the behavior modifications are complete while allowing unit
testing. The flag will be removed once SEIZE behaviors are completely
implemented.
* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
attaching tracee continues to run unless a trap condition occurs.
* PTRACE_SEIZE doesn't affect signal or group stop state.
* If PTRACE_SEIZE'd, group stop uses PTRACE_EVENT_STOP trap which uses
exit_code of (signr | PTRACE_EVENT_STOP << 8) where signr is one of
the stopping signals if group stop is in effect or SIGTRAP
otherwise, and returns usual trap siginfo on PTRACE_GETSIGINFO
instead of NULL.
Seizing sets PT_SEIZED in ->ptrace of the tracee. This flag will be
used to determine whether new SEIZE behaviors should be enabled.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive\n");
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
return 0;
}
When the above program is called w/o argument, tracee is seized while
running and remains running. When tracer exits, tracee continues to
run and print out messages.
# ./test-seize-simple
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
tracee: alive
tracee: alive
When called with an argument, tracee is seized from stopped state and
continued, and returns to stopped state when tracer exits.
# ./test-seize
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
# ps -el|grep test-seize
1 T 0 4720 1 0 80 0 - 941 signal ttyS0 00:00:00 test-seize
-v2: SEIZE doesn't schedule TRAP_STOP and leaves tracee running as Jan
suggested.
-v3: PTRACE_EVENT_STOP traps now report group stop state by signr. If
group stop is in effect the stop signal number is returned as
part of exit_code; otherwise, SIGTRAP. This was suggested by
Denys and Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:15 +04:00
bool seize = ( request = = PTRACE_SEIZE ) ;
2005-04-17 02:20:36 +04:00
int retval ;
2006-05-07 21:49:33 +04:00
ptrace: implement PTRACE_SEIZE
PTRACE_ATTACH implicitly issues SIGSTOP on attach which has side
effects on tracee signal and job control states. This patch
implements a new ptrace request PTRACE_SEIZE which attaches a tracee
without trapping it or affecting its signal and job control states.
The usage is the same with PTRACE_ATTACH but it takes PTRACE_SEIZE_*
flags in @data. Currently, the only defined flag is
PTRACE_SEIZE_DEVEL which is a temporary flag to enable PTRACE_SEIZE.
PTRACE_SEIZE will change ptrace behaviors outside of attach itself.
The changes will be implemented gradually and the DEVEL flag is to
prevent programs which expect full SEIZE behavior from using it before
all the behavior modifications are complete while allowing unit
testing. The flag will be removed once SEIZE behaviors are completely
implemented.
* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
attaching tracee continues to run unless a trap condition occurs.
* PTRACE_SEIZE doesn't affect signal or group stop state.
* If PTRACE_SEIZE'd, group stop uses PTRACE_EVENT_STOP trap which uses
exit_code of (signr | PTRACE_EVENT_STOP << 8) where signr is one of
the stopping signals if group stop is in effect or SIGTRAP
otherwise, and returns usual trap siginfo on PTRACE_GETSIGINFO
instead of NULL.
Seizing sets PT_SEIZED in ->ptrace of the tracee. This flag will be
used to determine whether new SEIZE behaviors should be enabled.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive\n");
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
return 0;
}
When the above program is called w/o argument, tracee is seized while
running and remains running. When tracer exits, tracee continues to
run and print out messages.
# ./test-seize-simple
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
tracee: alive
tracee: alive
When called with an argument, tracee is seized from stopped state and
continued, and returns to stopped state when tracer exits.
# ./test-seize
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
# ps -el|grep test-seize
1 T 0 4720 1 0 80 0 - 941 signal ttyS0 00:00:00 test-seize
-v2: SEIZE doesn't schedule TRAP_STOP and leaves tracee running as Jan
suggested.
-v3: PTRACE_EVENT_STOP traps now report group stop state by signr. If
group stop is in effect the stop signal number is returned as
part of exit_code; otherwise, SIGTRAP. This was suggested by
Denys and Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:15 +04:00
retval = - EIO ;
2012-03-24 02:02:42 +04:00
if ( seize ) {
if ( addr ! = 0 )
goto out ;
if ( flags & ~ ( unsigned long ) PTRACE_O_MASK )
goto out ;
flags = PT_PTRACED | PT_SEIZED | ( flags < < PT_OPT_FLAG_SHIFT ) ;
} else {
flags = PT_PTRACED ;
}
ptrace: implement PTRACE_SEIZE
PTRACE_ATTACH implicitly issues SIGSTOP on attach which has side
effects on tracee signal and job control states. This patch
implements a new ptrace request PTRACE_SEIZE which attaches a tracee
without trapping it or affecting its signal and job control states.
The usage is the same with PTRACE_ATTACH but it takes PTRACE_SEIZE_*
flags in @data. Currently, the only defined flag is
PTRACE_SEIZE_DEVEL which is a temporary flag to enable PTRACE_SEIZE.
PTRACE_SEIZE will change ptrace behaviors outside of attach itself.
The changes will be implemented gradually and the DEVEL flag is to
prevent programs which expect full SEIZE behavior from using it before
all the behavior modifications are complete while allowing unit
testing. The flag will be removed once SEIZE behaviors are completely
implemented.
* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
attaching tracee continues to run unless a trap condition occurs.
* PTRACE_SEIZE doesn't affect signal or group stop state.
* If PTRACE_SEIZE'd, group stop uses PTRACE_EVENT_STOP trap which uses
exit_code of (signr | PTRACE_EVENT_STOP << 8) where signr is one of
the stopping signals if group stop is in effect or SIGTRAP
otherwise, and returns usual trap siginfo on PTRACE_GETSIGINFO
instead of NULL.
Seizing sets PT_SEIZED in ->ptrace of the tracee. This flag will be
used to determine whether new SEIZE behaviors should be enabled.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive\n");
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
return 0;
}
When the above program is called w/o argument, tracee is seized while
running and remains running. When tracer exits, tracee continues to
run and print out messages.
# ./test-seize-simple
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
tracee: alive
tracee: alive
When called with an argument, tracee is seized from stopped state and
continued, and returns to stopped state when tracer exits.
# ./test-seize
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
# ps -el|grep test-seize
1 T 0 4720 1 0 80 0 - 941 signal ttyS0 00:00:00 test-seize
-v2: SEIZE doesn't schedule TRAP_STOP and leaves tracee running as Jan
suggested.
-v3: PTRACE_EVENT_STOP traps now report group stop state by signr. If
group stop is in effect the stop signal number is returned as
part of exit_code; otherwise, SIGTRAP. This was suggested by
Denys and Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:15 +04:00
2007-03-20 20:58:35 +03:00
audit_ptrace ( task ) ;
2005-04-17 02:20:36 +04:00
retval = - EPERM ;
2009-06-18 03:27:31 +04:00
if ( unlikely ( task - > flags & PF_KTHREAD ) )
goto out ;
2007-10-19 10:40:18 +04:00
if ( same_thread_group ( task , current ) )
2006-05-07 21:49:33 +04:00
goto out ;
2009-06-18 03:27:32 +04:00
/*
* Protect exec ' s credential calculations against our interference ;
2012-03-24 02:02:41 +04:00
* SUID , SGID and LSM creds get determined differently
2009-05-08 16:55:22 +04:00
* under ptrace .
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 02:39:23 +03:00
*/
2009-07-05 23:08:26 +04:00
retval = - ERESTARTNOINTR ;
2010-10-28 02:34:08 +04:00
if ( mutex_lock_interruptible ( & task - > signal - > cred_guard_mutex ) )
CRED: Inaugurate COW credentials
Inaugurate copy-on-write credentials management. This uses RCU to manage the
credentials pointer in the task_struct with respect to accesses by other tasks.
A process may only modify its own credentials, and so does not need locking to
access or modify its own credentials.
A mutex (cred_replace_mutex) is added to the task_struct to control the effect
of PTRACE_ATTACHED on credential calculations, particularly with respect to
execve().
With this patch, the contents of an active credentials struct may not be
changed directly; rather a new set of credentials must be prepared, modified
and committed using something like the following sequence of events:
struct cred *new = prepare_creds();
int ret = blah(new);
if (ret < 0) {
abort_creds(new);
return ret;
}
return commit_creds(new);
There are some exceptions to this rule: the keyrings pointed to by the active
credentials may be instantiated - keyrings violate the COW rule as managing
COW keyrings is tricky, given that it is possible for a task to directly alter
the keys in a keyring in use by another task.
To help enforce this, various pointers to sets of credentials, such as those in
the task_struct, are declared const. The purpose of this is compile-time
discouragement of altering credentials through those pointers. Once a set of
credentials has been made public through one of these pointers, it may not be
modified, except under special circumstances:
(1) Its reference count may be incremented and decremented.
(2) The keyrings to which it points may be modified, but not replaced.
The only safe way to modify anything else is to create a replacement and commit
using the functions described in Documentation/credentials.txt (which will be
added by a later patch).
This patch and the preceding patches have been tested with the LTP SELinux
testsuite.
This patch makes several logical sets of alteration:
(1) execve().
This now prepares and commits credentials in various places in the
security code rather than altering the current creds directly.
(2) Temporary credential overrides.
do_coredump() and sys_faccessat() now prepare their own credentials and
temporarily override the ones currently on the acting thread, whilst
preventing interference from other threads by holding cred_replace_mutex
on the thread being dumped.
This will be replaced in a future patch by something that hands down the
credentials directly to the functions being called, rather than altering
the task's objective credentials.
(3) LSM interface.
A number of functions have been changed, added or removed:
(*) security_capset_check(), ->capset_check()
(*) security_capset_set(), ->capset_set()
Removed in favour of security_capset().
(*) security_capset(), ->capset()
New. This is passed a pointer to the new creds, a pointer to the old
creds and the proposed capability sets. It should fill in the new
creds or return an error. All pointers, barring the pointer to the
new creds, are now const.
(*) security_bprm_apply_creds(), ->bprm_apply_creds()
Changed; now returns a value, which will cause the process to be
killed if it's an error.
(*) security_task_alloc(), ->task_alloc_security()
Removed in favour of security_prepare_creds().
(*) security_cred_free(), ->cred_free()
New. Free security data attached to cred->security.
(*) security_prepare_creds(), ->cred_prepare()
New. Duplicate any security data attached to cred->security.
(*) security_commit_creds(), ->cred_commit()
New. Apply any security effects for the upcoming installation of new
security by commit_creds().
(*) security_task_post_setuid(), ->task_post_setuid()
Removed in favour of security_task_fix_setuid().
(*) security_task_fix_setuid(), ->task_fix_setuid()
Fix up the proposed new credentials for setuid(). This is used by
cap_set_fix_setuid() to implicitly adjust capabilities in line with
setuid() changes. Changes are made to the new credentials, rather
than the task itself as in security_task_post_setuid().
(*) security_task_reparent_to_init(), ->task_reparent_to_init()
Removed. Instead the task being reparented to init is referred
directly to init's credentials.
NOTE! This results in the loss of some state: SELinux's osid no
longer records the sid of the thread that forked it.
(*) security_key_alloc(), ->key_alloc()
(*) security_key_permission(), ->key_permission()
Changed. These now take cred pointers rather than task pointers to
refer to the security context.
(4) sys_capset().
This has been simplified and uses less locking. The LSM functions it
calls have been merged.
(5) reparent_to_kthreadd().
This gives the current thread the same credentials as init by simply using
commit_thread() to point that way.
(6) __sigqueue_alloc() and switch_uid()
__sigqueue_alloc() can't stop the target task from changing its creds
beneath it, so this function gets a reference to the currently applicable
user_struct which it then passes into the sigqueue struct it returns if
successful.
switch_uid() is now called from commit_creds(), and possibly should be
folded into that. commit_creds() should take care of protecting
__sigqueue_alloc().
(7) [sg]et[ug]id() and co and [sg]et_current_groups.
The set functions now all use prepare_creds(), commit_creds() and
abort_creds() to build and check a new set of credentials before applying
it.
security_task_set[ug]id() is called inside the prepared section. This
guarantees that nothing else will affect the creds until we've finished.
The calling of set_dumpable() has been moved into commit_creds().
Much of the functionality of set_user() has been moved into
commit_creds().
The get functions all simply access the data directly.
(8) security_task_prctl() and cap_task_prctl().
security_task_prctl() has been modified to return -ENOSYS if it doesn't
want to handle a function, or otherwise return the return value directly
rather than through an argument.
Additionally, cap_task_prctl() now prepares a new set of credentials, even
if it doesn't end up using it.
(9) Keyrings.
A number of changes have been made to the keyrings code:
(a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have
all been dropped and built in to the credentials functions directly.
They may want separating out again later.
(b) key_alloc() and search_process_keyrings() now take a cred pointer
rather than a task pointer to specify the security context.
(c) copy_creds() gives a new thread within the same thread group a new
thread keyring if its parent had one, otherwise it discards the thread
keyring.
(d) The authorisation key now points directly to the credentials to extend
the search into rather pointing to the task that carries them.
(e) Installing thread, process or session keyrings causes a new set of
credentials to be created, even though it's not strictly necessary for
process or session keyrings (they're shared).
(10) Usermode helper.
The usermode helper code now carries a cred struct pointer in its
subprocess_info struct instead of a new session keyring pointer. This set
of credentials is derived from init_cred and installed on the new process
after it has been cloned.
call_usermodehelper_setup() allocates the new credentials and
call_usermodehelper_freeinfo() discards them if they haven't been used. A
special cred function (prepare_usermodeinfo_creds()) is provided
specifically for call_usermodehelper_setup() to call.
call_usermodehelper_setkeys() adjusts the credentials to sport the
supplied keyring as the new session keyring.
(11) SELinux.
SELinux has a number of changes, in addition to those to support the LSM
interface changes mentioned above:
(a) selinux_setprocattr() no longer does its check for whether the
current ptracer can access processes with the new SID inside the lock
that covers getting the ptracer's SID. Whilst this lock ensures that
the check is done with the ptracer pinned, the result is only valid
until the lock is released, so there's no point doing it inside the
lock.
(12) is_single_threaded().
This function has been extracted from selinux_setprocattr() and put into
a file of its own in the lib/ directory as join_session_keyring() now
wants to use it too.
The code in SELinux just checked to see whether a task shared mm_structs
with other tasks (CLONE_VM), but that isn't good enough. We really want
to know if they're part of the same thread group (CLONE_THREAD).
(13) nfsd.
The NFS server daemon now has to use the COW credentials to set the
credentials it is going to use. It really needs to pass the credentials
down to the functions it calls, but it can't do that until other patches
in this series have been applied.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: James Morris <jmorris@namei.org>
2008-11-14 02:39:23 +03:00
goto out ;
2006-05-07 21:49:33 +04:00
2009-06-18 03:27:33 +04:00
task_lock ( task ) ;
ptrace: use fsuid, fsgid, effective creds for fs access checks
By checking the effective credentials instead of the real UID / permitted
capabilities, ensure that the calling process actually intended to use its
credentials.
To ensure that all ptrace checks use the correct caller credentials (e.g.
in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS
flag), use two new flags and require one of them to be set.
The problem was that when a privileged task had temporarily dropped its
privileges, e.g. by calling setreuid(0, user_uid), with the intent to
perform following syscalls with the credentials of a user, it still passed
ptrace access checks that the user would not be able to pass.
While an attacker should not be able to convince the privileged task to
perform a ptrace() syscall, this is a problem because the ptrace access
check is reused for things in procfs.
In particular, the following somewhat interesting procfs entries only rely
on ptrace access checks:
/proc/$pid/stat - uses the check for determining whether pointers
should be visible, useful for bypassing ASLR
/proc/$pid/maps - also useful for bypassing ASLR
/proc/$pid/cwd - useful for gaining access to restricted
directories that contain files with lax permissions, e.g. in
this scenario:
lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar
drwx------ root root /root
drwxr-xr-x root root /root/foobar
-rw-r--r-- root root /root/foobar/secret
Therefore, on a system where a root-owned mode 6755 binary changes its
effective credentials as described and then dumps a user-specified file,
this could be used by an attacker to reveal the memory layout of root's
processes or reveal the contents of files he is not allowed to access
(through /proc/$pid/cwd).
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: "Serge E. Hallyn" <serge.hallyn@ubuntu.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-21 02:00:04 +03:00
retval = __ptrace_may_access ( task , PTRACE_MODE_ATTACH_REALCREDS ) ;
2009-06-18 03:27:33 +04:00
task_unlock ( task ) ;
2005-04-17 02:20:36 +04:00
if ( retval )
2009-06-18 03:27:33 +04:00
goto unlock_creds ;
2005-04-17 02:20:36 +04:00
2009-06-18 03:27:33 +04:00
write_lock_irq ( & tasklist_lock ) ;
2009-06-18 03:27:31 +04:00
retval = - EPERM ;
if ( unlikely ( task - > exit_state ) )
2009-06-18 03:27:33 +04:00
goto unlock_tasklist ;
2009-06-18 03:27:32 +04:00
if ( task - > ptrace )
2009-06-18 03:27:33 +04:00
goto unlock_tasklist ;
2009-06-18 03:27:31 +04:00
ptrace: implement PTRACE_SEIZE
PTRACE_ATTACH implicitly issues SIGSTOP on attach which has side
effects on tracee signal and job control states. This patch
implements a new ptrace request PTRACE_SEIZE which attaches a tracee
without trapping it or affecting its signal and job control states.
The usage is the same with PTRACE_ATTACH but it takes PTRACE_SEIZE_*
flags in @data. Currently, the only defined flag is
PTRACE_SEIZE_DEVEL which is a temporary flag to enable PTRACE_SEIZE.
PTRACE_SEIZE will change ptrace behaviors outside of attach itself.
The changes will be implemented gradually and the DEVEL flag is to
prevent programs which expect full SEIZE behavior from using it before
all the behavior modifications are complete while allowing unit
testing. The flag will be removed once SEIZE behaviors are completely
implemented.
* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
attaching tracee continues to run unless a trap condition occurs.
* PTRACE_SEIZE doesn't affect signal or group stop state.
* If PTRACE_SEIZE'd, group stop uses PTRACE_EVENT_STOP trap which uses
exit_code of (signr | PTRACE_EVENT_STOP << 8) where signr is one of
the stopping signals if group stop is in effect or SIGTRAP
otherwise, and returns usual trap siginfo on PTRACE_GETSIGINFO
instead of NULL.
Seizing sets PT_SEIZED in ->ptrace of the tracee. This flag will be
used to determine whether new SEIZE behaviors should be enabled.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive\n");
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
return 0;
}
When the above program is called w/o argument, tracee is seized while
running and remains running. When tracer exits, tracee continues to
run and print out messages.
# ./test-seize-simple
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
tracee: alive
tracee: alive
When called with an argument, tracee is seized from stopped state and
continued, and returns to stopped state when tracer exits.
# ./test-seize
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
# ps -el|grep test-seize
1 T 0 4720 1 0 80 0 - 941 signal ttyS0 00:00:00 test-seize
-v2: SEIZE doesn't schedule TRAP_STOP and leaves tracee running as Jan
suggested.
-v3: PTRACE_EVENT_STOP traps now report group stop state by signr. If
group stop is in effect the stop signal number is returned as
part of exit_code; otherwise, SIGTRAP. This was suggested by
Denys and Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:15 +04:00
if ( seize )
2012-03-24 02:02:42 +04:00
flags | = PT_SEIZED ;
task - > ptrace = flags ;
2005-04-17 02:20:36 +04:00
2017-05-22 23:40:12 +03:00
ptrace_link ( task , current ) ;
ptrace: implement PTRACE_SEIZE
PTRACE_ATTACH implicitly issues SIGSTOP on attach which has side
effects on tracee signal and job control states. This patch
implements a new ptrace request PTRACE_SEIZE which attaches a tracee
without trapping it or affecting its signal and job control states.
The usage is the same with PTRACE_ATTACH but it takes PTRACE_SEIZE_*
flags in @data. Currently, the only defined flag is
PTRACE_SEIZE_DEVEL which is a temporary flag to enable PTRACE_SEIZE.
PTRACE_SEIZE will change ptrace behaviors outside of attach itself.
The changes will be implemented gradually and the DEVEL flag is to
prevent programs which expect full SEIZE behavior from using it before
all the behavior modifications are complete while allowing unit
testing. The flag will be removed once SEIZE behaviors are completely
implemented.
* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
attaching tracee continues to run unless a trap condition occurs.
* PTRACE_SEIZE doesn't affect signal or group stop state.
* If PTRACE_SEIZE'd, group stop uses PTRACE_EVENT_STOP trap which uses
exit_code of (signr | PTRACE_EVENT_STOP << 8) where signr is one of
the stopping signals if group stop is in effect or SIGTRAP
otherwise, and returns usual trap siginfo on PTRACE_GETSIGINFO
instead of NULL.
Seizing sets PT_SEIZED in ->ptrace of the tracee. This flag will be
used to determine whether new SEIZE behaviors should be enabled.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive\n");
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
return 0;
}
When the above program is called w/o argument, tracee is seized while
running and remains running. When tracer exits, tracee continues to
run and print out messages.
# ./test-seize-simple
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
tracee: alive
tracee: alive
When called with an argument, tracee is seized from stopped state and
continued, and returns to stopped state when tracer exits.
# ./test-seize
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
# ps -el|grep test-seize
1 T 0 4720 1 0 80 0 - 941 signal ttyS0 00:00:00 test-seize
-v2: SEIZE doesn't schedule TRAP_STOP and leaves tracee running as Jan
suggested.
-v3: PTRACE_EVENT_STOP traps now report group stop state by signr. If
group stop is in effect the stop signal number is returned as
part of exit_code; otherwise, SIGTRAP. This was suggested by
Denys and Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:15 +04:00
/* SEIZE doesn't trap tracee on attach */
if ( ! seize )
2018-09-03 11:32:52 +03:00
send_sig_info ( SIGSTOP , SEND_SIG_PRIV , task ) ;
2009-06-18 03:27:31 +04:00
2011-03-23 12:37:00 +03:00
spin_lock ( & task - > sighand - > siglock ) ;
/*
2011-06-14 13:20:14 +04:00
* If the task is already STOPPED , set JOBCTL_TRAP_STOP and
2011-03-23 12:37:00 +03:00
* TRAPPING , and kick it so that it transits to TRACED . TRAPPING
* will be cleared if the child completes the transition or any
* event which clears the group stop states happens . We ' ll wait
* for the transition to complete before returning from this
* function .
*
* This hides STOPPED - > RUNNING - > TRACED transition from the
* attaching thread but a different thread in the same group can
* still observe the transient RUNNING state . IOW , if another
* thread ' s WNOHANG wait ( 2 ) on the stopped tracee races against
* ATTACH , the wait ( 2 ) may fail due to the transient RUNNING .
*
* The following task_is_stopped ( ) test is safe as both transitions
* in and out of STOPPED are protected by siglock .
*/
2011-06-02 13:14:00 +04:00
if ( task_is_stopped ( task ) & &
2011-06-14 13:20:14 +04:00
task_set_jobctl_pending ( task , JOBCTL_TRAP_STOP | JOBCTL_TRAPPING ) )
2013-01-21 23:47:41 +04:00
signal_wake_up_state ( task , __TASK_STOPPED ) ;
2011-03-23 12:37:00 +03:00
spin_unlock ( & task - > sighand - > siglock ) ;
2009-06-18 03:27:31 +04:00
retval = 0 ;
2009-06-18 03:27:33 +04:00
unlock_tasklist :
write_unlock_irq ( & tasklist_lock ) ;
unlock_creds :
2010-10-28 02:34:08 +04:00
mutex_unlock ( & task - > signal - > cred_guard_mutex ) ;
2006-05-07 21:49:33 +04:00
out :
2011-07-15 21:45:18 +04:00
if ( ! retval ) {
2016-01-21 01:59:55 +03:00
/*
* We do not bother to change retval or clear JOBCTL_TRAPPING
* if wait_on_bit ( ) was interrupted by SIGKILL . The tracer will
* not return to user - mode , it will exit and clear this bit in
* __ptrace_unlink ( ) if it wasn ' t already cleared by the tracee ;
* and until then nobody can ptrace this task .
*/
wait_on_bit ( & task - > jobctl , JOBCTL_TRAPPING_BIT , TASK_KILLABLE ) ;
2011-07-15 21:45:18 +04:00
proc_ptrace_connector ( task , PTRACE_ATTACH ) ;
}
2005-04-17 02:20:36 +04:00
return retval ;
}
2009-06-18 03:27:32 +04:00
/**
* ptrace_traceme - - helper for PTRACE_TRACEME
*
* Performs checks and sets PT_PTRACED .
* Should be used by all ptrace implementations for PTRACE_TRACEME .
*/
2011-03-04 20:23:30 +03:00
static int ptrace_traceme ( void )
2009-06-18 03:27:32 +04:00
{
int ret = - EPERM ;
2009-06-18 03:27:33 +04:00
write_lock_irq ( & tasklist_lock ) ;
/* Are we already being traced? */
2009-06-18 03:27:32 +04:00
if ( ! current - > ptrace ) {
ret = security_ptrace_traceme ( current - > parent ) ;
/*
* Check PF_EXITING to ensure - > real_parent has not passed
* exit_ptrace ( ) . Otherwise we don ' t report the error but
* pretend - > real_parent untraces us right after return .
*/
if ( ! ret & & ! ( current - > real_parent - > flags & PF_EXITING ) ) {
current - > ptrace = PT_PTRACED ;
2017-05-22 23:40:12 +03:00
ptrace_link ( current , current - > real_parent ) ;
2009-06-18 03:27:32 +04:00
}
}
2009-06-18 03:27:33 +04:00
write_unlock_irq ( & tasklist_lock ) ;
2009-06-18 03:27:32 +04:00
return ret ;
}
2009-04-03 03:58:18 +04:00
/*
* Called with irqs disabled , returns true if childs should reap themselves .
*/
static int ignoring_children ( struct sighand_struct * sigh )
{
int ret ;
spin_lock ( & sigh - > siglock ) ;
ret = ( sigh - > action [ SIGCHLD - 1 ] . sa . sa_handler = = SIG_IGN ) | |
( sigh - > action [ SIGCHLD - 1 ] . sa . sa_flags & SA_NOCLDWAIT ) ;
spin_unlock ( & sigh - > siglock ) ;
return ret ;
}
/*
* Called with tasklist_lock held for writing .
* Unlink a traced task , and clean it up if it was a traced zombie .
* Return true if it needs to be reaped with release_task ( ) .
* ( We can ' t call release_task ( ) here because we already hold tasklist_lock . )
*
* If it ' s a zombie , our attachedness prevented normal parent notification
* or self - reaping . Do notification now if it would have happened earlier .
* If it should reap itself , return true .
*
ptrace: __ptrace_detach: do __wake_up_parent() if we reap the tracee
The bug is old, it wasn't cause by recent changes.
Test case:
static void *tfunc(void *arg)
{
int pid = (long)arg;
assert(ptrace(PTRACE_ATTACH, pid, NULL, NULL) == 0);
kill(pid, SIGKILL);
sleep(1);
return NULL;
}
int main(void)
{
pthread_t th;
long pid = fork();
if (!pid)
pause();
signal(SIGCHLD, SIG_IGN);
assert(pthread_create(&th, NULL, tfunc, (void*)pid) == 0);
int r = waitpid(-1, NULL, __WNOTHREAD);
printf("waitpid: %d %m\n", r);
return 0;
}
Before the patch this program hangs, after this patch waitpid() correctly
fails with errno == -ECHILD.
The problem is, __ptrace_detach() reaps the EXIT_ZOMBIE tracee if its
->real_parent is our sub-thread and we ignore SIGCHLD. But in this case
we should wake up other threads which can sleep in do_wait().
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Vitaly Mayatskikh <vmayatsk@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-09-24 02:56:44 +04:00
* If it ' s our own child , there is no notification to do . But if our normal
* children self - reap , then this child was prevented by ptrace and we must
* reap it now , in that case we must also wake up sub - threads sleeping in
* do_wait ( ) .
2009-04-03 03:58:18 +04:00
*/
static bool __ptrace_detach ( struct task_struct * tracer , struct task_struct * p )
{
2011-06-23 01:08:53 +04:00
bool dead ;
2009-04-03 03:58:18 +04:00
__ptrace_unlink ( p ) ;
2011-06-23 01:08:53 +04:00
if ( p - > exit_state ! = EXIT_ZOMBIE )
return false ;
dead = ! thread_group_leader ( p ) ;
if ( ! dead & & thread_group_empty ( p ) ) {
if ( ! same_thread_group ( p - > real_parent , tracer ) )
dead = do_notify_parent ( p , p - > exit_signal ) ;
else if ( ignoring_children ( tracer - > sighand ) ) {
__wake_up_parent ( p , tracer ) ;
dead = true ;
2009-04-03 03:58:18 +04:00
}
}
2011-06-23 01:08:53 +04:00
/* Mark it as in the process of being reaped. */
if ( dead )
p - > exit_state = EXIT_DEAD ;
return dead ;
2009-04-03 03:58:18 +04:00
}
2011-03-04 20:23:30 +03:00
static int ptrace_detach ( struct task_struct * child , unsigned int data )
2005-04-17 02:20:36 +04:00
{
2005-05-01 19:59:14 +04:00
if ( ! valid_signal ( data ) )
2006-02-15 22:50:10 +03:00
return - EIO ;
2005-04-17 02:20:36 +04:00
/* Architecture-specific hardware disable .. */
ptrace_disable ( child ) ;
2009-04-03 03:58:11 +04:00
write_lock_irq ( & tasklist_lock ) ;
2009-04-03 03:58:18 +04:00
/*
2015-04-16 22:47:32 +03:00
* We rely on ptrace_freeze_traced ( ) . It can ' t be killed and
* untraced by another thread , it can ' t be a zombie .
2009-04-03 03:58:18 +04:00
*/
2015-04-16 22:47:32 +03:00
WARN_ON ( ! child - > ptrace | | child - > exit_state ) ;
/*
* tasklist_lock avoids the race with wait_task_stopped ( ) , see
* the comment in ptrace_resume ( ) .
*/
child - > exit_code = data ;
__ptrace_detach ( current , child ) ;
2005-04-17 02:20:36 +04:00
write_unlock_irq ( & tasklist_lock ) ;
2011-07-15 21:45:18 +04:00
proc_ptrace_connector ( child , PTRACE_DETACH ) ;
2009-04-03 03:58:14 +04:00
2005-04-17 02:20:36 +04:00
return 0 ;
}
2009-04-03 03:58:18 +04:00
/*
2010-08-11 05:03:07 +04:00
* Detach all tasks we were using ptrace on . Called with tasklist held
2014-12-11 02:45:33 +03:00
* for writing .
2009-04-03 03:58:18 +04:00
*/
2014-12-11 02:45:33 +03:00
void exit_ptrace ( struct task_struct * tracer , struct list_head * dead )
2009-04-03 03:58:18 +04:00
{
struct task_struct * p , * n ;
2010-08-11 05:03:07 +04:00
2009-04-03 03:58:18 +04:00
list_for_each_entry_safe ( p , n , & tracer - > ptraced , ptrace_entry ) {
2012-12-18 04:03:07 +04:00
if ( unlikely ( p - > ptrace & PT_EXITKILL ) )
2018-09-03 11:32:52 +03:00
send_sig_info ( SIGKILL , SEND_SIG_PRIV , p ) ;
2012-12-18 04:03:07 +04:00
2009-04-03 03:58:18 +04:00
if ( __ptrace_detach ( tracer , p ) )
2014-12-11 02:45:33 +03:00
list_add ( & p - > ptrace_entry , dead ) ;
2009-04-03 03:58:18 +04:00
}
}
2005-04-17 02:20:36 +04:00
/*
 * Copy up to @len bytes from address @src in @tsk's address space to the
 * user buffer @dst, going through a 128-byte bounce buffer.
 *
 * Return: the number of bytes copied; -EIO if nothing at all could be
 * read from @tsk; -EFAULT if writing to @dst faults.
 */
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
{
	char bounce[128];
	int total = 0;

	while (len > 0) {
		int chunk = len;
		int got;

		if (chunk > sizeof(bounce))
			chunk = sizeof(bounce);

		got = ptrace_access_vm(tsk, src, bounce, chunk, FOLL_FORCE);
		if (!got) {
			/* A short read is fine; an empty one is an error. */
			return total ? total : -EIO;
		}

		if (copy_to_user(dst, bounce, got))
			return -EFAULT;

		total += got;
		src += got;
		dst += got;
		len -= got;
	}

	return total;
}
/*
 * Copy up to @len bytes from the user buffer @src to address @dst in
 * @tsk's address space, going through a 128-byte bounce buffer.
 *
 * Return: the number of bytes written; -EIO if nothing at all could be
 * written to @tsk; -EFAULT if reading from @src faults.
 */
int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len)
{
	char bounce[128];
	int total = 0;

	while (len > 0) {
		int chunk = len;
		int put;

		if (chunk > sizeof(bounce))
			chunk = sizeof(bounce);

		if (copy_from_user(bounce, src, chunk))
			return -EFAULT;

		put = ptrace_access_vm(tsk, dst, bounce, chunk,
				       FOLL_FORCE | FOLL_WRITE);
		if (!put) {
			/* A short write is fine; an empty one is an error. */
			return total ? total : -EIO;
		}

		total += put;
		src += put;
		dst += put;
		len -= put;
	}

	return total;
}
2010-10-28 02:33:45 +04:00
static int ptrace_setoptions ( struct task_struct * child , unsigned long data )
2005-04-17 02:20:36 +04:00
{
2012-03-24 02:02:41 +04:00
unsigned flags ;
ptrace: don't modify flags on PTRACE_SETOPTIONS failure
On ptrace(PTRACE_SETOPTIONS, pid, 0, <opts>), we used to set those
option bits which are known, and then fail with -EINVAL if there are
some unknown bits in <opts>.
This is inconsistent with typical error handling, which does not change
any state if input is invalid.
This patch changes PTRACE_SETOPTIONS behavior so that in this case, we
return -EINVAL and don't change any bits in task->ptrace.
It's very unlikely that there is userspace code in the wild which will
be affected by this change: it should have the form
ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_BOGUSOPT)
where PTRACE_O_BOGUSOPT is a constant unknown to the kernel. But kernel
headers, naturally, don't contain any PTRACE_O_BOGUSOPTs, thus the only
way userspace can use one if it defines one itself. I can't see why
anyone would do such a thing deliberately.
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Pedro Alves <palves@redhat.com>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-03-24 02:02:40 +04:00
if ( data & ~ ( unsigned long ) PTRACE_O_MASK )
return - EINVAL ;
seccomp: add ptrace options for suspend/resume
This patch is the first step in enabling checkpoint/restore of processes
with seccomp enabled.
One of the things CRIU does while dumping tasks is inject code into them
via ptrace to collect information that is only available to the process
itself. However, if we are in a seccomp mode where these processes are
prohibited from making these syscalls, then what CRIU does kills the task.
This patch adds a new ptrace option, PTRACE_O_SUSPEND_SECCOMP, that enables
a task from the init user namespace which has CAP_SYS_ADMIN and no seccomp
filters to disable (and re-enable) seccomp filters for another task so that
they can be successfully dumped (and restored). We restrict the set of
processes that can disable seccomp through ptrace because although today
ptrace can be used to bypass seccomp, there is some discussion of closing
this loophole in the future and we would like this patch to not depend on
that behavior and be future proofed for when it is removed.
Note that seccomp can be suspended before any filters are actually
installed; this behavior is useful on criu restore, so that we can suspend
seccomp, restore the filters, unmap our restore code from the restored
process' address space, and then resume the task by detaching and have the
filters resumed as well.
v2 changes:
* require that the tracer have no seccomp filters installed
* drop TIF_NOTSC manipulation from the patch
* change from ptrace command to a ptrace option and use this ptrace option
as the flag to check. This means that as soon as the tracer
detaches/dies, seccomp is re-enabled and as a corrollary that one can not
disable seccomp across PTRACE_ATTACHs.
v3 changes:
* get rid of various #ifdefs everywhere
* report more sensible errors when PTRACE_O_SUSPEND_SECCOMP is incorrectly
used
v4 changes:
* get rid of may_suspend_seccomp() in favor of a capable() check in ptrace
directly
v5 changes:
* check that seccomp is not enabled (or suspended) on the tracer
Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com>
CC: Will Drewry <wad@chromium.org>
CC: Roland McGrath <roland@hack.frob.com>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>
[kees: access seccomp.mode through seccomp_mode() instead]
Signed-off-by: Kees Cook <keescook@chromium.org>
2015-06-13 18:02:48 +03:00
if ( unlikely ( data & PTRACE_O_SUSPEND_SECCOMP ) ) {
2016-08-03 23:45:50 +03:00
if ( ! IS_ENABLED ( CONFIG_CHECKPOINT_RESTORE ) | |
! IS_ENABLED ( CONFIG_SECCOMP ) )
seccomp: add ptrace options for suspend/resume
This patch is the first step in enabling checkpoint/restore of processes
with seccomp enabled.
One of the things CRIU does while dumping tasks is inject code into them
via ptrace to collect information that is only available to the process
itself. However, if we are in a seccomp mode where these processes are
prohibited from making these syscalls, then what CRIU does kills the task.
This patch adds a new ptrace option, PTRACE_O_SUSPEND_SECCOMP, that enables
a task from the init user namespace which has CAP_SYS_ADMIN and no seccomp
filters to disable (and re-enable) seccomp filters for another task so that
they can be successfully dumped (and restored). We restrict the set of
processes that can disable seccomp through ptrace because although today
ptrace can be used to bypass seccomp, there is some discussion of closing
this loophole in the future and we would like this patch to not depend on
that behavior and be future proofed for when it is removed.
Note that seccomp can be suspended before any filters are actually
installed; this behavior is useful on criu restore, so that we can suspend
seccomp, restore the filters, unmap our restore code from the restored
process' address space, and then resume the task by detaching and have the
filters resumed as well.
v2 changes:
* require that the tracer have no seccomp filters installed
* drop TIF_NOTSC manipulation from the patch
* change from ptrace command to a ptrace option and use this ptrace option
as the flag to check. This means that as soon as the tracer
detaches/dies, seccomp is re-enabled and as a corrollary that one can not
disable seccomp across PTRACE_ATTACHs.
v3 changes:
* get rid of various #ifdefs everywhere
* report more sensible errors when PTRACE_O_SUSPEND_SECCOMP is incorrectly
used
v4 changes:
* get rid of may_suspend_seccomp() in favor of a capable() check in ptrace
directly
v5 changes:
* check that seccomp is not enabled (or suspended) on the tracer
Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com>
CC: Will Drewry <wad@chromium.org>
CC: Roland McGrath <roland@hack.frob.com>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: Serge E. Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>
[kees: access seccomp.mode through seccomp_mode() instead]
Signed-off-by: Kees Cook <keescook@chromium.org>
2015-06-13 18:02:48 +03:00
return - EINVAL ;
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EPERM ;
if ( seccomp_mode ( & current - > seccomp ) ! = SECCOMP_MODE_DISABLED | |
current - > ptrace & PT_SUSPEND_SECCOMP )
return - EPERM ;
}
2012-03-24 02:02:41 +04:00
/* Avoid intermediate state when all opts are cleared */
flags = child - > ptrace ;
flags & = ~ ( PTRACE_O_MASK < < PT_OPT_FLAG_SHIFT ) ;
flags | = ( data < < PT_OPT_FLAG_SHIFT ) ;
child - > ptrace = flags ;
2005-04-17 02:20:36 +04:00
ptrace: don't modify flags on PTRACE_SETOPTIONS failure
On ptrace(PTRACE_SETOPTIONS, pid, 0, <opts>), we used to set those
option bits which are known, and then fail with -EINVAL if there are
some unknown bits in <opts>.
This is inconsistent with typical error handling, which does not change
any state if input is invalid.
This patch changes PTRACE_SETOPTIONS behavior so that in this case, we
return -EINVAL and don't change any bits in task->ptrace.
It's very unlikely that there is userspace code in the wild which will
be affected by this change: it should have the form
ptrace(PTRACE_SETOPTIONS, pid, 0, PTRACE_O_BOGUSOPT)
where PTRACE_O_BOGUSOPT is a constant unknown to the kernel. But kernel
headers, naturally, don't contain any PTRACE_O_BOGUSOPTs, thus the only
way userspace can use one if it defines one itself. I can't see why
anyone would do such a thing deliberately.
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Pedro Alves <palves@redhat.com>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-03-24 02:02:40 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2018-09-25 12:27:20 +03:00
/*
 * Copy @child's ->last_siginfo into @info under the child's sighand lock.
 *
 * Return: -ESRCH if the sighand lock cannot be taken, -EINVAL if the
 * child has no ->last_siginfo, 0 on success.
 */
static int ptrace_getsiginfo(struct task_struct *child, kernel_siginfo_t *info)
{
	unsigned long irqflags;
	int err;

	if (!lock_task_sighand(child, &irqflags))
		return -ESRCH;

	if (likely(child->last_siginfo != NULL)) {
		copy_siginfo(info, child->last_siginfo);
		err = 0;
	} else {
		err = -EINVAL;
	}

	unlock_task_sighand(child, &irqflags);

	return err;
}
2018-09-25 12:27:20 +03:00
static int ptrace_setsiginfo ( struct task_struct * child , const kernel_siginfo_t * info )
2005-04-17 02:20:36 +04:00
{
2009-06-18 03:27:36 +04:00
unsigned long flags ;
2005-04-17 02:20:36 +04:00
int error = - ESRCH ;
2009-06-18 03:27:36 +04:00
if ( lock_task_sighand ( child , & flags ) ) {
2005-04-17 02:20:36 +04:00
error = - EINVAL ;
if ( likely ( child - > last_siginfo ! = NULL ) ) {
2017-07-24 23:08:16 +03:00
copy_siginfo ( child - > last_siginfo , info ) ;
2005-04-17 02:20:36 +04:00
error = 0 ;
}
2009-06-18 03:27:36 +04:00
unlock_task_sighand ( child , & flags ) ;
2005-04-17 02:20:36 +04:00
}
return error ;
}
2013-05-01 02:27:59 +04:00
static int ptrace_peek_siginfo ( struct task_struct * child ,
unsigned long addr ,
unsigned long data )
{
struct ptrace_peeksiginfo_args arg ;
struct sigpending * pending ;
struct sigqueue * q ;
int ret , i ;
ret = copy_from_user ( & arg , ( void __user * ) addr ,
sizeof ( struct ptrace_peeksiginfo_args ) ) ;
if ( ret )
return - EFAULT ;
if ( arg . flags & ~ PTRACE_PEEKSIGINFO_SHARED )
return - EINVAL ; /* unknown flags */
if ( arg . nr < 0 )
return - EINVAL ;
2019-05-29 02:46:37 +03:00
/* Ensure arg.off fits in an unsigned long */
if ( arg . off > ULONG_MAX )
return 0 ;
2013-05-01 02:27:59 +04:00
if ( arg . flags & PTRACE_PEEKSIGINFO_SHARED )
pending = & child - > signal - > shared_pending ;
else
pending = & child - > pending ;
for ( i = 0 ; i < arg . nr ; ) {
2018-09-25 12:27:20 +03:00
kernel_siginfo_t info ;
2019-05-29 02:46:37 +03:00
unsigned long off = arg . off + i ;
bool found = false ;
2013-05-01 02:27:59 +04:00
spin_lock_irq ( & child - > sighand - > siglock ) ;
list_for_each_entry ( q , & pending - > list , list ) {
if ( ! off - - ) {
2019-05-29 02:46:37 +03:00
found = true ;
2013-05-01 02:27:59 +04:00
copy_siginfo ( & info , & q - > info ) ;
break ;
}
}
spin_unlock_irq ( & child - > sighand - > siglock ) ;
2019-05-29 02:46:37 +03:00
if ( ! found ) /* beyond the end of the list */
2013-05-01 02:27:59 +04:00
break ;
# ifdef CONFIG_COMPAT
2016-03-23 00:24:55 +03:00
if ( unlikely ( in_compat_syscall ( ) ) ) {
2013-05-01 02:27:59 +04:00
compat_siginfo_t __user * uinfo = compat_ptr ( data ) ;
2017-07-17 06:36:59 +03:00
if ( copy_siginfo_to_user32 ( uinfo , & info ) ) {
2013-06-28 17:49:46 +04:00
ret = - EFAULT ;
break ;
}
2013-05-01 02:27:59 +04:00
} else
# endif
{
siginfo_t __user * uinfo = ( siginfo_t __user * ) data ;
2017-07-17 06:36:59 +03:00
if ( copy_siginfo_to_user ( uinfo , & info ) ) {
2013-06-28 17:49:46 +04:00
ret = - EFAULT ;
break ;
}
2013-05-01 02:27:59 +04:00
}
data + = sizeof ( siginfo_t ) ;
i + + ;
if ( signal_pending ( current ) )
break ;
cond_resched ( ) ;
}
if ( i > 0 )
return i ;
return ret ;
}
2008-01-30 15:30:51 +03:00
/*
 * is_singlestep(request): true when @request is PTRACE_SINGLESTEP.
 * Expands to constant 0 when PTRACE_SINGLESTEP is not defined.
 */
# ifdef PTRACE_SINGLESTEP
# define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
# else
# define is_singlestep(request) 0
# endif
2008-01-30 15:30:53 +03:00
/*
 * is_singleblock(request): true when @request is PTRACE_SINGLEBLOCK.
 * Expands to constant 0 when PTRACE_SINGLEBLOCK is not defined.
 */
# ifdef PTRACE_SINGLEBLOCK
# define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK)
# else
# define is_singleblock(request) 0
# endif
2008-01-30 15:30:51 +03:00
/*
 * is_sysemu_singlestep(request): true when @request is
 * PTRACE_SYSEMU_SINGLESTEP.  Expands to constant 0 when PTRACE_SYSEMU is
 * not defined.
 *
 * NOTE(review): the guard tests PTRACE_SYSEMU while the comparison uses
 * PTRACE_SYSEMU_SINGLESTEP; presumably the two are always defined
 * together -- confirm against the arch headers.
 */
# ifdef PTRACE_SYSEMU
# define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP)
# else
# define is_sysemu_singlestep(request) 0
# endif
2010-10-28 02:33:45 +04:00
static int ptrace_resume ( struct task_struct * child , long request ,
unsigned long data )
2008-01-30 15:30:51 +03:00
{
ptrace: fix race between ptrace_resume() and wait_task_stopped()
ptrace_resume() is called when the tracee is still __TASK_TRACED. We set
tracee->exit_code and then wake_up_state() changes tracee->state. If the
tracer's sub-thread does wait() in between, task_stopped_code(ptrace => T)
wrongly looks like another report from tracee.
This confuses debugger, and since wait_task_stopped() clears ->exit_code
the tracee can miss a signal.
Test-case:
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/ptrace.h>
#include <pthread.h>
#include <assert.h>
int pid;
void *waiter(void *arg)
{
int stat;
for (;;) {
assert(pid == wait(&stat));
assert(WIFSTOPPED(stat));
if (WSTOPSIG(stat) == SIGHUP)
continue;
assert(WSTOPSIG(stat) == SIGCONT);
printf("ERR! extra/wrong report:%x\n", stat);
}
}
int main(void)
{
pthread_t thread;
pid = fork();
if (!pid) {
assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
for (;;)
kill(getpid(), SIGHUP);
}
assert(pthread_create(&thread, NULL, waiter, NULL) == 0);
for (;;)
ptrace(PTRACE_CONT, pid, 0, SIGCONT);
return 0;
}
Note for stable: the bug is very old, but without 9899d11f6544 "ptrace:
ensure arch_ptrace/ptrace_request can never race with SIGKILL" the fix
should use lock_task_sighand(child).
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Pavel Labath <labath@google.com>
Tested-by: Pavel Labath <labath@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-04-16 22:47:29 +03:00
bool need_siglock ;
2008-01-30 15:30:51 +03:00
if ( ! valid_signal ( data ) )
return - EIO ;
if ( request = = PTRACE_SYSCALL )
set_tsk_thread_flag ( child , TIF_SYSCALL_TRACE ) ;
else
clear_tsk_thread_flag ( child , TIF_SYSCALL_TRACE ) ;
# ifdef TIF_SYSCALL_EMU
if ( request = = PTRACE_SYSEMU | | request = = PTRACE_SYSEMU_SINGLESTEP )
set_tsk_thread_flag ( child , TIF_SYSCALL_EMU ) ;
else
clear_tsk_thread_flag ( child , TIF_SYSCALL_EMU ) ;
# endif
2008-01-30 15:30:53 +03:00
if ( is_singleblock ( request ) ) {
if ( unlikely ( ! arch_has_block_step ( ) ) )
return - EIO ;
user_enable_block_step ( child ) ;
} else if ( is_singlestep ( request ) | | is_sysemu_singlestep ( request ) ) {
2008-01-30 15:30:51 +03:00
if ( unlikely ( ! arch_has_single_step ( ) ) )
return - EIO ;
user_enable_single_step ( child ) ;
2009-04-08 10:21:06 +04:00
} else {
2008-01-30 15:30:51 +03:00
user_disable_single_step ( child ) ;
2009-04-08 10:21:06 +04:00
}
2008-01-30 15:30:51 +03:00
ptrace: fix race between ptrace_resume() and wait_task_stopped()
ptrace_resume() is called when the tracee is still __TASK_TRACED. We set
tracee->exit_code and then wake_up_state() changes tracee->state. If the
tracer's sub-thread does wait() in between, task_stopped_code(ptrace => T)
wrongly looks like another report from tracee.
This confuses debugger, and since wait_task_stopped() clears ->exit_code
the tracee can miss a signal.
Test-case:
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/ptrace.h>
#include <pthread.h>
#include <assert.h>
int pid;
void *waiter(void *arg)
{
int stat;
for (;;) {
assert(pid == wait(&stat));
assert(WIFSTOPPED(stat));
if (WSTOPSIG(stat) == SIGHUP)
continue;
assert(WSTOPSIG(stat) == SIGCONT);
printf("ERR! extra/wrong report:%x\n", stat);
}
}
int main(void)
{
pthread_t thread;
pid = fork();
if (!pid) {
assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
for (;;)
kill(getpid(), SIGHUP);
}
assert(pthread_create(&thread, NULL, waiter, NULL) == 0);
for (;;)
ptrace(PTRACE_CONT, pid, 0, SIGCONT);
return 0;
}
Note for stable: the bug is very old, but without 9899d11f6544 "ptrace:
ensure arch_ptrace/ptrace_request can never race with SIGKILL" the fix
should use lock_task_sighand(child).
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Pavel Labath <labath@google.com>
Tested-by: Pavel Labath <labath@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-04-16 22:47:29 +03:00
/*
* Change - > exit_code and - > state under siglock to avoid the race
* with wait_task_stopped ( ) in between ; a non - zero - > exit_code will
* wrongly look like another report from tracee .
*
* Note that we need siglock even if - > exit_code = = data and / or this
* status was not reported yet , the new status must not be cleared by
* wait_task_stopped ( ) after resume .
*
* If data = = 0 we do not care if wait_task_stopped ( ) reports the old
* status and clears the code too ; this can ' t race with the tracee , it
* takes siglock after resume .
*/
need_siglock = data & & ! thread_group_empty ( current ) ;
if ( need_siglock )
spin_lock_irq ( & child - > sighand - > siglock ) ;
2008-01-30 15:30:51 +03:00
child - > exit_code = data ;
ptrace: ptrace_resume() shouldn't wake up !TASK_TRACED thread
It is not clear why ptrace_resume() does wake_up_process(). Unless the
caller is PTRACE_KILL the tracee should be TASK_TRACED so we can use
wake_up_state(__TASK_TRACED). If sys_ptrace() races with SIGKILL we do
not need the extra and potentionally spurious wakeup.
If the caller is PTRACE_KILL, wake_up_process() is even more wrong.
The tracee can sleep in any state in any place, and if we have a buggy
code which doesn't handle a spurious wakeup correctly PTRACE_KILL can
be used to exploit it. For example:
int main(void)
{
int child, status;
child = fork();
if (!child) {
int ret;
assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
ret = pause();
printf("pause: %d %m\n", ret);
return 0x23;
}
sleep(1);
assert(ptrace(PTRACE_KILL, child, 0,0) == 0);
assert(child == wait(&status));
printf("wait: %x\n", status);
return 0;
}
prints "pause: -1 Unknown error 514", -ERESTARTNOHAND leaks to the
userland. In this case sys_pause() is buggy as well and should be
fixed.
I do not know what was the original rationality behind PTRACE_KILL.
The man page is simply wrong and afaics it was always wrong. Imho
it should be deprecated, or may be it should do send_sig(SIGKILL)
as Denys suggests, but in any case I do not think that the current
behaviour was intentional.
Note: there is another problem, ptrace_resume() changes ->exit_code
and this can race with SIGKILL too. Eventually we should change ptrace
to not use ->exit_code.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
2011-05-25 21:20:21 +04:00
wake_up_state ( child , __TASK_TRACED ) ;
ptrace: fix race between ptrace_resume() and wait_task_stopped()
ptrace_resume() is called when the tracee is still __TASK_TRACED. We set
tracee->exit_code and then wake_up_state() changes tracee->state. If the
tracer's sub-thread does wait() in between, task_stopped_code(ptrace => T)
wrongly looks like another report from tracee.
This confuses debugger, and since wait_task_stopped() clears ->exit_code
the tracee can miss a signal.
Test-case:
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/ptrace.h>
#include <pthread.h>
#include <assert.h>
int pid;
void *waiter(void *arg)
{
int stat;
for (;;) {
assert(pid == wait(&stat));
assert(WIFSTOPPED(stat));
if (WSTOPSIG(stat) == SIGHUP)
continue;
assert(WSTOPSIG(stat) == SIGCONT);
printf("ERR! extra/wrong report:%x\n", stat);
}
}
int main(void)
{
pthread_t thread;
pid = fork();
if (!pid) {
assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0);
for (;;)
kill(getpid(), SIGHUP);
}
assert(pthread_create(&thread, NULL, waiter, NULL) == 0);
for (;;)
ptrace(PTRACE_CONT, pid, 0, SIGCONT);
return 0;
}
Note for stable: the bug is very old, but without 9899d11f6544 "ptrace:
ensure arch_ptrace/ptrace_request can never race with SIGKILL" the fix
should use lock_task_sighand(child).
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Pavel Labath <labath@google.com>
Tested-by: Pavel Labath <labath@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-04-16 22:47:29 +03:00
if ( need_siglock )
spin_unlock_irq ( & child - > sighand - > siglock ) ;
2008-01-30 15:30:51 +03:00
return 0 ;
}
2010-02-11 22:51:00 +03:00
# ifdef CONFIG_HAVE_ARCH_TRACEHOOK
static const struct user_regset *
find_regset ( const struct user_regset_view * view , unsigned int type )
{
const struct user_regset * regset ;
int n ;
for ( n = 0 ; n < view - > n ; + + n ) {
regset = view - > regsets + n ;
if ( regset - > core_note_type = = type )
return regset ;
}
return NULL ;
}
static int ptrace_regset ( struct task_struct * task , int req , unsigned int type ,
struct iovec * kiov )
{
const struct user_regset_view * view = task_user_regset_view ( task ) ;
const struct user_regset * regset = find_regset ( view , type ) ;
int regset_no ;
if ( ! regset | | ( kiov - > iov_len % regset - > size ) ! = 0 )
2010-02-23 01:51:32 +03:00
return - EINVAL ;
2010-02-11 22:51:00 +03:00
regset_no = regset - view - > regsets ;
kiov - > iov_len = min ( kiov - > iov_len ,
( __kernel_size_t ) ( regset - > n * regset - > size ) ) ;
if ( req = = PTRACE_GETREGSET )
return copy_regset_to_user ( task , view , regset_no , 0 ,
kiov - > iov_len , kiov - > iov_base ) ;
else
return copy_regset_from_user ( task , view , regset_no , 0 ,
kiov - > iov_len , kiov - > iov_base ) ;
}
2013-01-13 22:03:34 +04:00
/*
* This is declared in linux / regset . h and defined in machine - dependent
* code . We put the export here , near the primary machine - neutral use ,
* to ensure no machine forgets it .
*/
EXPORT_SYMBOL_GPL ( task_user_regset_view ) ;
ptrace: add PTRACE_GET_SYSCALL_INFO request
PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain
details of the syscall the tracee is blocked in.
There are two reasons for a special syscall-related ptrace request.
Firstly, with the current ptrace API there are cases when ptracer cannot
retrieve necessary information about syscalls. Some examples include:
* The notorious int-0x80-from-64-bit-task issue. See [1] for details.
In short, if a 64-bit task performs a syscall through int 0x80, its
tracer has no reliable means to find out that the syscall was, in
fact, a compat syscall, and misidentifies it.
* Syscall-enter-stop and syscall-exit-stop look the same for the
tracer. Common practice is to keep track of the sequence of
ptrace-stops in order not to mix the two syscall-stops up. But it is
not as simple as it looks; for example, strace had a (just recently
fixed) long-standing bug where attaching strace to a tracee that is
performing the execve system call led to the tracer identifying the
following syscall-exit-stop as syscall-enter-stop, which messed up
all the state tracking.
* Since the introduction of commit 84d77d3f06e7 ("ptrace: Don't allow
accessing an undumpable mm"), both PTRACE_PEEKDATA and
process_vm_readv become unavailable when the process dumpable flag is
cleared. On such architectures as ia64 this results in all syscall
arguments being unavailable for the tracer.
Secondly, ptracers also have to support a lot of arch-specific code for
obtaining information about the tracee. For some architectures, this
requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall
argument and return value.
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid,
void *addr, void *data);
...
PTRACE_GET_SYSCALL_INFO
Retrieve information about the syscall that caused the stop.
The information is placed into the buffer pointed by "data"
argument, which should be a pointer to a buffer of type
"struct ptrace_syscall_info".
The "addr" argument contains the size of the buffer pointed to
by "data" argument (i.e., sizeof(struct ptrace_syscall_info)).
The return value contains the number of bytes available
to be written by the kernel.
If the size of data to be written by the kernel exceeds the size
specified by "addr" argument, the output is truncated.
[ldv@altlinux.org: selftests/seccomp/seccomp_bpf: update for PTRACE_GET_SYSCALL_INFO]
Link: http://lkml.kernel.org/r/20190708182904.GA12332@altlinux.org
Link: http://lkml.kernel.org/r/20190510152842.GF28558@altlinux.org
Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Co-developed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Eugene Syromyatnikov <esyr@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greentime Hu <greentime@andestech.com>
Cc: Helge Deller <deller@gmx.de> [parisc]
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: kbuild test robot <lkp@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vincent Chen <deanbo422@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 02:29:42 +03:00
static unsigned long
ptrace_get_syscall_info_entry ( struct task_struct * child , struct pt_regs * regs ,
struct ptrace_syscall_info * info )
{
unsigned long args [ ARRAY_SIZE ( info - > entry . args ) ] ;
int i ;
info - > op = PTRACE_SYSCALL_INFO_ENTRY ;
info - > entry . nr = syscall_get_nr ( child , regs ) ;
syscall_get_arguments ( child , regs , args ) ;
for ( i = 0 ; i < ARRAY_SIZE ( args ) ; i + + )
info - > entry . args [ i ] = args [ i ] ;
/* args is the last field in struct ptrace_syscall_info.entry */
return offsetofend ( struct ptrace_syscall_info , entry . args ) ;
}
static unsigned long
ptrace_get_syscall_info_seccomp ( struct task_struct * child , struct pt_regs * regs ,
struct ptrace_syscall_info * info )
{
/*
* As struct ptrace_syscall_info . entry is currently a subset
* of struct ptrace_syscall_info . seccomp , it makes sense to
* initialize that subset using ptrace_get_syscall_info_entry ( ) .
* This can be reconsidered in the future if these structures
* diverge significantly enough .
*/
ptrace_get_syscall_info_entry ( child , regs , info ) ;
info - > op = PTRACE_SYSCALL_INFO_SECCOMP ;
info - > seccomp . ret_data = child - > ptrace_message ;
/* ret_data is the last field in struct ptrace_syscall_info.seccomp */
return offsetofend ( struct ptrace_syscall_info , seccomp . ret_data ) ;
}
static unsigned long
ptrace_get_syscall_info_exit ( struct task_struct * child , struct pt_regs * regs ,
struct ptrace_syscall_info * info )
{
info - > op = PTRACE_SYSCALL_INFO_EXIT ;
info - > exit . rval = syscall_get_error ( child , regs ) ;
info - > exit . is_error = ! ! info - > exit . rval ;
if ( ! info - > exit . is_error )
info - > exit . rval = syscall_get_return_value ( child , regs ) ;
/* is_error is the last field in struct ptrace_syscall_info.exit */
return offsetofend ( struct ptrace_syscall_info , exit . is_error ) ;
}
/*
 * Implementation of the PTRACE_GET_SYSCALL_INFO request.
 *
 * @child:     the tracee being queried
 * @user_size: size of the buffer at @datavp, as supplied by the tracer
 * @datavp:    user buffer receiving a struct ptrace_syscall_info
 *
 * Builds the structure on the stack, lets the helper matching the kind of
 * stop fill in the variant part, and copies at most @user_size bytes out.
 *
 * Returns the number of bytes the kernel had available to write (which may
 * exceed @user_size, in which case the output was truncated), or -EFAULT
 * if the copy to user space failed.
 */
static int
ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size,
			void __user *datavp)
{
	struct pt_regs *regs = task_pt_regs(child);
	struct ptrace_syscall_info info = {
		.op = PTRACE_SYSCALL_INFO_NONE,
		.arch = syscall_get_arch(child),
		.instruction_pointer = instruction_pointer(regs),
		.stack_pointer = user_stack_pointer(regs),
	};
	/* With no syscall-related stop only the common header is reported. */
	unsigned long actual_size = offsetof(struct ptrace_syscall_info, entry);
	unsigned long write_size;
	int stop_code = 0;

	/*
	 * child->last_siginfo is read without lock_task_sighand():
	 * ptrace_check_attach() has already called ptrace_freeze_traced(),
	 * which ensures that the tracee cannot go away and clear its
	 * last_siginfo.
	 */
	if (child->last_siginfo)
		stop_code = child->last_siginfo->si_code;

	if (stop_code == (SIGTRAP | 0x80)) {
		/* ptrace_message distinguishes syscall entry from exit. */
		if (child->ptrace_message == PTRACE_EVENTMSG_SYSCALL_ENTRY)
			actual_size = ptrace_get_syscall_info_entry(child, regs,
								    &info);
		else if (child->ptrace_message == PTRACE_EVENTMSG_SYSCALL_EXIT)
			actual_size = ptrace_get_syscall_info_exit(child, regs,
								   &info);
	} else if (stop_code == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
		actual_size = ptrace_get_syscall_info_seccomp(child, regs,
							      &info);
	}

	/* Never write more than the tracer's buffer can hold. */
	write_size = min(actual_size, user_size);
	if (copy_to_user(datavp, &info, write_size))
		return -EFAULT;

	return actual_size;
}
# endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
2010-02-11 22:51:00 +03:00
2005-04-17 02:20:36 +04:00
int ptrace_request ( struct task_struct * child , long request ,
2010-10-28 02:33:45 +04:00
unsigned long addr , unsigned long data )
2005-04-17 02:20:36 +04:00
{
ptrace: implement PTRACE_INTERRUPT
Currently, there's no way to trap a running ptracee short of sending a
signal which has various side effects. This patch implements
PTRACE_INTERRUPT which traps ptracee without any signal or job control
related side effect.
The implementation is almost trivial. It uses the group stop trap -
SIGTRAP | PTRACE_EVENT_STOP << 8. A new trap flag
JOBCTL_TRAP_INTERRUPT is added, which is set on PTRACE_INTERRUPT and
cleared when any trap happens. As INTERRUPT should be useable
regardless of the current state of tracee, task_is_traced() test in
ptrace_check_attach() is skipped for INTERRUPT.
PTRACE_INTERRUPT is available iff tracee is attached with
PTRACE_SEIZE.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive pid=%d\n", getpid());
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: INTERRUPT and DETACH\n");
ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL);
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_DETACH, tracee, NULL, NULL);
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
kill(tracee, SIGKILL);
return 0;
}
When called without argument, tracee is seized from running state,
interrupted and then detached back to running state.
# ./test-interrupt
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: INTERRUPT and DETACH
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: exiting
When called with argument, tracee is seized from stopped state,
continued, interrupted and then detached back to stopped state.
# ./test-interrupt 1
tracee: alive pid=4548
tracee: alive pid=4548
tracee: alive pid=4548
tracer: INTERRUPT and DETACH
tracer: exiting
Before PTRACE_INTERRUPT, once the tracee was running, there was no way
to trap tracee and do PTRACE_DETACH without causing side effect.
-v2: Updated to use task_set_jobctl_pending() so that it doesn't end
up scheduling TRAP_STOP if child is dying which may make the
child unkillable. Spotted by Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:16 +04:00
bool seized = child - > ptrace & PT_SEIZED ;
2005-04-17 02:20:36 +04:00
int ret = - EIO ;
2018-09-25 12:27:20 +03:00
kernel_siginfo_t siginfo , * si ;
2010-10-28 02:33:46 +04:00
void __user * datavp = ( void __user * ) data ;
unsigned long __user * datalp = datavp ;
ptrace: implement PTRACE_INTERRUPT
Currently, there's no way to trap a running ptracee short of sending a
signal which has various side effects. This patch implements
PTRACE_INTERRUPT which traps ptracee without any signal or job control
related side effect.
The implementation is almost trivial. It uses the group stop trap -
SIGTRAP | PTRACE_EVENT_STOP << 8. A new trap flag
JOBCTL_TRAP_INTERRUPT is added, which is set on PTRACE_INTERRUPT and
cleared when any trap happens. As INTERRUPT should be useable
regardless of the current state of tracee, task_is_traced() test in
ptrace_check_attach() is skipped for INTERRUPT.
PTRACE_INTERRUPT is available iff tracee is attached with
PTRACE_SEIZE.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive pid=%d\n", getpid());
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: INTERRUPT and DETACH\n");
ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL);
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_DETACH, tracee, NULL, NULL);
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
kill(tracee, SIGKILL);
return 0;
}
When called without argument, tracee is seized from running state,
interrupted and then detached back to running state.
# ./test-interrupt
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: INTERRUPT and DETACH
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: exiting
When called with argument, tracee is seized from stopped state,
continued, interrupted and then detached back to stopped state.
# ./test-interrupt 1
tracee: alive pid=4548
tracee: alive pid=4548
tracee: alive pid=4548
tracer: INTERRUPT and DETACH
tracer: exiting
Before PTRACE_INTERRUPT, once the tracee was running, there was no way
to trap tracee and do PTRACE_DETACH without causing side effect.
-v2: Updated to use task_set_jobctl_pending() so that it doesn't end
up scheduling TRAP_STOP if child is dying which may make the
child unkillable. Spotted by Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:16 +04:00
unsigned long flags ;
2005-04-17 02:20:36 +04:00
switch ( request ) {
2008-01-30 15:31:47 +03:00
case PTRACE_PEEKTEXT :
case PTRACE_PEEKDATA :
return generic_ptrace_peekdata ( child , addr , data ) ;
case PTRACE_POKETEXT :
case PTRACE_POKEDATA :
return generic_ptrace_pokedata ( child , addr , data ) ;
2005-04-17 02:20:36 +04:00
# ifdef PTRACE_OLDSETOPTIONS
case PTRACE_OLDSETOPTIONS :
# endif
case PTRACE_SETOPTIONS :
ret = ptrace_setoptions ( child , data ) ;
break ;
case PTRACE_GETEVENTMSG :
2010-10-28 02:33:46 +04:00
ret = put_user ( child - > ptrace_message , datalp ) ;
2005-04-17 02:20:36 +04:00
break ;
2008-04-21 00:10:12 +04:00
2013-05-01 02:27:59 +04:00
case PTRACE_PEEKSIGINFO :
ret = ptrace_peek_siginfo ( child , addr , data ) ;
break ;
2005-04-17 02:20:36 +04:00
case PTRACE_GETSIGINFO :
2008-04-21 00:10:12 +04:00
ret = ptrace_getsiginfo ( child , & siginfo ) ;
if ( ! ret )
2010-10-28 02:33:46 +04:00
ret = copy_siginfo_to_user ( datavp , & siginfo ) ;
2005-04-17 02:20:36 +04:00
break ;
2008-04-21 00:10:12 +04:00
2005-04-17 02:20:36 +04:00
case PTRACE_SETSIGINFO :
2018-04-19 01:30:19 +03:00
ret = copy_siginfo_from_user ( & siginfo , datavp ) ;
if ( ! ret )
2008-04-21 00:10:12 +04:00
ret = ptrace_setsiginfo ( child , & siginfo ) ;
2005-04-17 02:20:36 +04:00
break ;
2008-04-21 00:10:12 +04:00
2019-03-29 06:44:13 +03:00
case PTRACE_GETSIGMASK : {
sigset_t * mask ;
2013-07-04 02:08:12 +04:00
if ( addr ! = sizeof ( sigset_t ) ) {
ret = - EINVAL ;
break ;
}
2019-03-29 06:44:13 +03:00
if ( test_tsk_restore_sigmask ( child ) )
mask = & child - > saved_sigmask ;
else
mask = & child - > blocked ;
if ( copy_to_user ( datavp , mask , sizeof ( sigset_t ) ) )
2013-07-04 02:08:12 +04:00
ret = - EFAULT ;
else
ret = 0 ;
break ;
2019-03-29 06:44:13 +03:00
}
2013-07-04 02:08:12 +04:00
case PTRACE_SETSIGMASK : {
sigset_t new_set ;
if ( addr ! = sizeof ( sigset_t ) ) {
ret = - EINVAL ;
break ;
}
if ( copy_from_user ( & new_set , datavp , sizeof ( sigset_t ) ) ) {
ret = - EFAULT ;
break ;
}
sigdelsetmask ( & new_set , sigmask ( SIGKILL ) | sigmask ( SIGSTOP ) ) ;
/*
* Every thread does recalc_sigpending ( ) after resume , so
* retarget_shared_pending ( ) and recalc_sigpending ( ) are not
* called here .
*/
spin_lock_irq ( & child - > sighand - > siglock ) ;
child - > blocked = new_set ;
spin_unlock_irq ( & child - > sighand - > siglock ) ;
2019-03-29 06:44:13 +03:00
clear_tsk_restore_sigmask ( child ) ;
2013-07-04 02:08:12 +04:00
ret = 0 ;
break ;
}
ptrace: implement PTRACE_INTERRUPT
Currently, there's no way to trap a running ptracee short of sending a
signal which has various side effects. This patch implements
PTRACE_INTERRUPT which traps ptracee without any signal or job control
related side effect.
The implementation is almost trivial. It uses the group stop trap -
SIGTRAP | PTRACE_EVENT_STOP << 8. A new trap flag
JOBCTL_TRAP_INTERRUPT is added, which is set on PTRACE_INTERRUPT and
cleared when any trap happens. As INTERRUPT should be useable
regardless of the current state of tracee, task_is_traced() test in
ptrace_check_attach() is skipped for INTERRUPT.
PTRACE_INTERRUPT is available iff tracee is attached with
PTRACE_SEIZE.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive pid=%d\n", getpid());
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: INTERRUPT and DETACH\n");
ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL);
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_DETACH, tracee, NULL, NULL);
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
kill(tracee, SIGKILL);
return 0;
}
When called without argument, tracee is seized from running state,
interrupted and then detached back to running state.
# ./test-interrupt
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: INTERRUPT and DETACH
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: exiting
When called with argument, tracee is seized from stopped state,
continued, interrupted and then detached back to stopped state.
# ./test-interrupt 1
tracee: alive pid=4548
tracee: alive pid=4548
tracee: alive pid=4548
tracer: INTERRUPT and DETACH
tracer: exiting
Before PTRACE_INTERRUPT, once the tracee was running, there was no way
to trap tracee and do PTRACE_DETACH without causing side effect.
-v2: Updated to use task_set_jobctl_pending() so that it doesn't end
up scheduling TRAP_STOP if child is dying which may make the
child unkillable. Spotted by Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:16 +04:00
case PTRACE_INTERRUPT :
/*
* Stop tracee without any side - effect on signal or job
* control . At least one trap is guaranteed to happen
* after this request . If @ child is already trapped , the
* current trap is not disturbed and another trap will
* happen after the current trap is ended with PTRACE_CONT .
*
* The actual trap might not be PTRACE_EVENT_STOP trap but
* the pending condition is cleared regardless .
*/
if ( unlikely ( ! seized | | ! lock_task_sighand ( child , & flags ) ) )
break ;
ptrace: implement PTRACE_LISTEN
The previous patch implemented async notification for ptrace but it
only worked while tracee is running. This patch introduces
PTRACE_LISTEN which was suggested by Oleg Nesterov.
It's allowed iff tracee is in STOP trap and puts tracee into
quasi-running state - tracee never really runs but wait(2) and
ptrace(2) consider it to be running. While ptracer is listening,
tracee is allowed to re-enter STOP to notify an async event.
Listening state is cleared on the first notification. Ptracer can
also clear it by issuing INTERRUPT - tracee will re-trap into STOP
with listening state cleared.
This allows ptracer to monitor group stop state without running tracee
- use INTERRUPT to put tracee into STOP trap, issue LISTEN and then
wait(2) to wait for the next group stop event. When it happens,
PTRACE_GETSIGINFO provides information to determine the current state.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_LISTEN 0x4208
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts1s = { .tv_sec = 1 };
int main(int argc, char **argv)
{
pid_t tracee, tracer;
int i;
tracee = fork();
if (!tracee)
while (1)
pause();
tracer = fork();
if (!tracer) {
siginfo_t si;
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL);
repeat:
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_GETSIGINFO, tracee, NULL, &si);
if (!si.si_code) {
printf("tracer: SIG %d\n", si.si_signo);
ptrace(PTRACE_CONT, tracee, NULL,
(void *)(unsigned long)si.si_signo);
goto repeat;
}
printf("tracer: stopped=%d signo=%d\n",
si.si_signo != SIGTRAP, si.si_signo);
if (si.si_signo != SIGTRAP)
ptrace(PTRACE_LISTEN, tracee, NULL, NULL);
else
ptrace(PTRACE_CONT, tracee, NULL, NULL);
goto repeat;
}
for (i = 0; i < 3; i++) {
nanosleep(&ts1s, NULL);
printf("mother: SIGSTOP\n");
kill(tracee, SIGSTOP);
nanosleep(&ts1s, NULL);
printf("mother: SIGCONT\n");
kill(tracee, SIGCONT);
}
nanosleep(&ts1s, NULL);
kill(tracer, SIGKILL);
kill(tracee, SIGKILL);
return 0;
}
This is identical to the program to test TRAP_NOTIFY except that
tracee is PTRACE_LISTEN'd instead of PTRACE_CONT'd when group stopped.
This allows ptracer to monitor when group stop ends without running
tracee.
# ./test-listen
tracer: stopped=0 signo=5
mother: SIGSTOP
tracer: SIG 19
tracer: stopped=1 signo=19
mother: SIGCONT
tracer: stopped=0 signo=5
tracer: SIG 18
mother: SIGSTOP
tracer: SIG 19
tracer: stopped=1 signo=19
mother: SIGCONT
tracer: stopped=0 signo=5
tracer: SIG 18
mother: SIGSTOP
tracer: SIG 19
tracer: stopped=1 signo=19
mother: SIGCONT
tracer: stopped=0 signo=5
tracer: SIG 18
-v2: Moved JOBCTL_LISTENING check in wait_task_stopped() into
task_stopped_code() as suggested by Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:18 +04:00
/*
* INTERRUPT doesn ' t disturb existing trap sans one
* exception . If ptracer issued LISTEN for the current
* STOP , this INTERRUPT should clear LISTEN and re - trap
* tracee into STOP .
*/
ptrace: implement PTRACE_INTERRUPT
Currently, there's no way to trap a running ptracee short of sending a
signal which has various side effects. This patch implements
PTRACE_INTERRUPT which traps ptracee without any signal or job control
related side effect.
The implementation is almost trivial. It uses the group stop trap -
SIGTRAP | PTRACE_EVENT_STOP << 8. A new trap flag
JOBCTL_TRAP_INTERRUPT is added, which is set on PTRACE_INTERRUPT and
cleared when any trap happens. As INTERRUPT should be useable
regardless of the current state of tracee, task_is_traced() test in
ptrace_check_attach() is skipped for INTERRUPT.
PTRACE_INTERRUPT is available iff tracee is attached with
PTRACE_SEIZE.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive pid=%d\n", getpid());
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: INTERRUPT and DETACH\n");
ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL);
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_DETACH, tracee, NULL, NULL);
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
kill(tracee, SIGKILL);
return 0;
}
When called without argument, tracee is seized from running state,
interrupted and then detached back to running state.
# ./test-interrupt
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: INTERRUPT and DETACH
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: exiting
When called with argument, tracee is seized from stopped state,
continued, interrupted and then detached back to stopped state.
# ./test-interrupt 1
tracee: alive pid=4548
tracee: alive pid=4548
tracee: alive pid=4548
tracer: INTERRUPT and DETACH
tracer: exiting
Before PTRACE_INTERRUPT, once the tracee was running, there was no way
to trap tracee and do PTRACE_DETACH without causing side effect.
-v2: Updated to use task_set_jobctl_pending() so that it doesn't end
up scheduling TRAP_STOP if child is dying which may make the
child unkillable. Spotted by Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:16 +04:00
if ( likely ( task_set_jobctl_pending ( child , JOBCTL_TRAP_STOP ) ) )
2013-01-21 23:47:41 +04:00
ptrace_signal_wake_up ( child , child - > jobctl & JOBCTL_LISTENING ) ;
ptrace: implement PTRACE_LISTEN
The previous patch implemented async notification for ptrace but it
only worked while tracee is running. This patch introduces
PTRACE_LISTEN which was suggested by Oleg Nesterov.
It's allowed iff tracee is in STOP trap and puts tracee into
quasi-running state - tracee never really runs but wait(2) and
ptrace(2) consider it to be running. While ptracer is listening,
tracee is allowed to re-enter STOP to notify an async event.
Listening state is cleared on the first notification. Ptracer can
also clear it by issuing INTERRUPT - tracee will re-trap into STOP
with listening state cleared.
This allows ptracer to monitor group stop state without running tracee
- use INTERRUPT to put tracee into STOP trap, issue LISTEN and then
wait(2) to wait for the next group stop event. When it happens,
PTRACE_GETSIGINFO provides information to determine the current state.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_LISTEN 0x4208
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts1s = { .tv_sec = 1 };
int main(int argc, char **argv)
{
pid_t tracee, tracer;
int i;
tracee = fork();
if (!tracee)
while (1)
pause();
tracer = fork();
if (!tracer) {
siginfo_t si;
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL);
repeat:
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_GETSIGINFO, tracee, NULL, &si);
if (!si.si_code) {
printf("tracer: SIG %d\n", si.si_signo);
ptrace(PTRACE_CONT, tracee, NULL,
(void *)(unsigned long)si.si_signo);
goto repeat;
}
printf("tracer: stopped=%d signo=%d\n",
si.si_signo != SIGTRAP, si.si_signo);
if (si.si_signo != SIGTRAP)
ptrace(PTRACE_LISTEN, tracee, NULL, NULL);
else
ptrace(PTRACE_CONT, tracee, NULL, NULL);
goto repeat;
}
for (i = 0; i < 3; i++) {
nanosleep(&ts1s, NULL);
printf("mother: SIGSTOP\n");
kill(tracee, SIGSTOP);
nanosleep(&ts1s, NULL);
printf("mother: SIGCONT\n");
kill(tracee, SIGCONT);
}
nanosleep(&ts1s, NULL);
kill(tracer, SIGKILL);
kill(tracee, SIGKILL);
return 0;
}
This is identical to the program to test TRAP_NOTIFY except that
tracee is PTRACE_LISTEN'd instead of PTRACE_CONT'd when group stopped.
This allows ptracer to monitor when group stop ends without running
tracee.
# ./test-listen
tracer: stopped=0 signo=5
mother: SIGSTOP
tracer: SIG 19
tracer: stopped=1 signo=19
mother: SIGCONT
tracer: stopped=0 signo=5
tracer: SIG 18
mother: SIGSTOP
tracer: SIG 19
tracer: stopped=1 signo=19
mother: SIGCONT
tracer: stopped=0 signo=5
tracer: SIG 18
mother: SIGSTOP
tracer: SIG 19
tracer: stopped=1 signo=19
mother: SIGCONT
tracer: stopped=0 signo=5
tracer: SIG 18
-v2: Moved JOBCTL_LISTENING check in wait_task_stopped() into
task_stopped_code() as suggested by Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:18 +04:00
unlock_task_sighand ( child , & flags ) ;
ret = 0 ;
break ;
case PTRACE_LISTEN :
/*
* Listen for events . Tracee must be in STOP . It ' s not
* resumed per - se but is not considered to be in TRACED by
* wait ( 2 ) or ptrace ( 2 ) . If an async event ( e . g . group
* stop state change ) happens , tracee will enter STOP trap
* again . Alternatively , ptracer can issue INTERRUPT to
* finish listening and re - trap tracee into STOP .
*/
if ( unlikely ( ! seized | | ! lock_task_sighand ( child , & flags ) ) )
break ;
si = child - > last_siginfo ;
2011-09-25 21:46:22 +04:00
if ( likely ( si & & ( si - > si_code > > 8 ) = = PTRACE_EVENT_STOP ) ) {
child - > jobctl | = JOBCTL_LISTENING ;
/*
* If NOTIFY is set , it means event happened between
* start of this trap and now . Trigger re - trap .
*/
if ( child - > jobctl & JOBCTL_TRAP_NOTIFY )
2013-01-21 23:47:41 +04:00
ptrace_signal_wake_up ( child , true ) ;
2011-09-25 21:46:22 +04:00
ret = 0 ;
}
ptrace: implement PTRACE_INTERRUPT
Currently, there's no way to trap a running ptracee short of sending a
signal which has various side effects. This patch implements
PTRACE_INTERRUPT which traps ptracee without any signal or job control
related side effect.
The implementation is almost trivial. It uses the group stop trap -
SIGTRAP | PTRACE_EVENT_STOP << 8. A new trap flag
JOBCTL_TRAP_INTERRUPT is added, which is set on PTRACE_INTERRUPT and
cleared when any trap happens. As INTERRUPT should be useable
regardless of the current state of tracee, task_is_traced() test in
ptrace_check_attach() is skipped for INTERRUPT.
PTRACE_INTERRUPT is available iff tracee is attached with
PTRACE_SEIZE.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive pid=%d\n", getpid());
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: INTERRUPT and DETACH\n");
ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL);
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_DETACH, tracee, NULL, NULL);
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
kill(tracee, SIGKILL);
return 0;
}
When called without argument, tracee is seized from running state,
interrupted and then detached back to running state.
# ./test-interrupt
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: INTERRUPT and DETACH
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: exiting
When called with argument, tracee is seized from stopped state,
continued, interrupted and then detached back to stopped state.
# ./test-interrupt 1
tracee: alive pid=4548
tracee: alive pid=4548
tracee: alive pid=4548
tracer: INTERRUPT and DETACH
tracer: exiting
Before PTRACE_INTERRUPT, once the tracee was running, there was no way
to trap tracee and do PTRACE_DETACH without causing side effect.
-v2: Updated to use task_set_jobctl_pending() so that it doesn't end
up scheduling TRAP_STOP if child is dying which may make the
child unkillable. Spotted by Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:16 +04:00
unlock_task_sighand ( child , & flags ) ;
break ;
2007-10-16 12:23:45 +04:00
case PTRACE_DETACH : /* detach a process that was attached. */
ret = ptrace_detach ( child , data ) ;
break ;
2008-01-30 15:30:51 +03:00
2010-05-27 01:42:52 +04:00
# ifdef CONFIG_BINFMT_ELF_FDPIC
case PTRACE_GETFDPIC : {
2010-05-27 01:42:53 +04:00
struct mm_struct * mm = get_task_mm ( child ) ;
2010-05-27 01:42:52 +04:00
unsigned long tmp = 0 ;
2010-05-27 01:42:53 +04:00
ret = - ESRCH ;
if ( ! mm )
break ;
2010-05-27 01:42:52 +04:00
switch ( addr ) {
case PTRACE_GETFDPIC_EXEC :
2010-05-27 01:42:53 +04:00
tmp = mm - > context . exec_fdpic_loadmap ;
2010-05-27 01:42:52 +04:00
break ;
case PTRACE_GETFDPIC_INTERP :
2010-05-27 01:42:53 +04:00
tmp = mm - > context . interp_fdpic_loadmap ;
2010-05-27 01:42:52 +04:00
break ;
default :
break ;
}
2010-05-27 01:42:53 +04:00
mmput ( mm ) ;
2010-05-27 01:42:52 +04:00
2010-10-28 02:33:46 +04:00
ret = put_user ( tmp , datalp ) ;
2010-05-27 01:42:52 +04:00
break ;
}
# endif
2008-01-30 15:30:51 +03:00
# ifdef PTRACE_SINGLESTEP
case PTRACE_SINGLESTEP :
# endif
2008-01-30 15:30:53 +03:00
# ifdef PTRACE_SINGLEBLOCK
case PTRACE_SINGLEBLOCK :
# endif
2008-01-30 15:30:51 +03:00
# ifdef PTRACE_SYSEMU
case PTRACE_SYSEMU :
case PTRACE_SYSEMU_SINGLESTEP :
# endif
case PTRACE_SYSCALL :
case PTRACE_CONT :
return ptrace_resume ( child , request , data ) ;
case PTRACE_KILL :
if ( child - > exit_state ) /* already dead */
return 0 ;
return ptrace_resume ( child , request , SIGKILL ) ;
2010-02-11 22:51:00 +03:00
# ifdef CONFIG_HAVE_ARCH_TRACEHOOK
case PTRACE_GETREGSET :
2013-07-04 02:08:12 +04:00
case PTRACE_SETREGSET : {
2010-02-11 22:51:00 +03:00
struct iovec kiov ;
2010-10-28 02:33:46 +04:00
struct iovec __user * uiov = datavp ;
2010-02-11 22:51:00 +03:00
Remove 'type' argument from access_ok() function
Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument
of the user address range verification function since we got rid of the
old racy i386-only code to walk page tables by hand.
It existed because the original 80386 would not honor the write protect
bit when in kernel mode, so you had to do COW by hand before doing any
user access. But we haven't supported that in a long time, and these
days the 'type' argument is a purely historical artifact.
A discussion about extending 'user_access_begin()' to do the range
checking resulted in this patch, because there is no way we're going to
move the old VERIFY_xyz interface to that model. And it's best done at
the end of the merge window when I've done most of my merges, so let's
just get this done once and for all.
This patch was mostly done with a sed-script, with manual fix-ups for
the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form.
There were a couple of notable cases:
- csky still had the old "verify_area()" name as an alias.
- the iter_iov code had magical hardcoded knowledge of the actual
values of VERIFY_{READ,WRITE} (not that they mattered, since nothing
really used it)
- microblaze used the type argument for a debug printout
but other than those oddities this should be a total no-op patch.
I tried to fix up all architectures, did fairly extensive grepping for
access_ok() uses, and the changes are trivial, but I may have missed
something. Any missed conversion should be trivially fixable, though.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 05:57:57 +03:00
if ( ! access_ok ( uiov , sizeof ( * uiov ) ) )
2010-02-11 22:51:00 +03:00
return - EFAULT ;
if ( __get_user ( kiov . iov_base , & uiov - > iov_base ) | |
__get_user ( kiov . iov_len , & uiov - > iov_len ) )
return - EFAULT ;
ret = ptrace_regset ( child , request , addr , & kiov ) ;
if ( ! ret )
ret = __put_user ( kiov . iov_len , & uiov - > iov_len ) ;
break ;
}
ptrace: add PTRACE_GET_SYSCALL_INFO request
PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain
details of the syscall the tracee is blocked in.
There are two reasons for a special syscall-related ptrace request.
Firstly, with the current ptrace API there are cases when ptracer cannot
retrieve necessary information about syscalls. Some examples include:
* The notorious int-0x80-from-64-bit-task issue. See [1] for details.
In short, if a 64-bit task performs a syscall through int 0x80, its
tracer has no reliable means to find out that the syscall was, in
fact, a compat syscall, and misidentifies it.
* Syscall-enter-stop and syscall-exit-stop look the same for the
tracer. Common practice is to keep track of the sequence of
ptrace-stops in order not to mix the two syscall-stops up. But it is
not as simple as it looks; for example, strace had a (just recently
fixed) long-standing bug where attaching strace to a tracee that is
performing the execve system call led to the tracer identifying the
following syscall-exit-stop as syscall-enter-stop, which messed up
all the state tracking.
* Since the introduction of commit 84d77d3f06e7 ("ptrace: Don't allow
accessing an undumpable mm"), both PTRACE_PEEKDATA and
process_vm_readv become unavailable when the process dumpable flag is
cleared. On such architectures as ia64 this results in all syscall
arguments being unavailable for the tracer.
Secondly, ptracers also have to support a lot of arch-specific code for
obtaining information about the tracee. For some architectures, this
requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall
argument and return value.
ptrace(2) man page:
long ptrace(enum __ptrace_request request, pid_t pid,
void *addr, void *data);
...
PTRACE_GET_SYSCALL_INFO
Retrieve information about the syscall that caused the stop.
The information is placed into the buffer pointed to by the "data"
argument, which should be a pointer to a buffer of type
"struct ptrace_syscall_info".
The "addr" argument contains the size of the buffer pointed to
by the "data" argument (i.e., sizeof(struct ptrace_syscall_info)).
The return value contains the number of bytes available
to be written by the kernel.
If the size of data to be written by the kernel exceeds the size
specified by the "addr" argument, the output is truncated.
[ldv@altlinux.org: selftests/seccomp/seccomp_bpf: update for PTRACE_GET_SYSCALL_INFO]
Link: http://lkml.kernel.org/r/20190708182904.GA12332@altlinux.org
Link: http://lkml.kernel.org/r/20190510152842.GF28558@altlinux.org
Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Co-developed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Eugene Syromyatnikov <esyr@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Greentime Hu <greentime@andestech.com>
Cc: Helge Deller <deller@gmx.de> [parisc]
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: kbuild test robot <lkp@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vincent Chen <deanbo422@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-07-17 02:29:42 +03:00
case PTRACE_GET_SYSCALL_INFO :
ret = ptrace_get_syscall_info ( child , addr , datavp ) ;
break ;
2010-02-11 22:51:00 +03:00
# endif
2015-10-27 03:23:59 +03:00
case PTRACE_SECCOMP_GET_FILTER :
ret = seccomp_get_filter ( child , addr , datavp ) ;
break ;
2017-10-11 18:39:21 +03:00
case PTRACE_SECCOMP_GET_METADATA :
ret = seccomp_get_metadata ( child , addr , datavp ) ;
break ;
2005-04-17 02:20:36 +04:00
default :
break ;
}
return ret ;
}
2005-11-07 11:59:47 +03:00
2007-10-16 12:26:37 +04:00
/*
 * Fallback for architectures that do not supply their own
 * arch_ptrace_attach(): expands to an empty statement and never
 * evaluates its argument.
 */
#if !defined(arch_ptrace_attach)
#define arch_ptrace_attach(child) do { } while (0)
#endif
2010-10-28 02:33:45 +04:00
/*
 * ptrace() system call entry point.
 *
 * PTRACE_TRACEME is handled first and needs no target task. For every
 * other request the task named by @pid is looked up with
 * find_get_task_by_vpid() and released again with put_task_struct() on
 * each later exit path. PTRACE_ATTACH and PTRACE_SEIZE go to
 * ptrace_attach(); all remaining requests are passed to arch_ptrace()
 * once ptrace_check_attach() has succeeded.
 *
 * Returns the value produced by whichever handler ran, or -ESRCH when
 * no task matches @pid.
 */
SYSCALL_DEFINE4 ( ptrace , long , request , long , pid , unsigned long , addr ,
unsigned long , data )
2005-11-07 11:59:47 +03:00
{
struct task_struct * child ;
long ret ;
2006-01-08 12:02:33 +03:00
/* TRACEME operates on the calling task, so no lookup by pid is done. */
if ( request = = PTRACE_TRACEME ) {
ret = ptrace_traceme ( ) ;
2007-11-27 15:02:40 +03:00
if ( ! ret )
arch_ptrace_attach ( current ) ;
2005-11-07 11:59:47 +03:00
goto out ;
2006-01-08 12:02:33 +03:00
}
2018-02-07 02:40:17 +03:00
/* From here on, every exit must go through out_put_task_struct. */
child = find_get_task_by_vpid ( pid ) ;
if ( ! child ) {
ret = - ESRCH ;
2006-01-08 12:02:33 +03:00
goto out ;
}
2005-11-07 11:59:47 +03:00
ptrace: implement PTRACE_SEIZE
PTRACE_ATTACH implicitly issues SIGSTOP on attach which has side
effects on tracee signal and job control states. This patch
implements a new ptrace request PTRACE_SEIZE which attaches a tracee
without trapping it or affecting its signal and job control states.
The usage is the same with PTRACE_ATTACH but it takes PTRACE_SEIZE_*
flags in @data. Currently, the only defined flag is
PTRACE_SEIZE_DEVEL which is a temporary flag to enable PTRACE_SEIZE.
PTRACE_SEIZE will change ptrace behaviors outside of attach itself.
The changes will be implemented gradually and the DEVEL flag is to
prevent programs which expect full SEIZE behavior from using it before
all the behavior modifications are complete while allowing unit
testing. The flag will be removed once SEIZE behaviors are completely
implemented.
* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
attaching tracee continues to run unless a trap condition occurs.
* PTRACE_SEIZE doesn't affect signal or group stop state.
* If PTRACE_SEIZE'd, group stop uses PTRACE_EVENT_STOP trap which uses
exit_code of (signr | PTRACE_EVENT_STOP << 8) where signr is one of
the stopping signals if group stop is in effect or SIGTRAP
otherwise, and returns usual trap siginfo on PTRACE_GETSIGINFO
instead of NULL.
Seizing sets PT_SEIZED in ->ptrace of the tracee. This flag will be
used to determine whether new SEIZE behaviors should be enabled.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive\n");
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
return 0;
}
When the above program is called w/o argument, tracee is seized while
running and remains running. When tracer exits, tracee continues to
run and print out messages.
# ./test-seize-simple
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
tracee: alive
tracee: alive
When called with an argument, tracee is seized from stopped state and
continued, and returns to stopped state when tracer exits.
# ./test-seize
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
# ps -el|grep test-seize
1 T 0 4720 1 0 80 0 - 941 signal ttyS0 00:00:00 test-seize
-v2: SEIZE doesn't schedule TRAP_STOP and leaves tracee running as Jan
suggested.
-v3: PTRACE_EVENT_STOP traps now report group stop state by signr. If
group stop is in effect the stop signal number is returned as
part of exit_code; otherwise, SIGTRAP. This was suggested by
Denys and Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:15 +04:00
/* Both attach flavours share ptrace_attach() and skip ptrace_check_attach(). */
if ( request = = PTRACE_ATTACH | | request = = PTRACE_SEIZE ) {
2012-03-24 02:02:42 +04:00
ret = ptrace_attach ( child , request , addr , data ) ;
2007-10-16 12:26:37 +04:00
/*
* Some architectures need to do book - keeping after
* a ptrace attach .
*/
if ( ! ret )
arch_ptrace_attach ( child ) ;
2005-11-14 03:06:33 +03:00
goto out_put_task_struct ;
2005-11-07 11:59:47 +03:00
}
ptrace: implement PTRACE_INTERRUPT
Currently, there's no way to trap a running ptracee short of sending a
signal which has various side effects. This patch implements
PTRACE_INTERRUPT which traps ptracee without any signal or job control
related side effect.
The implementation is almost trivial. It uses the group stop trap -
SIGTRAP | PTRACE_EVENT_STOP << 8. A new trap flag
JOBCTL_TRAP_INTERRUPT is added, which is set on PTRACE_INTERRUPT and
cleared when any trap happens. As INTERRUPT should be useable
regardless of the current state of tracee, task_is_traced() test in
ptrace_check_attach() is skipped for INTERRUPT.
PTRACE_INTERRUPT is available iff tracee is attached with
PTRACE_SEIZE.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive pid=%d\n", getpid());
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: INTERRUPT and DETACH\n");
ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL);
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_DETACH, tracee, NULL, NULL);
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
kill(tracee, SIGKILL);
return 0;
}
When called without argument, tracee is seized from running state,
interrupted and then detached back to running state.
# ./test-interrupt
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: INTERRUPT and DETACH
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: exiting
When called with argument, tracee is seized from stopped state,
continued, interrupted and then detached back to stopped state.
# ./test-interrupt 1
tracee: alive pid=4548
tracee: alive pid=4548
tracee: alive pid=4548
tracer: INTERRUPT and DETACH
tracer: exiting
Before PTRACE_INTERRUPT, once the tracee was running, there was no way
to trap tracee and do PTRACE_DETACH without causing side effect.
-v2: Updated to use task_set_jobctl_pending() so that it doesn't end
up scheduling TRAP_STOP if child is dying which may make the
child unkillable. Spotted by Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:16 +04:00
/*
 * The second argument is true only for PTRACE_KILL and PTRACE_INTERRUPT.
 * NOTE(review): presumably this relaxes the stopped-tracee check inside
 * ptrace_check_attach(), as the commit text above describes for
 * INTERRUPT; confirm against that helper.
 */
ret = ptrace_check_attach ( child , request = = PTRACE_KILL | |
request = = PTRACE_INTERRUPT ) ;
2005-11-07 11:59:47 +03:00
if ( ret < 0 )
goto out_put_task_struct ;
ret = arch_ptrace ( child , request , addr , data ) ;
2013-01-21 23:48:00 +04:00
/* The unfreeze is skipped only when a PTRACE_DETACH returned 0. */
if ( ret | | request ! = PTRACE_DETACH )
ptrace_unfreeze_traced ( child ) ;
2005-11-07 11:59:47 +03:00
out_put_task_struct :
put_task_struct ( child ) ;
out :
return ret ;
}
2007-07-17 15:03:43 +04:00
2010-10-28 02:33:45 +04:00
int generic_ptrace_peekdata ( struct task_struct * tsk , unsigned long addr ,
unsigned long data )
2007-07-17 15:03:43 +04:00
{
unsigned long tmp ;
int copied ;
2016-11-22 21:06:50 +03:00
copied = ptrace_access_vm ( tsk , addr , & tmp , sizeof ( tmp ) , FOLL_FORCE ) ;
2007-07-17 15:03:43 +04:00
if ( copied ! = sizeof ( tmp ) )
return - EIO ;
return put_user ( tmp , ( unsigned long __user * ) data ) ;
}
2007-07-17 15:03:44 +04:00
2010-10-28 02:33:45 +04:00
int generic_ptrace_pokedata ( struct task_struct * tsk , unsigned long addr ,
unsigned long data )
2007-07-17 15:03:44 +04:00
{
int copied ;
2016-11-22 21:06:50 +03:00
copied = ptrace_access_vm ( tsk , addr , & data , sizeof ( data ) ,
2016-10-13 03:20:20 +03:00
FOLL_FORCE | FOLL_WRITE ) ;
2007-07-17 15:03:44 +04:00
return ( copied = = sizeof ( data ) ) ? 0 : - EIO ;
}
2008-01-30 15:31:47 +03:00
2008-11-25 10:10:03 +03:00
# if defined CONFIG_COMPAT
2008-01-30 15:31:47 +03:00
/*
 * Compat counterpart of ptrace_request().
 *
 * Handles the requests whose user-space data is laid out with compat_*
 * types (word peek/poke, event message, siginfo and, when
 * CONFIG_HAVE_ARCH_TRACEHOOK is set, the regset iovec) and forwards every
 * other request to ptrace_request().
 *
 * @child:   tracee the request applies to
 * @request: ptrace request code
 * @addr:    request specific address argument
 * @data:    request specific data argument; also viewed as a compat user
 *           pointer through datap
 *
 * Returns the result of the helper that served the request, -EIO when a
 * word could not be transferred in full, or -EFAULT when the user iovec
 * is not accessible.
 */
int compat_ptrace_request ( struct task_struct * child , compat_long_t request ,
compat_ulong_t addr , compat_ulong_t data )
{
compat_ulong_t __user * datap = compat_ptr ( data ) ;
compat_ulong_t word ;
2018-09-25 12:27:20 +03:00
kernel_siginfo_t siginfo ;
2008-01-30 15:31:47 +03:00
int ret ;
switch ( request ) {
case PTRACE_PEEKTEXT :
case PTRACE_PEEKDATA :
2016-11-22 21:06:50 +03:00
/* Fetch one compat word from the tracee, then copy it out to datap. */
ret = ptrace_access_vm ( child , addr , & word , sizeof ( word ) ,
2016-10-13 03:20:20 +03:00
FOLL_FORCE ) ;
2008-01-30 15:31:47 +03:00
if ( ret ! = sizeof ( word ) )
ret = - EIO ;
else
ret = put_user ( word , datap ) ;
break ;
case PTRACE_POKETEXT :
case PTRACE_POKEDATA :
2016-11-22 21:06:50 +03:00
/* Here @data is the value to store, not a pointer. */
ret = ptrace_access_vm ( child , addr , & data , sizeof ( data ) ,
2016-10-13 03:20:20 +03:00
FOLL_FORCE | FOLL_WRITE ) ;
2008-01-30 15:31:47 +03:00
ret = ( ret ! = sizeof ( data ) ? - EIO : 0 ) ;
break ;
case PTRACE_GETEVENTMSG :
/* ptrace_message is narrowed to compat_ulong_t before the copy-out. */
ret = put_user ( ( compat_ulong_t ) child - > ptrace_message , datap ) ;
break ;
2008-04-21 00:10:12 +04:00
case PTRACE_GETSIGINFO :
ret = ptrace_getsiginfo ( child , & siginfo ) ;
if ( ! ret )
ret = copy_siginfo_to_user32 (
( struct compat_siginfo __user * ) datap ,
& siginfo ) ;
break ;
case PTRACE_SETSIGINFO :
2018-04-19 01:30:19 +03:00
ret = copy_siginfo_from_user32 (
& siginfo , ( struct compat_siginfo __user * ) datap ) ;
if ( ! ret )
2008-04-21 00:10:12 +04:00
ret = ptrace_setsiginfo ( child , & siginfo ) ;
break ;
2010-02-11 22:51:00 +03:00
# ifdef CONFIG_HAVE_ARCH_TRACEHOOK
case PTRACE_GETREGSET :
case PTRACE_SETREGSET :
/*
 * The user passes a struct compat_iovec; its fields are read one by one
 * and rebuilt as a native struct iovec for ptrace_regset().
 */
{
struct iovec kiov ;
struct compat_iovec __user * uiov =
( struct compat_iovec __user * ) datap ;
compat_uptr_t ptr ;
compat_size_t len ;
Remove 'type' argument from access_ok() function
Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument
of the user address range verification function since we got rid of the
old racy i386-only code to walk page tables by hand.
It existed because the original 80386 would not honor the write protect
bit when in kernel mode, so you had to do COW by hand before doing any
user access. But we haven't supported that in a long time, and these
days the 'type' argument is a purely historical artifact.
A discussion about extending 'user_access_begin()' to do the range
checking resulted this patch, because there is no way we're going to
move the old VERIFY_xyz interface to that model. And it's best done at
the end of the merge window when I've done most of my merges, so let's
just get this done once and for all.
This patch was mostly done with a sed-script, with manual fix-ups for
the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form.
There were a couple of notable cases:
- csky still had the old "verify_area()" name as an alias.
- the iter_iov code had magical hardcoded knowledge of the actual
values of VERIFY_{READ,WRITE} (not that they mattered, since nothing
really used it)
- microblaze used the type argument for a debug printout
but other than those oddities this should be a total no-op patch.
I tried to fix up all architectures, did fairly extensive grepping for
access_ok() uses, and the changes are trivial, but I may have missed
something. Any missed conversion should be trivially fixable, though.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-01-04 05:57:57 +03:00
/* One access_ok() on the whole iovec precedes the __get_user/__put_user calls. */
if ( ! access_ok ( uiov , sizeof ( * uiov ) ) )
2010-02-11 22:51:00 +03:00
return - EFAULT ;
if ( __get_user ( ptr , & uiov - > iov_base ) | |
__get_user ( len , & uiov - > iov_len ) )
return - EFAULT ;
kiov . iov_base = compat_ptr ( ptr ) ;
kiov . iov_len = len ;
ret = ptrace_regset ( child , request , addr , & kiov ) ;
/* On success, write the resulting iov_len back into the user iovec. */
if ( ! ret )
ret = __put_user ( kiov . iov_len , & uiov - > iov_len ) ;
break ;
}
# endif
2008-04-21 00:10:12 +04:00
2008-01-30 15:31:47 +03:00
/* Every request not listed above is passed to ptrace_request() unchanged. */
default :
ret = ptrace_request ( child , request , addr , data ) ;
}
return ret ;
}
2008-01-30 15:31:48 +03:00
2014-03-03 19:11:13 +04:00
/*
 * Compat ptrace() system call entry point.
 *
 * PTRACE_TRACEME is served by ptrace_traceme() without a task lookup.
 * For every other request the task named by @pid is obtained with
 * find_get_task_by_vpid() and released with put_task_struct() before
 * returning. PTRACE_ATTACH and PTRACE_SEIZE go to ptrace_attach(); the
 * remaining requests are passed to compat_arch_ptrace() when
 * ptrace_check_attach() returns 0.
 *
 * Returns the value produced by whichever handler ran, or -ESRCH when
 * no task matches @pid.
 */
COMPAT_SYSCALL_DEFINE4 ( ptrace , compat_long_t , request , compat_long_t , pid ,
compat_long_t , addr , compat_long_t , data )
2008-01-30 15:31:48 +03:00
{
struct task_struct * child ;
long ret ;
/* Unlike the other requests, TRACEME needs no target task. */
if ( request = = PTRACE_TRACEME ) {
ret = ptrace_traceme ( ) ;
goto out ;
}
2018-02-07 02:40:17 +03:00
/* From here on, every exit must go through out_put_task_struct. */
child = find_get_task_by_vpid ( pid ) ;
if ( ! child ) {
ret = - ESRCH ;
2008-01-30 15:31:48 +03:00
goto out ;
}
ptrace: implement PTRACE_SEIZE
PTRACE_ATTACH implicitly issues SIGSTOP on attach which has side
effects on tracee signal and job control states. This patch
implements a new ptrace request PTRACE_SEIZE which attaches a tracee
without trapping it or affecting its signal and job control states.
The usage is the same with PTRACE_ATTACH but it takes PTRACE_SEIZE_*
flags in @data. Currently, the only defined flag is
PTRACE_SEIZE_DEVEL which is a temporary flag to enable PTRACE_SEIZE.
PTRACE_SEIZE will change ptrace behaviors outside of attach itself.
The changes will be implemented gradually and the DEVEL flag is to
prevent programs which expect full SEIZE behavior from using it before
all the behavior modifications are complete while allowing unit
testing. The flag will be removed once SEIZE behaviors are completely
implemented.
* PTRACE_SEIZE, unlike ATTACH, doesn't force tracee to trap. After
attaching tracee continues to run unless a trap condition occurs.
* PTRACE_SEIZE doesn't affect signal or group stop state.
* If PTRACE_SEIZE'd, group stop uses PTRACE_EVENT_STOP trap which uses
exit_code of (signr | PTRACE_EVENT_STOP << 8) where signr is one of
the stopping signals if group stop is in effect or SIGTRAP
otherwise, and returns usual trap siginfo on PTRACE_GETSIGINFO
instead of NULL.
Seizing sets PT_SEIZED in ->ptrace of the tracee. This flag will be
used to determine whether new SEIZE behaviors should be enabled.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive\n");
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
return 0;
}
When the above program is called w/o argument, tracee is seized while
running and remains running. When tracer exits, tracee continues to
run and print out messages.
# ./test-seize-simple
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
tracee: alive
tracee: alive
When called with an argument, tracee is seized from stopped state and
continued, and returns to stopped state when tracer exits.
# ./test-seize
tracee: alive
tracee: alive
tracee: alive
tracer: exiting
# ps -el|grep test-seize
1 T 0 4720 1 0 80 0 - 941 signal ttyS0 00:00:00 test-seize
-v2: SEIZE doesn't schedule TRAP_STOP and leaves tracee running as Jan
suggested.
-v3: PTRACE_EVENT_STOP traps now report group stop state by signr. If
group stop is in effect the stop signal number is returned as
part of exit_code; otherwise, SIGTRAP. This was suggested by
Denys and Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:15 +04:00
/* Both attach flavours share ptrace_attach() and skip ptrace_check_attach(). */
if ( request = = PTRACE_ATTACH | | request = = PTRACE_SEIZE ) {
2012-03-24 02:02:42 +04:00
ret = ptrace_attach ( child , request , addr , data ) ;
2008-01-30 15:31:48 +03:00
/*
* Some architectures need to do book - keeping after
* a ptrace attach .
*/
if ( ! ret )
arch_ptrace_attach ( child ) ;
goto out_put_task_struct ;
}
ptrace: implement PTRACE_INTERRUPT
Currently, there's no way to trap a running ptracee short of sending a
signal which has various side effects. This patch implements
PTRACE_INTERRUPT which traps ptracee without any signal or job control
related side effect.
The implementation is almost trivial. It uses the group stop trap -
SIGTRAP | PTRACE_EVENT_STOP << 8. A new trap flag
JOBCTL_TRAP_INTERRUPT is added, which is set on PTRACE_INTERRUPT and
cleared when any trap happens. As INTERRUPT should be useable
regardless of the current state of tracee, task_is_traced() test in
ptrace_check_attach() is skipped for INTERRUPT.
PTRACE_INTERRUPT is available iff tracee is attached with
PTRACE_SEIZE.
Test program follows.
#define PTRACE_SEIZE 0x4206
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_SEIZE_DEVEL 0x80000000
static const struct timespec ts100ms = { .tv_nsec = 100000000 };
static const struct timespec ts1s = { .tv_sec = 1 };
static const struct timespec ts3s = { .tv_sec = 3 };
int main(int argc, char **argv)
{
pid_t tracee;
tracee = fork();
if (tracee == 0) {
nanosleep(&ts100ms, NULL);
while (1) {
printf("tracee: alive pid=%d\n", getpid());
nanosleep(&ts1s, NULL);
}
}
if (argc > 1)
kill(tracee, SIGSTOP);
nanosleep(&ts100ms, NULL);
ptrace(PTRACE_SEIZE, tracee, NULL,
(void *)(unsigned long)PTRACE_SEIZE_DEVEL);
if (argc > 1) {
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_CONT, tracee, NULL, NULL);
}
nanosleep(&ts3s, NULL);
printf("tracer: INTERRUPT and DETACH\n");
ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL);
waitid(P_PID, tracee, NULL, WSTOPPED);
ptrace(PTRACE_DETACH, tracee, NULL, NULL);
nanosleep(&ts3s, NULL);
printf("tracer: exiting\n");
kill(tracee, SIGKILL);
return 0;
}
When called without argument, tracee is seized from running state,
interrupted and then detached back to running state.
# ./test-interrupt
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: INTERRUPT and DETACH
tracee: alive pid=4546
tracee: alive pid=4546
tracee: alive pid=4546
tracer: exiting
When called with argument, tracee is seized from stopped state,
continued, interrupted and then detached back to stopped state.
# ./test-interrupt 1
tracee: alive pid=4548
tracee: alive pid=4548
tracee: alive pid=4548
tracer: INTERRUPT and DETACH
tracer: exiting
Before PTRACE_INTERRUPT, once the tracee was running, there was no way
to trap tracee and do PTRACE_DETACH without causing side effect.
-v2: Updated to use task_set_jobctl_pending() so that it doesn't end
up scheduling TRAP_STOP if child is dying which may make the
child unkillable. Spotted by Oleg.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
2011-06-14 13:20:16 +04:00
/*
 * The second argument is true only for PTRACE_KILL and PTRACE_INTERRUPT.
 * NOTE(review): presumably this relaxes the stopped-tracee check inside
 * ptrace_check_attach(), as the commit text above describes for
 * INTERRUPT; confirm against that helper.
 */
ret = ptrace_check_attach ( child , request = = PTRACE_KILL | |
request = = PTRACE_INTERRUPT ) ;
2013-01-21 23:48:00 +04:00
if ( ! ret ) {
2008-01-30 15:31:48 +03:00
ret = compat_arch_ptrace ( child , request , addr , data ) ;
2013-01-21 23:48:00 +04:00
/* The unfreeze is skipped only when a PTRACE_DETACH returned 0. */
if ( ret | | request ! = PTRACE_DETACH )
ptrace_unfreeze_traced ( child ) ;
}
2008-01-30 15:31:48 +03:00
out_put_task_struct :
put_task_struct ( child ) ;
out :
return ret ;
}
2008-11-25 10:10:03 +03:00
# endif /* CONFIG_COMPAT */