Do not suspend waitpid.

strace used to suspend waitpid until there is a child
for waitpid'ing process to collect status from.
Apparently, it was done because in some very old kernels
(circa 2002 or even earlier) there were ptrace bugs which
were making waitpid in real parent to not see children.
This kernel bug is fixed long ago. This change removes the workaround.
test/wait_must_be_interruptible.c is a test program which
illustrates why without this change strace changes
programs's behavior.

* defs.h: Delete waitpid and nclone_waiting members from from struct tcb.
  Remove declaration of internal_wait().
* process.c (internal_wait): Remove this function.
* strace.c (alloc_tcb): Do not set tcp->nclone_waiting.
  (resume): Remove this function.
  (resume_from_tcp): Remove this function.
  (detach): Do not call resume_from_tcp().
  (handle_group_exit): Do not call resume_from_tcp().
* syscall.c (internal_syscall): Do not call internal_wait().

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
This commit is contained in:
Denys Vlasenko 2011-06-18 11:29:10 +02:00
parent dfcd741a5a
commit 0df9ed47af
4 changed files with 0 additions and 206 deletions

3
defs.h
View File

@ -369,11 +369,9 @@ struct tcb {
/* Support for tracing forked processes */
struct tcb *parent; /* Parent of this process */
int nchildren; /* # of traced children */
int waitpid; /* pid(s) this process is waiting for */
int nzombies; /* # of formerly traced children now dead */
#ifdef LINUX
int nclone_threads; /* # of nchildren with CLONE_THREAD */
int nclone_waiting; /* clone threads in wait4 (TCB_SUSPENDED) */
#endif
/* (1st arg of wait4()) */
long baddr; /* `Breakpoint' address */
@ -595,7 +593,6 @@ extern int pathtrace_match(struct tcb *);
extern int change_syscall(struct tcb *, int);
extern int internal_fork(struct tcb *);
extern int internal_exec(struct tcb *);
extern int internal_wait(struct tcb *, int);
extern int internal_exit(struct tcb *);
#ifdef LINUX
extern int handle_new_child(struct tcb *, int, int);

View File

@ -1929,92 +1929,6 @@ printwaitn(struct tcb *tcp, int n, int bitness)
return 0;
}
int
internal_wait(struct tcb *tcp, int flagarg)
{
int got_kids;
#ifdef TCB_CLONE_THREAD
if (tcp->flags & TCB_CLONE_THREAD)
/* The children we wait for are our parent's children. */
got_kids = (tcp->parent->nchildren
> tcp->parent->nclone_threads);
else
got_kids = (tcp->nchildren > tcp->nclone_threads);
#else
got_kids = tcp->nchildren > 0;
#endif
if (entering(tcp) && got_kids) {
/* There are children that this parent should block for.
But ptrace made us the parent of the traced children
and the real parent will get ECHILD from the wait call.
XXX If we attached with strace -f -p PID, then there
may be untraced dead children the parent could be reaping
now, but we make him block. */
/* ??? WTA: fix bug with hanging children */
if (!(tcp->u_arg[flagarg] & WNOHANG)) {
/*
* There are traced children. We'll make the parent
* block to avoid a false ECHILD error due to our
* ptrace having stolen the children. However,
* we shouldn't block if there are zombies to reap.
* XXX doesn't handle pgrp matches (u_arg[0]==0,<-1)
*/
struct tcb *child = NULL;
if (tcp->nzombies > 0 &&
(tcp->u_arg[0] == -1 ||
(child = pid2tcb(tcp->u_arg[0])) == NULL))
return 0;
if (tcp->u_arg[0] > 0) {
/*
* If the parent waits for a specified child
* PID, then it must get ECHILD right away
* if that PID is not one of its children.
* Make sure that the requested PID matches
* one of the parent's children that we are
* tracing, and don't suspend it otherwise.
*/
if (child == NULL)
child = pid2tcb(tcp->u_arg[0]);
if (child == NULL || child->parent != (
#ifdef TCB_CLONE_THREAD
(tcp->flags & TCB_CLONE_THREAD)
? tcp->parent :
#endif
tcp) ||
(child->flags & TCB_EXITING))
return 0;
}
tcp->flags |= TCB_SUSPENDED;
tcp->waitpid = tcp->u_arg[0];
#ifdef TCB_CLONE_THREAD
if (tcp->flags & TCB_CLONE_THREAD)
tcp->parent->nclone_waiting++;
#endif
}
}
if (exiting(tcp) && tcp->u_error == ECHILD && got_kids) {
if (tcp->u_arg[flagarg] & WNOHANG) {
/* We must force a fake result of 0 instead of
the ECHILD error. */
return force_result(tcp, 0, 0);
}
}
else if (exiting(tcp) && tcp->u_error == 0 && tcp->u_rval > 0 &&
tcp->nzombies > 0 && pid2tcb(tcp->u_rval) == NULL) {
/*
* We just reaped a child we don't know about,
* presumably a zombie we already droptcb'd.
*/
tcp->nzombies--;
}
return 0;
}
#ifdef SVR4
int

101
strace.c
View File

@ -1289,7 +1289,6 @@ alloc_tcb(int pid, int command_options_parsed)
tcp->nzombies = 0;
#ifdef TCB_CLONE_THREAD
tcp->nclone_threads = 0;
tcp->nclone_waiting = 0;
#endif
tcp->flags = TCB_INUSE | TCB_STARTUP;
tcp->outf = outf; /* Initialise to current out file */
@ -1709,99 +1708,6 @@ droptcb(struct tcb *tcp)
tcp->outf = 0;
}
#ifndef USE_PROCFS
static int
resume(struct tcb *tcp)
{
if (tcp == NULL)
return -1;
if (!(tcp->flags & TCB_SUSPENDED)) {
fprintf(stderr, "PANIC: pid %u not suspended\n", tcp->pid);
return -1;
}
tcp->flags &= ~TCB_SUSPENDED;
#ifdef TCB_CLONE_THREAD
if (tcp->flags & TCB_CLONE_THREAD)
tcp->parent->nclone_waiting--;
#endif
if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0)
return -1;
if (!qflag)
fprintf(stderr, "Process %u resumed\n", tcp->pid);
return 0;
}
static int
resume_from_tcp(struct tcb *tcp)
{
int error = 0;
int resumed = 0;
/* XXX This won't always be quite right (but it never was).
A waiter with argument 0 or < -1 is waiting for any pid in
a particular pgrp, which this child might or might not be
in. The waiter will only wake up if it's argument is -1
or if it's waiting for tcp->pid's pgrp. It makes a
difference to wake up a waiter when there might be more
traced children, because it could get a false ECHILD
error. OTOH, if this was the last child in the pgrp, then
it ought to wake up and get ECHILD. We would have to
search the system for all pid's in the pgrp to be sure.
&& (t->waitpid == -1 ||
(t->waitpid == 0 && getpgid (tcp->pid) == getpgid (t->pid))
|| (t->waitpid < 0 && t->waitpid == -getpid (t->pid)))
*/
if (tcp->parent &&
(tcp->parent->flags & TCB_SUSPENDED) &&
(tcp->parent->waitpid <= 0 || tcp->parent->waitpid == tcp->pid)) {
error = resume(tcp->parent);
++resumed;
}
#ifdef TCB_CLONE_THREAD
if (tcp->parent && tcp->parent->nclone_waiting > 0) {
/* Some other threads of our parent are waiting too. */
unsigned int i;
/* Resume all the threads that were waiting for this PID. */
for (i = 0; i < tcbtabsize; i++) {
struct tcb *t = tcbtab[i];
if (t->parent == tcp->parent && t != tcp
&& ((t->flags & (TCB_CLONE_THREAD|TCB_SUSPENDED))
== (TCB_CLONE_THREAD|TCB_SUSPENDED))
&& t->waitpid == tcp->pid) {
error |= resume(t);
++resumed;
}
}
if (resumed == 0)
/* Noone was waiting for this PID in particular,
so now we might need to resume some wildcarders. */
for (i = 0; i < tcbtabsize; i++) {
struct tcb *t = tcbtab[i];
if (t->parent == tcp->parent && t != tcp
&& ((t->flags
& (TCB_CLONE_THREAD|TCB_SUSPENDED))
== (TCB_CLONE_THREAD|TCB_SUSPENDED))
&& t->waitpid <= 0
) {
error |= resume(t);
break;
}
}
}
#endif
return error;
}
#endif /* !USE_PROCFS */
/* detach traced process; continue with sig
Never call DETACH twice on the same process as both unattached and
attached-unstopped processes give the same ESRCH. For unattached process we
@ -1918,10 +1824,6 @@ detach(struct tcb *tcp, int sig)
error = ptrace_restart(PTRACE_DETACH, tcp, sig);
#endif /* SUNOS4 */
#ifndef USE_PROCFS
error |= resume_from_tcp(tcp);
#endif
if (!qflag)
fprintf(stderr, "Process %u detached\n", tcp->pid);
@ -2498,9 +2400,6 @@ handle_group_exit(struct tcb *tcp, int sig)
tcp->pid, leader ? leader->pid : -1);
}
/* TCP no longer exists therefore you must not detach() it. */
#ifndef USE_PROCFS
resume_from_tcp(tcp);
#endif
droptcb(tcp); /* Already died. */
}
else {

View File

@ -653,22 +653,6 @@ internal_syscall(struct tcb *tcp)
)
return internal_exec(tcp);
if ( sys_waitpid == func
|| sys_wait4 == func
#if defined(SVR4) || defined(FREEBSD) || defined(SUNOS4)
|| sys_wait == func
#endif
#ifdef ALPHA
|| sys_osf_wait4 == func
#endif
)
return internal_wait(tcp, 2);
#if defined(LINUX) || defined(SVR4)
if (sys_waitid == func)
return internal_wait(tcp, 3);
#endif
return 0;
}