Do not detach when we think tracee is going to die.
Current code plays some ungodly tricks, trying to not detach thread group leader until all threads exit. Also, it detaches from a tracee when signal delivery is detected which will cause tracee to exit. This operation is racy (not to mention the determination whether signal is set to SIG_DFL is a horrible hack): after we determined that this signal is indeed fatal but before we detach and let process die, *other thread* may set a handler to this signal, and we will leak the process, falsely displaying it as killed! I need to look in the past to figure out why we even do it. First guess is that it's a workaround for old kernel bugs: kernel used to deliver exit notifications to the tracer, not to real parent. These workarounds are ancient (internal_exit is from 1995). The patch deletes the hacks. We no longer need tcp->nclone_threads, TCB_EXITING and TCB_GROUP_EXITING. We also lose a few rather ugly functions. I also added a new message: "+++ exited with EXITCODE +++" which shows exact moment strace got exit notification. It is analogous to existing "+++ killed by SIG +++" message. * defs.h: Delete struct tcb::nclone_threads field, TCB_EXITING and TCB_GROUP_EXITING constants, declarations of sigishandled() and internal_exit(). * process.c (internal_exit): Delete this function. (handle_new_child): Don't ++tcp->nclone_threads. * signal.c (parse_sigset_t): Delete this function. (sigishandled): Delete this function. * strace.c (startup_attach): Don't tcbtab[tcbi]->nclone_threads++. (droptcb): Don't delay dropping if tcp->nclone_threads > 0, don't drop parent if its nclone_threads reached 0: just drop (only) this tcb unconditionally. (detach): don't drop parent. (handle_group_exit): Delete this function. (handle_ptrace_event): Instead of handle_group_exit, just drop tcb; do not panic if we see WIFEXITED from an attached pid; print "+++ exited with EXITCODE +++" for every WIFEXITED pid. * syscall.c (internal_syscall): Do not treat sys_exit specially - don't call internal_exit on it. Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
This commit is contained in:
parent
02a08fb6f0
commit
19cdada5b4
8
defs.h
8
defs.h
@ -386,10 +386,6 @@ struct tcb {
|
||||
struct timeval etime; /* Syscall entry time */
|
||||
/* Support for tracing forked processes */
|
||||
struct tcb *parent; /* Parent of this process */
|
||||
#ifdef LINUX
|
||||
int nclone_threads; /* # of children with CLONE_THREAD */
|
||||
#endif
|
||||
/* (1st arg of wait4()) */
|
||||
long baddr; /* `Breakpoint' address */
|
||||
long inst[2]; /* Instructions on above */
|
||||
int pfd; /* proc file descriptor */
|
||||
@ -415,7 +411,6 @@ struct tcb {
|
||||
#define TCB_INUSE 00002 /* This table entry is in use */
|
||||
#define TCB_INSYSCALL 00004 /* A system call is in progress */
|
||||
#define TCB_ATTACHED 00010 /* Process is not our own child */
|
||||
#define TCB_EXITING 00020 /* As far as we know, this process is exiting */
|
||||
#define TCB_SUSPENDED 00040 /* Process can not be allowed to resume just now */
|
||||
#define TCB_BPTSET 00100 /* "Breakpoint" set after fork(2) */
|
||||
#define TCB_SIGTRAPPED 00200 /* Process wanted to block SIGTRAP */
|
||||
@ -433,7 +428,6 @@ struct tcb {
|
||||
# define TCB_WAITEXECVE 04000 /* ignore SIGTRAP after exceve */
|
||||
# endif
|
||||
# define TCB_CLONE_THREAD 010000 /* CLONE_THREAD set in creating syscall */
|
||||
# define TCB_GROUP_EXITING 020000 /* TCB_EXITING was exit_group, not _exit */
|
||||
# include <sys/syscall.h>
|
||||
# ifndef __NR_exit_group
|
||||
# /* Hack: Most headers around are too old to have __NR_exit_group. */
|
||||
@ -593,7 +587,6 @@ extern int clearbpt(struct tcb *);
|
||||
* On newer kernels, we use PTRACE_O_TRACECLONE/TRACE[V]FORK instead.
|
||||
*/
|
||||
extern int setbpt(struct tcb *);
|
||||
extern int sigishandled(struct tcb *, int);
|
||||
extern void printcall(struct tcb *);
|
||||
extern const char *signame(int);
|
||||
extern void print_sigset(struct tcb *, long, int);
|
||||
@ -613,7 +606,6 @@ extern int pathtrace_match(struct tcb *);
|
||||
extern int change_syscall(struct tcb *, int);
|
||||
extern int internal_fork(struct tcb *);
|
||||
extern int internal_exec(struct tcb *);
|
||||
extern int internal_exit(struct tcb *);
|
||||
#ifdef LINUX
|
||||
extern int handle_new_child(struct tcb *, int, int);
|
||||
#endif
|
||||
|
14
process.c
14
process.c
@ -435,19 +435,6 @@ sys_exit(struct tcb *tcp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
internal_exit(struct tcb *tcp)
|
||||
{
|
||||
if (entering(tcp)) {
|
||||
tcp->flags |= TCB_EXITING;
|
||||
#ifdef __NR_exit_group
|
||||
if (known_scno(tcp) == __NR_exit_group)
|
||||
tcp->flags |= TCB_GROUP_EXITING;
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef USE_PROCFS
|
||||
|
||||
int
|
||||
@ -850,7 +837,6 @@ Process %u resumed (parent %d ready)\n",
|
||||
}
|
||||
if (call_flags & CLONE_THREAD) {
|
||||
tcpchild->flags |= TCB_CLONE_THREAD;
|
||||
++tcp->nclone_threads;
|
||||
}
|
||||
if ((call_flags & CLONE_PARENT) &&
|
||||
!(call_flags & CLONE_THREAD)) {
|
||||
|
131
signal.c
131
signal.c
@ -798,137 +798,6 @@ printsiginfo(siginfo_t *sip, int verbose)
|
||||
|
||||
#endif /* SVR4 || LINUX */
|
||||
|
||||
#ifdef LINUX
|
||||
|
||||
static void
|
||||
parse_sigset_t(const char *str, sigset_t *set)
|
||||
{
|
||||
const char *p;
|
||||
unsigned int digit;
|
||||
int i;
|
||||
|
||||
sigemptyset(set);
|
||||
|
||||
p = strchr(str, '\n');
|
||||
if (p == NULL)
|
||||
p = strchr(str, '\0');
|
||||
for (i = 0; p-- > str; i += 4) {
|
||||
if (*p >= '0' && *p <= '9')
|
||||
digit = *p - '0';
|
||||
else if (*p >= 'a' && *p <= 'f')
|
||||
digit = *p - 'a' + 10;
|
||||
else if (*p >= 'A' && *p <= 'F')
|
||||
digit = *p - 'A' + 10;
|
||||
else
|
||||
break;
|
||||
if (digit & 1)
|
||||
sigaddset(set, i + 1);
|
||||
if (digit & 2)
|
||||
sigaddset(set, i + 2);
|
||||
if (digit & 4)
|
||||
sigaddset(set, i + 3);
|
||||
if (digit & 8)
|
||||
sigaddset(set, i + 4);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Check process TCP for the disposition of signal SIG.
|
||||
* Return 1 if the process would somehow manage to survive signal SIG,
|
||||
* else return 0. This routine will never be called with SIGKILL.
|
||||
*/
|
||||
int
|
||||
sigishandled(struct tcb *tcp, int sig)
|
||||
{
|
||||
#ifdef LINUX
|
||||
int sfd;
|
||||
char sname[32];
|
||||
char buf[2048];
|
||||
const char *s;
|
||||
int i;
|
||||
sigset_t ignored, caught;
|
||||
#endif
|
||||
#ifdef SVR4
|
||||
/*
|
||||
* Since procfs doesn't interfere with wait I think it is safe
|
||||
* to punt on this question. If not, the information is there.
|
||||
*/
|
||||
return 1;
|
||||
#else /* !SVR4 */
|
||||
switch (sig) {
|
||||
case SIGCONT:
|
||||
case SIGSTOP:
|
||||
case SIGTSTP:
|
||||
case SIGTTIN:
|
||||
case SIGTTOU:
|
||||
case SIGCHLD:
|
||||
case SIGIO:
|
||||
#if defined(SIGURG) && SIGURG != SIGIO
|
||||
case SIGURG:
|
||||
#endif
|
||||
case SIGWINCH:
|
||||
/* Gloria Gaynor says ... */
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#endif /* !SVR4 */
|
||||
#ifdef LINUX
|
||||
|
||||
/* This is incredibly costly but it's worth it. */
|
||||
/* NOTE: LinuxThreads internally uses SIGRTMIN, SIGRTMIN + 1 and
|
||||
SIGRTMIN + 2, so we can't use the obsolete /proc/%d/stat which
|
||||
doesn't handle real-time signals). */
|
||||
sprintf(sname, "/proc/%d/status", tcp->pid);
|
||||
if ((sfd = open(sname, O_RDONLY)) == -1) {
|
||||
perror(sname);
|
||||
return 1;
|
||||
}
|
||||
i = read(sfd, buf, sizeof(buf));
|
||||
buf[i] = '\0';
|
||||
close(sfd);
|
||||
/*
|
||||
* Skip the extraneous fields. We need to skip
|
||||
* command name has any spaces in it. So be it.
|
||||
*/
|
||||
s = strstr(buf, "SigIgn:\t");
|
||||
if (!s) {
|
||||
fprintf(stderr, "/proc/pid/status format error\n");
|
||||
return 1;
|
||||
}
|
||||
parse_sigset_t(s + 8, &ignored);
|
||||
|
||||
s = strstr(buf, "SigCgt:\t");
|
||||
if (!s) {
|
||||
fprintf(stderr, "/proc/pid/status format error\n");
|
||||
return 1;
|
||||
}
|
||||
parse_sigset_t(s + 8, &caught);
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "sigs: %016qx %016qx (sig=%d)\n",
|
||||
*(long long *) &ignored, *(long long *) &caught, sig);
|
||||
#endif
|
||||
if (sigismember(&ignored, sig) || sigismember(&caught, sig))
|
||||
return 1;
|
||||
#endif /* LINUX */
|
||||
|
||||
#ifdef SUNOS4
|
||||
void (*u_signal)();
|
||||
|
||||
if (upeek(tcp, uoff(u_signal[0]) + sig*sizeof(u_signal),
|
||||
(long *) &u_signal) < 0) {
|
||||
return 0;
|
||||
}
|
||||
if (u_signal != SIG_DFL)
|
||||
return 1;
|
||||
#endif /* SUNOS4 */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if defined(SUNOS4) || defined(FREEBSD)
|
||||
|
||||
int
|
||||
|
157
strace.c
157
strace.c
@ -482,7 +482,6 @@ startup_attach(void)
|
||||
if (tid != tcbtab[tcbi]->pid) {
|
||||
tcp = alloctcb(tid);
|
||||
tcp->flags |= TCB_ATTACHED|TCB_CLONE_THREAD;
|
||||
tcbtab[tcbi]->nclone_threads++;
|
||||
tcp->parent = tcbtab[tcbi];
|
||||
}
|
||||
}
|
||||
@ -1553,40 +1552,11 @@ droptcb(struct tcb *tcp)
|
||||
{
|
||||
if (tcp->pid == 0)
|
||||
return;
|
||||
#ifdef TCB_CLONE_THREAD
|
||||
if (tcp->nclone_threads > 0) {
|
||||
/* There are other threads left in this process, but this
|
||||
is the one whose PID represents the whole process.
|
||||
We need to keep this record around as a zombie until
|
||||
all the threads die. */
|
||||
tcp->flags |= TCB_EXITING;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
nprocs--;
|
||||
if (debug)
|
||||
fprintf(stderr, "dropped tcb for pid %d, %d remain\n", tcp->pid, nprocs);
|
||||
tcp->pid = 0;
|
||||
|
||||
if (tcp->parent != NULL) {
|
||||
#ifdef TCB_CLONE_THREAD
|
||||
if (tcp->flags & TCB_CLONE_THREAD)
|
||||
tcp->parent->nclone_threads--;
|
||||
#endif
|
||||
#ifdef LINUX
|
||||
/* Update fields like NCLONE_DETACHED, only
|
||||
for zombie group leader that has already reported
|
||||
and been short-circuited at the top of this
|
||||
function. The same condition as at the top of DETACH. */
|
||||
if ((tcp->flags & TCB_CLONE_THREAD) &&
|
||||
tcp->parent->nclone_threads == 0 &&
|
||||
(tcp->parent->flags & TCB_EXITING))
|
||||
droptcb(tcp->parent);
|
||||
#endif
|
||||
tcp->parent = NULL;
|
||||
}
|
||||
|
||||
tcp->flags = 0;
|
||||
if (tcp->pfd != -1) {
|
||||
close(tcp->pfd);
|
||||
tcp->pfd = -1;
|
||||
@ -1601,14 +1571,15 @@ droptcb(struct tcb *tcp)
|
||||
}
|
||||
#endif /* !FREEBSD */
|
||||
#ifdef USE_PROCFS
|
||||
rebuild_pollv(); /* Note, flags needs to be cleared by now. */
|
||||
tcp->flags = 0; /* rebuild_pollv needs it */
|
||||
rebuild_pollv();
|
||||
#endif
|
||||
}
|
||||
|
||||
if (outfname && followfork > 1 && tcp->outf)
|
||||
fclose(tcp->outf);
|
||||
|
||||
tcp->outf = 0;
|
||||
memset(tcp, 0, sizeof(*tcp));
|
||||
}
|
||||
|
||||
/* detach traced process; continue with sig
|
||||
@ -1622,14 +1593,6 @@ detach(struct tcb *tcp, int sig)
|
||||
int error = 0;
|
||||
#ifdef LINUX
|
||||
int status, catch_sigstop;
|
||||
struct tcb *zombie = NULL;
|
||||
|
||||
/* If the group leader is lingering only because of this other
|
||||
thread now dying, then detach the leader as well. */
|
||||
if ((tcp->flags & TCB_CLONE_THREAD) &&
|
||||
tcp->parent->nclone_threads == 1 &&
|
||||
(tcp->parent->flags & TCB_EXITING))
|
||||
zombie = tcp->parent;
|
||||
#endif
|
||||
|
||||
if (tcp->flags & TCB_BPTSET)
|
||||
@ -1732,13 +1695,6 @@ detach(struct tcb *tcp, int sig)
|
||||
|
||||
droptcb(tcp);
|
||||
|
||||
#ifdef LINUX
|
||||
if (zombie != NULL) {
|
||||
/* TCP no longer exists therefore you must not detach() it. */
|
||||
droptcb(zombie);
|
||||
}
|
||||
#endif
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -2260,62 +2216,6 @@ trace(void)
|
||||
|
||||
#else /* !USE_PROCFS */
|
||||
|
||||
#ifdef TCB_GROUP_EXITING
|
||||
/* Handle an exit detach or death signal that is taking all the
|
||||
related clone threads with it. This is called in three circumstances:
|
||||
SIG == -1 TCP has already died (TCB_ATTACHED is clear, strace is parent).
|
||||
SIG == 0 Continuing TCP will perform an exit_group syscall.
|
||||
SIG == other Continuing TCP with SIG will kill the process.
|
||||
*/
|
||||
static int
|
||||
handle_group_exit(struct tcb *tcp, int sig)
|
||||
{
|
||||
/* We need to locate our records of all the clone threads
|
||||
related to TCP, either its children or siblings. */
|
||||
struct tcb *leader = NULL;
|
||||
|
||||
if (tcp->flags & TCB_CLONE_THREAD)
|
||||
leader = tcp->parent;
|
||||
|
||||
if (sig < 0) {
|
||||
if (leader != NULL && leader != tcp
|
||||
&& !(leader->flags & TCB_GROUP_EXITING)
|
||||
&& !(tcp->flags & TCB_STARTUP)
|
||||
) {
|
||||
fprintf(stderr,
|
||||
"PANIC: handle_group_exit: %d leader %d\n",
|
||||
tcp->pid, leader ? leader->pid : -1);
|
||||
}
|
||||
/* TCP no longer exists therefore you must not detach() it. */
|
||||
droptcb(tcp); /* Already died. */
|
||||
}
|
||||
else {
|
||||
/* Mark that we are taking the process down. */
|
||||
tcp->flags |= TCB_EXITING | TCB_GROUP_EXITING;
|
||||
if (tcp->flags & TCB_ATTACHED) {
|
||||
detach(tcp, sig);
|
||||
if (leader != NULL && leader != tcp)
|
||||
leader->flags |= TCB_GROUP_EXITING;
|
||||
} else {
|
||||
if (ptrace_restart(PTRACE_CONT, tcp, sig) < 0) {
|
||||
cleanup();
|
||||
return -1;
|
||||
}
|
||||
if (leader != NULL) {
|
||||
leader->flags |= TCB_GROUP_EXITING;
|
||||
if (leader != tcp)
|
||||
droptcb(tcp);
|
||||
}
|
||||
/* The leader will report to us as parent now,
|
||||
and then we'll get to the SIG==-1 case. */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef LINUX
|
||||
static int
|
||||
handle_ptrace_event(int status, struct tcb *tcp)
|
||||
@ -2522,37 +2422,24 @@ Process %d attached (waiting for parent)\n",
|
||||
#endif
|
||||
printtrailer();
|
||||
}
|
||||
#ifdef TCB_GROUP_EXITING
|
||||
handle_group_exit(tcp, -1);
|
||||
#else
|
||||
droptcb(tcp);
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
if (WIFEXITED(status)) {
|
||||
if (pid == strace_child)
|
||||
exit_code = WEXITSTATUS(status);
|
||||
if ((tcp->flags & (TCB_ATTACHED|TCB_STARTUP)) == TCB_ATTACHED
|
||||
#ifdef TCB_GROUP_EXITING
|
||||
&& !(tcp->parent && (tcp->parent->flags & TCB_GROUP_EXITING))
|
||||
&& !(tcp->flags & TCB_GROUP_EXITING)
|
||||
#endif
|
||||
) {
|
||||
fprintf(stderr,
|
||||
"PANIC: attached pid %u exited with %d\n",
|
||||
pid, WEXITSTATUS(status));
|
||||
}
|
||||
if (tcp == tcp_last) {
|
||||
if ((tcp->flags & (TCB_INSYSCALL|TCB_REPRINT)) == TCB_INSYSCALL)
|
||||
tprintf(" <unfinished ... exit status %d>\n",
|
||||
WEXITSTATUS(status));
|
||||
tcp_last = NULL;
|
||||
}
|
||||
#ifdef TCB_GROUP_EXITING
|
||||
handle_group_exit(tcp, -1);
|
||||
#else
|
||||
if (!cflag /* && (qual_flags[WTERMSIG(status)] & QUAL_SIGNAL) */ ) {
|
||||
printleader(tcp);
|
||||
tprintf("+++ exited with %d +++", WEXITSTATUS(status));
|
||||
printtrailer();
|
||||
}
|
||||
droptcb(tcp);
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
if (!WIFSTOPPED(status)) {
|
||||
@ -2657,16 +2544,6 @@ Process %d attached (waiting for parent)\n",
|
||||
PC_FORMAT_ARG);
|
||||
printtrailer();
|
||||
}
|
||||
if (((tcp->flags & TCB_ATTACHED) ||
|
||||
tcp->nclone_threads > 0) &&
|
||||
!sigishandled(tcp, WSTOPSIG(status))) {
|
||||
#ifdef TCB_GROUP_EXITING
|
||||
handle_group_exit(tcp, WSTOPSIG(status));
|
||||
#else
|
||||
detach(tcp, WSTOPSIG(status));
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
if (ptrace_restart(PTRACE_SYSCALL, tcp, WSTOPSIG(status)) < 0) {
|
||||
cleanup();
|
||||
return -1;
|
||||
@ -2702,22 +2579,6 @@ Process %d attached (waiting for parent)\n",
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (tcp->flags & TCB_EXITING) {
|
||||
#ifdef TCB_GROUP_EXITING
|
||||
if (tcp->flags & TCB_GROUP_EXITING) {
|
||||
if (handle_group_exit(tcp, 0) < 0)
|
||||
return -1;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (tcp->flags & TCB_ATTACHED)
|
||||
detach(tcp, 0);
|
||||
else if (ptrace_restart(PTRACE_CONT, tcp, 0) < 0) {
|
||||
cleanup();
|
||||
return -1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (tcp->flags & TCB_SUSPENDED) {
|
||||
if (!qflag)
|
||||
fprintf(stderr, "Process %u suspended\n", pid);
|
||||
|
Loading…
x
Reference in New Issue
Block a user