get_scno is an unholy mess, make it less horrible

Currently, get_scno does *much* more than "get syscall no".
It checks for post-execve SIGTRAP. It checks for changes
in personality. It retrieves params on entry and registers on exit.
Worse still, it is different in different architectures: for example,
for AVR32 regs are fetched in get_scno(), while for e.g. I386
it is done in syscall_enter().

Another problem is that get_scno() is called on both syscall entry and
syscall exit, which is stupid: we don't need to know scno on syscall
exit, it is already known from last syscall entry and stored in
tcp->scno! In essence, get_scno() does two completely different things
on syscall entry and on exit, they are just mixed into one bottle, like
shampoo and conditioner.

The following patches will try to improve this situation.

This change duplicates get_scno into identical get_scno_on_sysenter,
get_scno_on_sysexit functions. Call them in syscall enter and syscall
exit, correspondingly.

* defs.h: Rename get_scno to get_scno_on_sysenter; declare it only
if USE_PROCFS.
* strace.c (proc_open): Call get_scno_on_sysenter instead of get_scno.
* syscall.c (get_scno): Split into two (so far identical) functions
get_scno_on_sysenter and get_scno_on_sysexit.
(trace_syscall_entering): Call get_scno_on_sysenter instead of get_scno.
(trace_syscall_exiting): Call get_scno_on_sysexit instead of get_scno.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
This commit is contained in:
Denys Vlasenko 2011-08-24 16:47:32 +02:00
parent 5f731c4e1b
commit 9a36ae5e88
3 changed files with 623 additions and 5 deletions

4
defs.h
View File

@ -586,7 +586,9 @@ extern void droptcb(struct tcb *);
extern void set_sortby(const char *);
extern void set_overhead(int);
extern void qualify(const char *);
extern int get_scno(struct tcb *);
#ifdef USE_PROCFS
extern int get_scno_on_sysenter(struct tcb *);
#endif
extern long known_scno(struct tcb *);
extern long do_ptrace(int request, struct tcb *tcp, void *addr, void *data);
extern int ptrace_restart(int request, struct tcb *tcp, int sig);

View File

@ -1488,7 +1488,7 @@ proc_open(struct tcb *tcp, int attaching)
}
if (tcp->status.PR_WHY == PR_SYSENTRY) {
tcp->flags &= ~TCB_INSYSCALL;
get_scno(tcp);
get_scno_on_sysenter(tcp);
if (known_scno(tcp) == SYS_execve)
break;
}

622
syscall.c
View File

@ -744,8 +744,11 @@ struct reg regs; /* TODO: make static? */
* other: error, trace_syscall() should print error indicator
* ("????" etc) and bail out.
*/
#ifndef USE_PROCFS
static
#endif
int
get_scno(struct tcb *tcp)
get_scno_on_sysenter(struct tcb *tcp)
{
long scno = 0;
@ -1352,6 +1355,619 @@ get_scno(struct tcb *tcp)
return 1;
}
/* Returns:
* 0: "ignore this ptrace stop", bail out of trace_syscall() silently.
* 1: ok, continue in trace_syscall().
* other: error, trace_syscall() should print error indicator
* ("????" etc) and bail out.
*/
static int
get_scno_on_sysexit(struct tcb *tcp)
{
long scno = 0;
#ifdef LINUX
# if defined(S390) || defined(S390X)
if (tcp->flags & TCB_WAITEXECVE) {
/*
* When the execve system call completes successfully, the
* new process still has -ENOSYS (old style) or __NR_execve
* (new style) in gpr2. We cannot recover the scno again
* by disassembly, because the image that executed the
* syscall is gone now. Fortunately, we don't want it. We
* leave the flag set so that syscall_fixup can fake the
* result.
*/
if (exiting(tcp))
return 1;
/*
* This is the post-execve SIGTRAP. We cannot try to read
* the system call here either.
*/
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
if (upeek(tcp, PT_GPR2, &syscall_mode) < 0)
return -1;
if (syscall_mode != -ENOSYS) {
/*
* Since kernel version 2.5.44 the scno gets passed in gpr2.
*/
scno = syscall_mode;
} else {
/*
* Old style of "passing" the scno via the SVC instruction.
*/
long opcode, offset_reg, tmp;
void * svc_addr;
static const int gpr_offset[16] = {
PT_GPR0, PT_GPR1, PT_ORIGGPR2, PT_GPR3,
PT_GPR4, PT_GPR5, PT_GPR6, PT_GPR7,
PT_GPR8, PT_GPR9, PT_GPR10, PT_GPR11,
PT_GPR12, PT_GPR13, PT_GPR14, PT_GPR15
};
if (upeek(tcp, PT_PSWADDR, &pc) < 0)
return -1;
errno = 0;
opcode = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)(pc-sizeof(long)), 0);
if (errno) {
perror("peektext(pc-oneword)");
return -1;
}
/*
* We have to check if the SVC got executed directly or via an
* EXECUTE instruction. In case of EXECUTE it is necessary to do
* instruction decoding to derive the system call number.
* Unfortunately the opcode sizes of EXECUTE and SVC are differently,
* so that this doesn't work if a SVC opcode is part of an EXECUTE
* opcode. Since there is no way to find out the opcode size this
* is the best we can do...
*/
if ((opcode & 0xff00) == 0x0a00) {
/* SVC opcode */
scno = opcode & 0xff;
}
else {
/* SVC got executed by EXECUTE instruction */
/*
* Do instruction decoding of EXECUTE. If you really want to
* understand this, read the Principles of Operations.
*/
svc_addr = (void *) (opcode & 0xfff);
tmp = 0;
offset_reg = (opcode & 0x000f0000) >> 16;
if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0))
return -1;
svc_addr += tmp;
tmp = 0;
offset_reg = (opcode & 0x0000f000) >> 12;
if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0))
return -1;
svc_addr += tmp;
scno = ptrace(PTRACE_PEEKTEXT, tcp->pid, svc_addr, 0);
if (errno)
return -1;
# if defined(S390X)
scno >>= 48;
# else
scno >>= 16;
# endif
tmp = 0;
offset_reg = (opcode & 0x00f00000) >> 20;
if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0))
return -1;
scno = (scno | tmp) & 0xff;
}
}
# elif defined (POWERPC)
if (upeek(tcp, sizeof(unsigned long)*PT_R0, &scno) < 0)
return -1;
if (entering(tcp)) {
/* Check if this is the post-execve SIGTRAP. */
if (scno == 0 && (tcp->flags & TCB_WAITEXECVE)) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
}
# ifdef POWERPC64
if (entering(tcp)) {
/* TODO: speed up strace by not doing this at every syscall.
* We only need to do it after execve.
*/
int currpers;
long val;
int pid = tcp->pid;
/* Check for 64/32 bit mode. */
if (upeek(tcp, sizeof(unsigned long)*PT_MSR, &val) < 0)
return -1;
/* SF is bit 0 of MSR */
if (val < 0)
currpers = 0;
else
currpers = 1;
if (currpers != current_personality) {
static const char *const names[] = {"64 bit", "32 bit"};
set_personality(currpers);
fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n",
pid, names[current_personality]);
}
}
# endif
# elif defined(AVR32)
/*
* Read complete register set in one go.
*/
if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, &regs) < 0)
return -1;
/*
* We only need to grab the syscall number on syscall entry.
*/
if (entering(tcp)) {
scno = regs.r8;
/* Check if this is the post-execve SIGTRAP. */
if (tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
}
# elif defined(BFIN)
if (upeek(tcp, PT_ORIG_P0, &scno))
return -1;
# elif defined (I386)
if (upeek(tcp, 4*ORIG_EAX, &scno) < 0)
return -1;
# elif defined (X86_64)
if (upeek(tcp, 8*ORIG_RAX, &scno) < 0)
return -1;
if (entering(tcp)) {
/* TODO: speed up strace by not doing this at every syscall.
* We only need to do it after execve.
*/
int currpers;
long val;
int pid = tcp->pid;
/* Check CS register value. On x86-64 linux it is:
* 0x33 for long mode (64 bit)
* 0x23 for compatibility mode (32 bit)
* It takes only one ptrace and thus doesn't need
* to be cached.
*/
if (upeek(tcp, 8*CS, &val) < 0)
return -1;
switch (val) {
case 0x23: currpers = 1; break;
case 0x33: currpers = 0; break;
default:
fprintf(stderr, "Unknown value CS=0x%02X while "
"detecting personality of process "
"PID=%d\n", (int)val, pid);
currpers = current_personality;
break;
}
# if 0
/* This version analyzes the opcode of a syscall instruction.
* (int 0x80 on i386 vs. syscall on x86-64)
* It works, but is too complicated.
*/
unsigned long val, rip, i;
if (upeek(tcp, 8*RIP, &rip) < 0)
perror("upeek(RIP)");
/* sizeof(syscall) == sizeof(int 0x80) == 2 */
rip -= 2;
errno = 0;
call = ptrace(PTRACE_PEEKTEXT, pid, (char *)rip, (char *)0);
if (errno)
fprintf(stderr, "ptrace_peektext failed: %s\n",
strerror(errno));
switch (call & 0xffff) {
/* x86-64: syscall = 0x0f 0x05 */
case 0x050f: currpers = 0; break;
/* i386: int 0x80 = 0xcd 0x80 */
case 0x80cd: currpers = 1; break;
default:
currpers = current_personality;
fprintf(stderr,
"Unknown syscall opcode (0x%04X) while "
"detecting personality of process "
"PID=%d\n", (int)call, pid);
break;
}
# endif
if (currpers != current_personality) {
static const char *const names[] = {"64 bit", "32 bit"};
set_personality(currpers);
fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n",
pid, names[current_personality]);
}
}
# elif defined(IA64)
# define IA64_PSR_IS ((long)1 << 34)
if (upeek(tcp, PT_CR_IPSR, &psr) >= 0)
ia32 = (psr & IA64_PSR_IS) != 0;
if (entering(tcp)) {
if (ia32) {
if (upeek(tcp, PT_R1, &scno) < 0) /* orig eax */
return -1;
} else {
if (upeek(tcp, PT_R15, &scno) < 0)
return -1;
}
/* Check if this is the post-execve SIGTRAP. */
if (tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
} else {
/* Syscall exit */
if (upeek(tcp, PT_R8, &r8) < 0)
return -1;
if (upeek(tcp, PT_R10, &r10) < 0)
return -1;
}
# elif defined (ARM)
/*
* Read complete register set in one go.
*/
if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, (void *)&regs) == -1)
return -1;
/*
* We only need to grab the syscall number on syscall entry.
*/
if (regs.ARM_ip == 0) {
if (entering(tcp)) {
/* Check if this is the post-execve SIGTRAP. */
if (tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
}
/*
* Note: we only deal with only 32-bit CPUs here.
*/
if (regs.ARM_cpsr & 0x20) {
/*
* Get the Thumb-mode system call number
*/
scno = regs.ARM_r7;
} else {
/*
* Get the ARM-mode system call number
*/
errno = 0;
scno = ptrace(PTRACE_PEEKTEXT, tcp->pid, (void *)(regs.ARM_pc - 4), NULL);
if (errno)
return -1;
/* FIXME: bogus check? it is already done on entering before,
* so we never can see it here?
*/
if (scno == 0 && (tcp->flags & TCB_WAITEXECVE)) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
/* Handle the EABI syscall convention. We do not
bother converting structures between the two
ABIs, but basic functionality should work even
if strace and the traced program have different
ABIs. */
if (scno == 0xef000000) {
scno = regs.ARM_r7;
} else {
if ((scno & 0x0ff00000) != 0x0f900000) {
fprintf(stderr, "syscall: unknown syscall trap 0x%08lx\n",
scno);
return -1;
}
/*
* Fixup the syscall number
*/
scno &= 0x000fffff;
}
}
if (scno & 0x0f0000) {
/*
* Handle ARM specific syscall
*/
set_personality(1);
scno &= 0x0000ffff;
} else
set_personality(0);
if (exiting(tcp)) {
fprintf(stderr, "pid %d stray syscall exit\n", tcp->pid);
tcp->flags &= ~TCB_INSYSCALL;
}
} else {
if (entering(tcp)) {
fprintf(stderr, "pid %d stray syscall entry\n", tcp->pid);
tcp->flags |= TCB_INSYSCALL;
}
}
# elif defined (M68K)
if (upeek(tcp, 4*PT_ORIG_D0, &scno) < 0)
return -1;
# elif defined (LINUX_MIPSN32)
unsigned long long regs[38];
if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long) &regs) < 0)
return -1;
a3 = regs[REG_A3];
r2 = regs[REG_V0];
if (entering(tcp)) {
scno = r2;
/* Check if this is the post-execve SIGTRAP. */
if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
if (scno < 0 || scno > nsyscalls) {
if (a3 == 0 || a3 == -1) {
if (debug)
fprintf(stderr, "stray syscall exit: v0 = %ld\n", scno);
return 0;
}
}
}
# elif defined (MIPS)
if (upeek(tcp, REG_A3, &a3) < 0)
return -1;
if (entering(tcp)) {
if (upeek(tcp, REG_V0, &scno) < 0)
return -1;
/* Check if this is the post-execve SIGTRAP. */
if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
if (scno < 0 || scno > nsyscalls) {
if (a3 == 0 || a3 == -1) {
if (debug)
fprintf(stderr, "stray syscall exit: v0 = %ld\n", scno);
return 0;
}
}
} else {
if (upeek(tcp, REG_V0, &r2) < 0)
return -1;
}
# elif defined (ALPHA)
if (upeek(tcp, REG_A3, &a3) < 0)
return -1;
if (entering(tcp)) {
if (upeek(tcp, REG_R0, &scno) < 0)
return -1;
/* Check if this is the post-execve SIGTRAP. */
if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
/*
* Do some sanity checks to figure out if it's
* really a syscall entry
*/
if (scno < 0 || scno > nsyscalls) {
if (a3 == 0 || a3 == -1) {
if (debug)
fprintf(stderr, "stray syscall exit: r0 = %ld\n", scno);
return 0;
}
}
}
else {
if (upeek(tcp, REG_R0, &r0) < 0)
return -1;
}
# elif defined (SPARC) || defined (SPARC64)
/* Everything we need is in the current register set. */
if (ptrace(PTRACE_GETREGS, tcp->pid, (char *)&regs, 0) < 0)
return -1;
/* If we are entering, then disassemble the syscall trap. */
if (entering(tcp)) {
/* Retrieve the syscall trap instruction. */
errno = 0;
# if defined(SPARC64)
trap = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)regs.tpc, 0);
trap >>= 32;
# else
trap = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)regs.pc, 0);
# endif
if (errno)
return -1;
/* Disassemble the trap to see what personality to use. */
switch (trap) {
case 0x91d02010:
/* Linux/SPARC syscall trap. */
set_personality(0);
break;
case 0x91d0206d:
/* Linux/SPARC64 syscall trap. */
set_personality(2);
break;
case 0x91d02000:
/* SunOS syscall trap. (pers 1) */
fprintf(stderr, "syscall: SunOS no support\n");
return -1;
case 0x91d02008:
/* Solaris 2.x syscall trap. (per 2) */
set_personality(1);
break;
case 0x91d02009:
/* NetBSD/FreeBSD syscall trap. */
fprintf(stderr, "syscall: NetBSD/FreeBSD not supported\n");
return -1;
case 0x91d02027:
/* Solaris 2.x gettimeofday */
set_personality(1);
break;
default:
/* Check if this is the post-execve SIGTRAP. */
if (tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
# if defined (SPARC64)
fprintf(stderr, "syscall: unknown syscall trap %08lx %016lx\n", trap, regs.tpc);
# else
fprintf(stderr, "syscall: unknown syscall trap %08lx %08lx\n", trap, regs.pc);
# endif
return -1;
}
/* Extract the system call number from the registers. */
if (trap == 0x91d02027)
scno = 156;
else
scno = regs.u_regs[U_REG_G1];
if (scno == 0) {
scno = regs.u_regs[U_REG_O0];
memmove(&regs.u_regs[U_REG_O0], &regs.u_regs[U_REG_O1], 7*sizeof(regs.u_regs[0]));
}
}
# elif defined(HPPA)
if (upeek(tcp, PT_GR20, &scno) < 0)
return -1;
if (entering(tcp)) {
/* Check if this is the post-execve SIGTRAP. */
if (tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
}
# elif defined(SH)
/*
* In the new syscall ABI, the system call number is in R3.
*/
if (upeek(tcp, 4*(REG_REG0+3), &scno) < 0)
return -1;
if (scno < 0) {
/* Odd as it may seem, a glibc bug has been known to cause
glibc to issue bogus negative syscall numbers. So for
our purposes, make strace print what it *should* have been */
long correct_scno = (scno & 0xff);
if (debug)
fprintf(stderr,
"Detected glibc bug: bogus system call"
" number = %ld, correcting to %ld\n",
scno,
correct_scno);
scno = correct_scno;
}
if (entering(tcp)) {
/* Check if this is the post-execve SIGTRAP. */
if (scno == 0 && tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
}
# elif defined(SH64)
if (upeek(tcp, REG_SYSCALL, &scno) < 0)
return -1;
scno &= 0xFFFF;
if (entering(tcp)) {
/* Check if this is the post-execve SIGTRAP. */
if (tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
}
# elif defined(CRISV10) || defined(CRISV32)
if (upeek(tcp, 4*PT_R9, &scno) < 0)
return -1;
# elif defined(TILE)
if (upeek(tcp, PTREGS_OFFSET_REG(10), &scno) < 0)
return -1;
if (entering(tcp)) {
/* Check if this is the post-execve SIGTRAP. */
if (tcp->flags & TCB_WAITEXECVE) {
tcp->flags &= ~TCB_WAITEXECVE;
return 0;
}
}
# elif defined(MICROBLAZE)
if (upeek(tcp, 0, &scno) < 0)
return -1;
# endif
#endif /* LINUX */
#ifdef SUNOS4
if (upeek(tcp, uoff(u_arg[7]), &scno) < 0)
return -1;
#elif defined(SH)
/* new syscall ABI returns result in R0 */
if (upeek(tcp, 4*REG_REG0, (long *)&r0) < 0)
return -1;
#elif defined(SH64)
/* ABI defines result returned in r9 */
if (upeek(tcp, REG_GENERAL(9), (long *)&r9) < 0)
return -1;
#endif
#ifdef USE_PROCFS
# ifdef HAVE_PR_SYSCALL
scno = tcp->status.PR_SYSCALL;
# else
# ifndef FREEBSD
scno = tcp->status.PR_WHAT;
# else
if (pread(tcp->pfd_reg, &regs, sizeof(regs), 0) < 0) {
perror("pread");
return -1;
}
switch (regs.r_eax) {
case SYS_syscall:
case SYS___syscall:
pread(tcp->pfd, &scno, sizeof(scno), regs.r_esp + sizeof(int));
break;
default:
scno = regs.r_eax;
break;
}
# endif /* FREEBSD */
# endif /* !HAVE_PR_SYSCALL */
#endif /* USE_PROCFS */
if (entering(tcp))
tcp->scno = scno;
return 1;
}
long
known_scno(struct tcb *tcp)
@ -2271,7 +2887,7 @@ trace_syscall_entering(struct tcb *tcp)
{
int res, scno_good;
scno_good = res = get_scno(tcp);
scno_good = res = get_scno_on_sysenter(tcp);
if (res == 0)
return res;
if (res == 1)
@ -2443,7 +3059,7 @@ trace_syscall_exiting(struct tcb *tcp)
/* BTW, why we don't just memorize syscall no. on entry
* in tcp->something?
*/
scno_good = res = get_scno(tcp);
scno_good = res = get_scno_on_sysexit(tcp);
if (res == 0)
return res;
if (res == 1)