diff --git a/defs.h b/defs.h index cf308a3f..bda72cb6 100644 --- a/defs.h +++ b/defs.h @@ -586,7 +586,9 @@ extern void droptcb(struct tcb *); extern void set_sortby(const char *); extern void set_overhead(int); extern void qualify(const char *); -extern int get_scno(struct tcb *); +#ifdef USE_PROCFS +extern int get_scno_on_sysenter(struct tcb *); +#endif extern long known_scno(struct tcb *); extern long do_ptrace(int request, struct tcb *tcp, void *addr, void *data); extern int ptrace_restart(int request, struct tcb *tcp, int sig); diff --git a/strace.c b/strace.c index 04af6b56..f6a8bdb1 100644 --- a/strace.c +++ b/strace.c @@ -1488,7 +1488,7 @@ proc_open(struct tcb *tcp, int attaching) } if (tcp->status.PR_WHY == PR_SYSENTRY) { tcp->flags &= ~TCB_INSYSCALL; - get_scno(tcp); + get_scno_on_sysenter(tcp); if (known_scno(tcp) == SYS_execve) break; } diff --git a/syscall.c b/syscall.c index a3b5727b..cb4bef9f 100644 --- a/syscall.c +++ b/syscall.c @@ -744,8 +744,11 @@ struct reg regs; /* TODO: make static? */ * other: error, trace_syscall() should print error indicator * ("????" etc) and bail out. */ +#ifndef USE_PROCFS +static +#endif int -get_scno(struct tcb *tcp) +get_scno_on_sysenter(struct tcb *tcp) { long scno = 0; @@ -1352,6 +1355,619 @@ get_scno(struct tcb *tcp) return 1; } +/* Returns: + * 0: "ignore this ptrace stop", bail out of trace_syscall() silently. + * 1: ok, continue in trace_syscall(). + * other: error, trace_syscall() should print error indicator + * ("????" etc) and bail out. + */ +static int +get_scno_on_sysexit(struct tcb *tcp) +{ + long scno = 0; + +#ifdef LINUX +# if defined(S390) || defined(S390X) + if (tcp->flags & TCB_WAITEXECVE) { + /* + * When the execve system call completes successfully, the + * new process still has -ENOSYS (old style) or __NR_execve + * (new style) in gpr2. We cannot recover the scno again + * by disassembly, because the image that executed the + * syscall is gone now. Fortunately, we don't want it. We + * leave the flag set so that syscall_fixup can fake the + * result. + */ + if (exiting(tcp)) + return 1; + /* + * This is the post-execve SIGTRAP. We cannot try to read + * the system call here either. + */ + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + + if (upeek(tcp, PT_GPR2, &syscall_mode) < 0) + return -1; + + if (syscall_mode != -ENOSYS) { + /* + * Since kernel version 2.5.44 the scno gets passed in gpr2. + */ + scno = syscall_mode; + } else { + /* + * Old style of "passing" the scno via the SVC instruction. + */ + + long opcode, offset_reg, tmp; + void * svc_addr; + static const int gpr_offset[16] = { + PT_GPR0, PT_GPR1, PT_ORIGGPR2, PT_GPR3, + PT_GPR4, PT_GPR5, PT_GPR6, PT_GPR7, + PT_GPR8, PT_GPR9, PT_GPR10, PT_GPR11, + PT_GPR12, PT_GPR13, PT_GPR14, PT_GPR15 + }; + + if (upeek(tcp, PT_PSWADDR, &pc) < 0) + return -1; + errno = 0; + opcode = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)(pc-sizeof(long)), 0); + if (errno) { + perror("peektext(pc-oneword)"); + return -1; + } + + /* + * We have to check if the SVC got executed directly or via an + * EXECUTE instruction. In case of EXECUTE it is necessary to do + * instruction decoding to derive the system call number. + * Unfortunately the opcode sizes of EXECUTE and SVC are differently, + * so that this doesn't work if a SVC opcode is part of an EXECUTE + * opcode. Since there is no way to find out the opcode size this + * is the best we can do... + */ + + if ((opcode & 0xff00) == 0x0a00) { + /* SVC opcode */ + scno = opcode & 0xff; + } + else { + /* SVC got executed by EXECUTE instruction */ + + /* + * Do instruction decoding of EXECUTE. If you really want to + * understand this, read the Principles of Operations. + */ + svc_addr = (void *) (opcode & 0xfff); + + tmp = 0; + offset_reg = (opcode & 0x000f0000) >> 16; + if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0)) + return -1; + svc_addr += tmp; + + tmp = 0; + offset_reg = (opcode & 0x0000f000) >> 12; + if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0)) + return -1; + svc_addr += tmp; + + scno = ptrace(PTRACE_PEEKTEXT, tcp->pid, svc_addr, 0); + if (errno) + return -1; +# if defined(S390X) + scno >>= 48; +# else + scno >>= 16; +# endif + tmp = 0; + offset_reg = (opcode & 0x00f00000) >> 20; + if (offset_reg && (upeek(tcp, gpr_offset[offset_reg], &tmp) < 0)) + return -1; + + scno = (scno | tmp) & 0xff; + } + } +# elif defined (POWERPC) + if (upeek(tcp, sizeof(unsigned long)*PT_R0, &scno) < 0) + return -1; + if (entering(tcp)) { + /* Check if this is the post-execve SIGTRAP. */ + if (scno == 0 && (tcp->flags & TCB_WAITEXECVE)) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + } + +# ifdef POWERPC64 + if (entering(tcp)) { + /* TODO: speed up strace by not doing this at every syscall. + * We only need to do it after execve. + */ + int currpers; + long val; + int pid = tcp->pid; + + /* Check for 64/32 bit mode. */ + if (upeek(tcp, sizeof(unsigned long)*PT_MSR, &val) < 0) + return -1; + /* SF is bit 0 of MSR */ + if (val < 0) + currpers = 0; + else + currpers = 1; + if (currpers != current_personality) { + static const char *const names[] = {"64 bit", "32 bit"}; + set_personality(currpers); + fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n", + pid, names[current_personality]); + } + } +# endif +# elif defined(AVR32) + /* + * Read complete register set in one go. + */ + if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, ®s) < 0) + return -1; + + /* + * We only need to grab the syscall number on syscall entry. + */ + if (entering(tcp)) { + scno = regs.r8; + + /* Check if this is the post-execve SIGTRAP. */ + if (tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + } +# elif defined(BFIN) + if (upeek(tcp, PT_ORIG_P0, &scno)) + return -1; +# elif defined (I386) + if (upeek(tcp, 4*ORIG_EAX, &scno) < 0) + return -1; +# elif defined (X86_64) + if (upeek(tcp, 8*ORIG_RAX, &scno) < 0) + return -1; + + if (entering(tcp)) { + /* TODO: speed up strace by not doing this at every syscall. + * We only need to do it after execve. + */ + int currpers; + long val; + int pid = tcp->pid; + + /* Check CS register value. On x86-64 linux it is: + * 0x33 for long mode (64 bit) + * 0x23 for compatibility mode (32 bit) + * It takes only one ptrace and thus doesn't need + * to be cached. + */ + if (upeek(tcp, 8*CS, &val) < 0) + return -1; + switch (val) { + case 0x23: currpers = 1; break; + case 0x33: currpers = 0; break; + default: + fprintf(stderr, "Unknown value CS=0x%02X while " + "detecting personality of process " + "PID=%d\n", (int)val, pid); + currpers = current_personality; + break; + } +# if 0 + /* This version analyzes the opcode of a syscall instruction. + * (int 0x80 on i386 vs. syscall on x86-64) + * It works, but is too complicated. + */ + unsigned long val, rip, i; + + if (upeek(tcp, 8*RIP, &rip) < 0) + perror("upeek(RIP)"); + + /* sizeof(syscall) == sizeof(int 0x80) == 2 */ + rip -= 2; + errno = 0; + + call = ptrace(PTRACE_PEEKTEXT, pid, (char *)rip, (char *)0); + if (errno) + fprintf(stderr, "ptrace_peektext failed: %s\n", + strerror(errno)); + switch (call & 0xffff) { + /* x86-64: syscall = 0x0f 0x05 */ + case 0x050f: currpers = 0; break; + /* i386: int 0x80 = 0xcd 0x80 */ + case 0x80cd: currpers = 1; break; + default: + currpers = current_personality; + fprintf(stderr, + "Unknown syscall opcode (0x%04X) while " + "detecting personality of process " + "PID=%d\n", (int)call, pid); + break; + } +# endif + if (currpers != current_personality) { + static const char *const names[] = {"64 bit", "32 bit"}; + set_personality(currpers); + fprintf(stderr, "[ Process PID=%d runs in %s mode. ]\n", + pid, names[current_personality]); + } + } +# elif defined(IA64) +# define IA64_PSR_IS ((long)1 << 34) + if (upeek(tcp, PT_CR_IPSR, &psr) >= 0) + ia32 = (psr & IA64_PSR_IS) != 0; + if (entering(tcp)) { + if (ia32) { + if (upeek(tcp, PT_R1, &scno) < 0) /* orig eax */ + return -1; + } else { + if (upeek(tcp, PT_R15, &scno) < 0) + return -1; + } + /* Check if this is the post-execve SIGTRAP. */ + if (tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + } else { + /* Syscall exit */ + if (upeek(tcp, PT_R8, &r8) < 0) + return -1; + if (upeek(tcp, PT_R10, &r10) < 0) + return -1; + } +# elif defined (ARM) + /* + * Read complete register set in one go. + */ + if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, (void *)®s) == -1) + return -1; + + /* + * We only need to grab the syscall number on syscall entry. + */ + if (regs.ARM_ip == 0) { + if (entering(tcp)) { + /* Check if this is the post-execve SIGTRAP. */ + if (tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + } + + /* + * Note: we only deal with only 32-bit CPUs here. + */ + if (regs.ARM_cpsr & 0x20) { + /* + * Get the Thumb-mode system call number + */ + scno = regs.ARM_r7; + } else { + /* + * Get the ARM-mode system call number + */ + errno = 0; + scno = ptrace(PTRACE_PEEKTEXT, tcp->pid, (void *)(regs.ARM_pc - 4), NULL); + if (errno) + return -1; + + /* FIXME: bogus check? it is already done on entering before, + * so we never can see it here? + */ + if (scno == 0 && (tcp->flags & TCB_WAITEXECVE)) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + + /* Handle the EABI syscall convention. We do not + bother converting structures between the two + ABIs, but basic functionality should work even + if strace and the traced program have different + ABIs. */ + if (scno == 0xef000000) { + scno = regs.ARM_r7; + } else { + if ((scno & 0x0ff00000) != 0x0f900000) { + fprintf(stderr, "syscall: unknown syscall trap 0x%08lx\n", + scno); + return -1; + } + + /* + * Fixup the syscall number + */ + scno &= 0x000fffff; + } + } + if (scno & 0x0f0000) { + /* + * Handle ARM specific syscall + */ + set_personality(1); + scno &= 0x0000ffff; + } else + set_personality(0); + + if (exiting(tcp)) { + fprintf(stderr, "pid %d stray syscall exit\n", tcp->pid); + tcp->flags &= ~TCB_INSYSCALL; + } + } else { + if (entering(tcp)) { + fprintf(stderr, "pid %d stray syscall entry\n", tcp->pid); + tcp->flags |= TCB_INSYSCALL; + } + } +# elif defined (M68K) + if (upeek(tcp, 4*PT_ORIG_D0, &scno) < 0) + return -1; +# elif defined (LINUX_MIPSN32) + unsigned long long regs[38]; + + if (ptrace(PTRACE_GETREGS, tcp->pid, NULL, (long) ®s) < 0) + return -1; + a3 = regs[REG_A3]; + r2 = regs[REG_V0]; + + if (entering(tcp)) { + scno = r2; + + /* Check if this is the post-execve SIGTRAP. */ + if (scno == 0 && tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + + if (scno < 0 || scno > nsyscalls) { + if (a3 == 0 || a3 == -1) { + if (debug) + fprintf(stderr, "stray syscall exit: v0 = %ld\n", scno); + return 0; + } + } + } +# elif defined (MIPS) + if (upeek(tcp, REG_A3, &a3) < 0) + return -1; + if (entering(tcp)) { + if (upeek(tcp, REG_V0, &scno) < 0) + return -1; + + /* Check if this is the post-execve SIGTRAP. */ + if (scno == 0 && tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + + if (scno < 0 || scno > nsyscalls) { + if (a3 == 0 || a3 == -1) { + if (debug) + fprintf(stderr, "stray syscall exit: v0 = %ld\n", scno); + return 0; + } + } + } else { + if (upeek(tcp, REG_V0, &r2) < 0) + return -1; + } +# elif defined (ALPHA) + if (upeek(tcp, REG_A3, &a3) < 0) + return -1; + + if (entering(tcp)) { + if (upeek(tcp, REG_R0, &scno) < 0) + return -1; + + /* Check if this is the post-execve SIGTRAP. */ + if (scno == 0 && tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + + /* + * Do some sanity checks to figure out if it's + * really a syscall entry + */ + if (scno < 0 || scno > nsyscalls) { + if (a3 == 0 || a3 == -1) { + if (debug) + fprintf(stderr, "stray syscall exit: r0 = %ld\n", scno); + return 0; + } + } + } + else { + if (upeek(tcp, REG_R0, &r0) < 0) + return -1; + } +# elif defined (SPARC) || defined (SPARC64) + /* Everything we need is in the current register set. */ + if (ptrace(PTRACE_GETREGS, tcp->pid, (char *)®s, 0) < 0) + return -1; + + /* If we are entering, then disassemble the syscall trap. */ + if (entering(tcp)) { + /* Retrieve the syscall trap instruction. */ + errno = 0; +# if defined(SPARC64) + trap = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)regs.tpc, 0); + trap >>= 32; +# else + trap = ptrace(PTRACE_PEEKTEXT, tcp->pid, (char *)regs.pc, 0); +# endif + if (errno) + return -1; + + /* Disassemble the trap to see what personality to use. */ + switch (trap) { + case 0x91d02010: + /* Linux/SPARC syscall trap. */ + set_personality(0); + break; + case 0x91d0206d: + /* Linux/SPARC64 syscall trap. */ + set_personality(2); + break; + case 0x91d02000: + /* SunOS syscall trap. (pers 1) */ + fprintf(stderr, "syscall: SunOS no support\n"); + return -1; + case 0x91d02008: + /* Solaris 2.x syscall trap. (per 2) */ + set_personality(1); + break; + case 0x91d02009: + /* NetBSD/FreeBSD syscall trap. */ + fprintf(stderr, "syscall: NetBSD/FreeBSD not supported\n"); + return -1; + case 0x91d02027: + /* Solaris 2.x gettimeofday */ + set_personality(1); + break; + default: + /* Check if this is the post-execve SIGTRAP. */ + if (tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } +# if defined (SPARC64) + fprintf(stderr, "syscall: unknown syscall trap %08lx %016lx\n", trap, regs.tpc); +# else + fprintf(stderr, "syscall: unknown syscall trap %08lx %08lx\n", trap, regs.pc); +# endif + return -1; + } + + /* Extract the system call number from the registers. */ + if (trap == 0x91d02027) + scno = 156; + else + scno = regs.u_regs[U_REG_G1]; + if (scno == 0) { + scno = regs.u_regs[U_REG_O0]; + memmove(®s.u_regs[U_REG_O0], ®s.u_regs[U_REG_O1], 7*sizeof(regs.u_regs[0])); + } + } +# elif defined(HPPA) + if (upeek(tcp, PT_GR20, &scno) < 0) + return -1; + if (entering(tcp)) { + /* Check if this is the post-execve SIGTRAP. */ + if (tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + } +# elif defined(SH) + /* + * In the new syscall ABI, the system call number is in R3. + */ + if (upeek(tcp, 4*(REG_REG0+3), &scno) < 0) + return -1; + + if (scno < 0) { + /* Odd as it may seem, a glibc bug has been known to cause + glibc to issue bogus negative syscall numbers. So for + our purposes, make strace print what it *should* have been */ + long correct_scno = (scno & 0xff); + if (debug) + fprintf(stderr, + "Detected glibc bug: bogus system call" + " number = %ld, correcting to %ld\n", + scno, + correct_scno); + scno = correct_scno; + } + + if (entering(tcp)) { + /* Check if this is the post-execve SIGTRAP. */ + if (scno == 0 && tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + } +# elif defined(SH64) + if (upeek(tcp, REG_SYSCALL, &scno) < 0) + return -1; + scno &= 0xFFFF; + + if (entering(tcp)) { + /* Check if this is the post-execve SIGTRAP. */ + if (tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + } +# elif defined(CRISV10) || defined(CRISV32) + if (upeek(tcp, 4*PT_R9, &scno) < 0) + return -1; +# elif defined(TILE) + if (upeek(tcp, PTREGS_OFFSET_REG(10), &scno) < 0) + return -1; + + if (entering(tcp)) { + /* Check if this is the post-execve SIGTRAP. */ + if (tcp->flags & TCB_WAITEXECVE) { + tcp->flags &= ~TCB_WAITEXECVE; + return 0; + } + } +# elif defined(MICROBLAZE) + if (upeek(tcp, 0, &scno) < 0) + return -1; +# endif +#endif /* LINUX */ + +#ifdef SUNOS4 + if (upeek(tcp, uoff(u_arg[7]), &scno) < 0) + return -1; +#elif defined(SH) + /* new syscall ABI returns result in R0 */ + if (upeek(tcp, 4*REG_REG0, (long *)&r0) < 0) + return -1; +#elif defined(SH64) + /* ABI defines result returned in r9 */ + if (upeek(tcp, REG_GENERAL(9), (long *)&r9) < 0) + return -1; +#endif + +#ifdef USE_PROCFS +# ifdef HAVE_PR_SYSCALL + scno = tcp->status.PR_SYSCALL; +# else +# ifndef FREEBSD + scno = tcp->status.PR_WHAT; +# else + if (pread(tcp->pfd_reg, ®s, sizeof(regs), 0) < 0) { + perror("pread"); + return -1; + } + switch (regs.r_eax) { + case SYS_syscall: + case SYS___syscall: + pread(tcp->pfd, &scno, sizeof(scno), regs.r_esp + sizeof(int)); + break; + default: + scno = regs.r_eax; + break; + } +# endif /* FREEBSD */ +# endif /* !HAVE_PR_SYSCALL */ +#endif /* USE_PROCFS */ + + if (entering(tcp)) + tcp->scno = scno; + return 1; +} long known_scno(struct tcb *tcp) @@ -2271,7 +2887,7 @@ trace_syscall_entering(struct tcb *tcp) { int res, scno_good; - scno_good = res = get_scno(tcp); + scno_good = res = get_scno_on_sysenter(tcp); if (res == 0) return res; if (res == 1) @@ -2443,7 +3059,7 @@ trace_syscall_exiting(struct tcb *tcp) /* BTW, why we don't just memorize syscall no. on entry * in tcp->something? */ - scno_good = res = get_scno(tcp); + scno_good = res = get_scno_on_sysexit(tcp); if (res == 0) return res; if (res == 1)