795e2a023b
Add tests running under ptrace for syscall_numbering_64. ptrace stopping on syscall entry and possibly modifying the syscall number (regs.orig_rax) or the default return value (regs.rax) can have different results than the normal system call path. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lore.kernel.org/r/20210518191303.4135296-4-hpa@zytor.com
483 lines
11 KiB
C
483 lines
11 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * syscall_numbering.c - test calling the x86-64 kernel with various
 * valid and invalid system call numbers.
 *
 * Copyright (c) 2018 Andrew Lutomirski
 */
|
|
|
|
#define _GNU_SOURCE
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <stdbool.h>
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
#include <string.h>
|
|
#include <fcntl.h>
|
|
#include <limits.h>
|
|
#include <signal.h>
|
|
#include <sysexits.h>
|
|
|
|
#include <sys/ptrace.h>
|
|
#include <sys/user.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/mman.h>
|
|
|
|
#include <linux/ptrace.h>
|
|
|
|
/* Common system call numbers */
#define SYS_READ	  0
#define SYS_WRITE	  1
#define SYS_GETPID	 39
/* x64-only system call numbers */
#define X64_IOCTL	 16
#define X64_READV	 19
#define X64_WRITEV	 20
/* x32-only system call numbers (without X32_BIT) */
#define X32_IOCTL	514
#define X32_READV	515
#define X32_WRITEV	516

/* Bit OR'ed into a system call number to request it as an x32 call */
#define X32_BIT		0x40000000
|
|
|
|
static int nullfd = -1;		/* File descriptor for /dev/null */
static bool with_x32;		/* x32 supported on this kernel? */
|
|
|
|
/*
 * The successive passes run under ptrace.  Each value selects what the
 * tracer does at a traced system call stop; the order matters because
 * the passes are iterated from PTP_NOTHING up to (excluding) PTP_DONE
 * and compared with relational operators.
 */
enum ptrace_pass {
	PTP_NOTHING,	/* stop only; registers are not even read */
	PTP_GETREGS,	/* read the registers, write nothing back */
	PTP_WRITEBACK,	/* read the registers and write them back unchanged */
	PTP_FUZZRET,	/* overwrite rax with MODIFIED_BY_PTRACE */
	PTP_FUZZHIGH,	/* as FUZZRET, plus set the top 32 bits of orig_rax */
	PTP_INTNUM,	/* as FUZZRET, plus sign-extend orig_rax from int */
	PTP_DONE	/* terminator: all passes have completed */
};
|
|
|
|
static const char * const ptrace_pass_name[] =
|
|
{
|
|
[PTP_NOTHING] = "just stop, no data read",
|
|
[PTP_GETREGS] = "only getregs",
|
|
[PTP_WRITEBACK] = "getregs, unmodified setregs",
|
|
[PTP_FUZZRET] = "modifying the default return",
|
|
[PTP_FUZZHIGH] = "clobbering the top 32 bits",
|
|
[PTP_INTNUM] = "sign-extending the syscall number",
|
|
};
|
|
|
|
/*
|
|
* Shared memory block between tracer and test
|
|
*/
|
|
struct shared {
|
|
unsigned int nerr; /* Total error count */
|
|
unsigned int indent; /* Message indentation level */
|
|
enum ptrace_pass ptrace_pass;
|
|
bool probing_syscall; /* In probe_syscall() */
|
|
};
|
|
static volatile struct shared *sh;
|
|
|
|
static inline unsigned int offset(void)
|
|
{
|
|
unsigned int level = sh ? sh->indent : 0;
|
|
|
|
return 8 + level * 4;
|
|
}
|
|
|
|
/*
 * Print a message prefixed with "[lvl]".  The prefix is left-justified
 * in a field of offset() columns, so the message text is indented
 * according to the current indentation level.  The field width for
 * "%-*s" has to be an int, hence the cast.
 */
#define msg(lvl, fmt, ...) printf("%-*s" fmt, (int)offset(), "[" #lvl "]", \
				  ## __VA_ARGS__)

#define run(fmt, ...)  msg(RUN,  fmt, ## __VA_ARGS__)
#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
#define ok(fmt, ...)   msg(OK,   fmt, ## __VA_ARGS__)

/*
 * Report a test failure and count it in the shared error counter.
 * Requires the shared block (sh) to be set up.
 */
#define fail(fmt, ...)					\
	do {						\
		msg(FAIL, fmt, ## __VA_ARGS__);		\
		sh->nerr++;				\
	} while (0)

/*
 * Report a fatal setup problem at indentation level 0 and terminate the
 * calling process with EX_OSERR.  This may be invoked before the shared
 * block exists, so sh is only touched when it is non-NULL.
 */
#define crit(fmt, ...)					\
	do {						\
		if (sh)					\
			sh->indent = 0;			\
		msg(FAIL, fmt, ## __VA_ARGS__);		\
		msg(SKIP, "Unable to run test\n");	\
		exit(EX_OSERR);				\
	} while (0)
|
|
|
|
/*
 * Sentinel for ptrace-modified return value.  Parenthesized so that the
 * negative constant is safe to use inside any expression.
 */
#define MODIFIED_BY_PTRACE	(-9999)
|
|
|
|
/*
|
|
* Directly invokes the given syscall with nullfd as the first argument
|
|
* and the rest zero. Avoids involving glibc wrappers in case they ever
|
|
* end up intercepting some system calls for some reason, or modify
|
|
* the system call number itself.
|
|
*/
|
|
static long long probe_syscall(int msb, int lsb)
|
|
{
|
|
register long long arg1 asm("rdi") = nullfd;
|
|
register long long arg2 asm("rsi") = 0;
|
|
register long long arg3 asm("rdx") = 0;
|
|
register long long arg4 asm("r10") = 0;
|
|
register long long arg5 asm("r8") = 0;
|
|
register long long arg6 asm("r9") = 0;
|
|
long long nr = ((long long)msb << 32) | (unsigned int)lsb;
|
|
long long ret;
|
|
|
|
/*
|
|
* We pass in an extra copy of the extended system call number
|
|
* in %rbx, so we can examine it from the ptrace handler without
|
|
* worrying about it being possibly modified. This is to test
|
|
* the validity of struct user regs.orig_rax a.k.a.
|
|
* struct pt_regs.orig_ax.
|
|
*/
|
|
sh->probing_syscall = true;
|
|
asm volatile("syscall"
|
|
: "=a" (ret)
|
|
: "a" (nr), "b" (nr),
|
|
"r" (arg1), "r" (arg2), "r" (arg3),
|
|
"r" (arg4), "r" (arg5), "r" (arg6)
|
|
: "rcx", "r11", "memory", "cc");
|
|
sh->probing_syscall = false;
|
|
|
|
return ret;
|
|
}
|
|
|
|
static const char *syscall_str(int msb, int start, int end)
|
|
{
|
|
static char buf[64];
|
|
const char * const type = (start & X32_BIT) ? "x32" : "x64";
|
|
int lsb = start;
|
|
|
|
/*
|
|
* Improve readability by stripping the x32 bit, but round
|
|
* toward zero so we don't display -1 as -1073741825.
|
|
*/
|
|
if (lsb < 0)
|
|
lsb |= X32_BIT;
|
|
else
|
|
lsb &= ~X32_BIT;
|
|
|
|
if (start == end)
|
|
snprintf(buf, sizeof buf, "%s syscall %d:%d",
|
|
type, msb, lsb);
|
|
else
|
|
snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
|
|
type, msb, lsb, lsb + (end-start));
|
|
|
|
return buf;
|
|
}
|
|
|
|
static unsigned int _check_for(int msb, int start, int end, long long expect,
|
|
const char *expect_str)
|
|
{
|
|
unsigned int err = 0;
|
|
|
|
sh->indent++;
|
|
if (start != end)
|
|
sh->indent++;
|
|
|
|
for (int nr = start; nr <= end; nr++) {
|
|
long long ret = probe_syscall(msb, nr);
|
|
|
|
if (ret != expect) {
|
|
fail("%s returned %lld, but it should have returned %s\n",
|
|
syscall_str(msb, nr, nr),
|
|
ret, expect_str);
|
|
err++;
|
|
}
|
|
}
|
|
|
|
if (start != end)
|
|
sh->indent--;
|
|
|
|
if (err) {
|
|
if (start != end)
|
|
fail("%s had %u failure%s\n",
|
|
syscall_str(msb, start, end),
|
|
err, err == 1 ? "s" : "");
|
|
} else {
|
|
ok("%s returned %s as expected\n",
|
|
syscall_str(msb, start, end), expect_str);
|
|
}
|
|
|
|
sh->indent--;
|
|
|
|
return err;
|
|
}
|
|
|
|
/*
 * Convenience wrapper around _check_for() which passes the source text
 * of @expect (e.g. "-ENOSYS") as the string used in the messages.
 */
#define check_for(msb,start,end,expect) \
	_check_for(msb,start,end,expect,#expect)
|
|
|
|
/*
 * Check that system call msb:nr returns 0.
 *
 * Returns the error count of the check converted to bool, i.e. true if
 * the system call did NOT return 0 and false if it behaved as expected.
 */
static bool check_zero(int msb, int nr)
{
	return check_for(msb, nr, nr, 0);
}
|
|
|
|
/*
 * Check that system call msb:nr returns -ENOSYS.
 *
 * Returns the error count of the check converted to bool, i.e. true if
 * the system call did NOT return -ENOSYS and false if it behaved as
 * expected.
 */
static bool check_enosys(int msb, int nr)
{
	return check_for(msb, nr, nr, -ENOSYS);
}
|
|
|
|
/*
|
|
* Anyone diagnosing a failure will want to know whether the kernel
|
|
* supports x32. Tell them. This can also be used to conditionalize
|
|
* tests based on existence or nonexistence of x32.
|
|
*/
|
|
static bool test_x32(void)
|
|
{
|
|
long long ret;
|
|
pid_t mypid = getpid();
|
|
|
|
run("Checking for x32 by calling x32 getpid()\n");
|
|
ret = probe_syscall(0, SYS_GETPID | X32_BIT);
|
|
|
|
sh->indent++;
|
|
if (ret == mypid) {
|
|
info("x32 is supported\n");
|
|
with_x32 = true;
|
|
} else if (ret == -ENOSYS) {
|
|
info("x32 is not supported\n");
|
|
with_x32 = false;
|
|
} else {
|
|
fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
|
|
with_x32 = false;
|
|
}
|
|
sh->indent--;
|
|
return with_x32;
|
|
}
|
|
|
|
static void test_syscalls_common(int msb)
|
|
{
|
|
enum ptrace_pass pass = sh->ptrace_pass;
|
|
|
|
run("Checking some common syscalls as 64 bit\n");
|
|
check_zero(msb, SYS_READ);
|
|
check_zero(msb, SYS_WRITE);
|
|
|
|
run("Checking some 64-bit only syscalls as 64 bit\n");
|
|
check_zero(msb, X64_READV);
|
|
check_zero(msb, X64_WRITEV);
|
|
|
|
run("Checking out of range system calls\n");
|
|
check_for(msb, -64, -2, -ENOSYS);
|
|
if (pass >= PTP_FUZZRET)
|
|
check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
|
|
else
|
|
check_for(msb, -1, -1, -ENOSYS);
|
|
check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
|
|
check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
|
|
check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
|
|
}
|
|
|
|
static void test_syscalls_with_x32(int msb)
|
|
{
|
|
/*
|
|
* Syscalls 512-547 are "x32" syscalls. They are
|
|
* intended to be called with the x32 (0x40000000) bit
|
|
* set. Calling them without the x32 bit set is
|
|
* nonsense and should not work.
|
|
*/
|
|
run("Checking x32 syscalls as 64 bit\n");
|
|
check_for(msb, 512, 547, -ENOSYS);
|
|
|
|
run("Checking some common syscalls as x32\n");
|
|
check_zero(msb, SYS_READ | X32_BIT);
|
|
check_zero(msb, SYS_WRITE | X32_BIT);
|
|
|
|
run("Checking some x32 syscalls as x32\n");
|
|
check_zero(msb, X32_READV | X32_BIT);
|
|
check_zero(msb, X32_WRITEV | X32_BIT);
|
|
|
|
run("Checking some 64-bit syscalls as x32\n");
|
|
check_enosys(msb, X64_IOCTL | X32_BIT);
|
|
check_enosys(msb, X64_READV | X32_BIT);
|
|
check_enosys(msb, X64_WRITEV | X32_BIT);
|
|
}
|
|
|
|
static void test_syscalls_without_x32(int msb)
|
|
{
|
|
run("Checking for absence of x32 system calls\n");
|
|
check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
|
|
}
|
|
|
|
static void test_syscall_numbering(void)
|
|
{
|
|
static const int msbs[] = {
|
|
0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
|
|
INT_MIN, INT_MIN+1
|
|
};
|
|
|
|
sh->indent++;
|
|
|
|
/*
|
|
* The MSB is supposed to be ignored, so we loop over a few
|
|
* to test that out.
|
|
*/
|
|
for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
|
|
int msb = msbs[i];
|
|
run("Checking system calls with msb = %d (0x%x)\n",
|
|
msb, msb);
|
|
|
|
sh->indent++;
|
|
|
|
test_syscalls_common(msb);
|
|
if (with_x32)
|
|
test_syscalls_with_x32(msb);
|
|
else
|
|
test_syscalls_without_x32(msb);
|
|
|
|
sh->indent--;
|
|
}
|
|
|
|
sh->indent--;
|
|
}
|
|
|
|
static void syscall_numbering_tracee(void)
|
|
{
|
|
enum ptrace_pass pass;
|
|
|
|
if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
|
|
crit("Failed to request tracing\n");
|
|
return;
|
|
}
|
|
raise(SIGSTOP);
|
|
|
|
for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
|
|
sh->ptrace_pass = ++pass) {
|
|
run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
|
|
test_syscall_numbering();
|
|
}
|
|
}
|
|
|
|
static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
|
|
{
|
|
struct user_regs_struct regs;
|
|
|
|
sh->probing_syscall = false; /* Do this on entry only */
|
|
|
|
/* For these, don't even getregs */
|
|
if (pass == PTP_NOTHING || pass == PTP_DONE)
|
|
return;
|
|
|
|
ptrace(PTRACE_GETREGS, testpid, NULL, ®s);
|
|
|
|
if (regs.orig_rax != regs.rbx) {
|
|
fail("orig_rax %#llx doesn't match syscall number %#llx\n",
|
|
(unsigned long long)regs.orig_rax,
|
|
(unsigned long long)regs.rbx);
|
|
}
|
|
|
|
switch (pass) {
|
|
case PTP_GETREGS:
|
|
/* Just read, no writeback */
|
|
return;
|
|
case PTP_WRITEBACK:
|
|
/* Write back the same register state verbatim */
|
|
break;
|
|
case PTP_FUZZRET:
|
|
regs.rax = MODIFIED_BY_PTRACE;
|
|
break;
|
|
case PTP_FUZZHIGH:
|
|
regs.rax = MODIFIED_BY_PTRACE;
|
|
regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
|
|
break;
|
|
case PTP_INTNUM:
|
|
regs.rax = MODIFIED_BY_PTRACE;
|
|
regs.orig_rax = (int)regs.orig_rax;
|
|
break;
|
|
default:
|
|
crit("invalid ptrace_pass\n");
|
|
break;
|
|
}
|
|
|
|
ptrace(PTRACE_SETREGS, testpid, NULL, ®s);
|
|
}
|
|
|
|
static void syscall_numbering_tracer(pid_t testpid)
|
|
{
|
|
int wstatus;
|
|
|
|
do {
|
|
pid_t wpid = waitpid(testpid, &wstatus, 0);
|
|
if (wpid < 0 && errno != EINTR)
|
|
break;
|
|
if (wpid != testpid)
|
|
continue;
|
|
if (!WIFSTOPPED(wstatus))
|
|
break; /* Thread exited? */
|
|
|
|
if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
|
|
mess_with_syscall(testpid, sh->ptrace_pass);
|
|
} while (sh->ptrace_pass != PTP_DONE &&
|
|
!ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
|
|
|
|
ptrace(PTRACE_DETACH, testpid, NULL, NULL);
|
|
|
|
/* Wait for the child process to terminate */
|
|
while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
|
|
/* wait some more */;
|
|
}
|
|
|
|
/*
 * Run the system call number checks under ptrace: fork a child which
 * runs the checks as tracee, while the calling process acts as tracer.
 * Returns in the parent only; the child terminates with _exit(0).
 */
static void test_traced_syscall_numbering(void)
{
	pid_t testpid;

	/* Launch the test thread; this thread continues as the tracer thread */
	testpid = fork();

	if (testpid < 0) {
		crit("Unable to launch tracer process\n");
	} else if (testpid == 0) {
		syscall_numbering_tracee();
		_exit(0);
	} else {
		syscall_numbering_tracer(testpid);
	}
}
|
|
|
|
int main(void)
|
|
{
|
|
unsigned int nerr;
|
|
|
|
/*
|
|
* It is quite likely to get a segfault on a failure, so make
|
|
* sure the message gets out by setting stdout to nonbuffered.
|
|
*/
|
|
setvbuf(stdout, NULL, _IONBF, 0);
|
|
|
|
/*
|
|
* Harmless file descriptor to work on...
|
|
*/
|
|
nullfd = open("/dev/null", O_RDWR);
|
|
if (nullfd < 0) {
|
|
crit("Unable to open /dev/null: %s\n", strerror(errno));
|
|
}
|
|
|
|
/*
|
|
* Set up a block of shared memory...
|
|
*/
|
|
sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
|
|
MAP_ANONYMOUS|MAP_SHARED, 0, 0);
|
|
if (sh == MAP_FAILED) {
|
|
crit("Unable to allocated shared memory block: %s\n",
|
|
strerror(errno));
|
|
}
|
|
|
|
with_x32 = test_x32();
|
|
|
|
run("Running tests without ptrace...\n");
|
|
test_syscall_numbering();
|
|
|
|
test_traced_syscall_numbering();
|
|
|
|
nerr = sh->nerr;
|
|
if (!nerr) {
|
|
ok("All system calls succeeded or failed as expected\n");
|
|
return 0;
|
|
} else {
|
|
fail("A total of %u system call%s had incorrect behavior\n",
|
|
nerr, nerr != 1 ? "s" : "");
|
|
return 1;
|
|
}
|
|
}
|