linux/tools/testing/selftests/x86/syscall_arg_fault.c
Linus Torvalds 4da9f33026 Support for FSGSBASE. Almost 5 years after the first RFC to support it,
this has been brought into a shape which is maintainable and actually
 works.
 
 This final version was done by Sasha Levin who took it up after Intel
 dropped the ball. Sasha discovered that the SGX (sic!) offerings out there
 ship rogue kernel modules enabling FSGSBASE behind the kernels back which
 opens an instantanious unpriviledged root hole.
 
 The FSGSBASE instructions provide a considerable speedup of the context
 switch path and enable user space to write GSBASE without kernel
 interaction. This enablement requires careful handling of the exception
 entries which go through the paranoid entry path as they cannot longer rely
 on the assumption that user GSBASE is positive (as enforced via prctl() on
 non FSGSBASE enabled systemn). All other entries (syscalls, interrupts and
 exceptions) can still just utilize SWAPGS unconditionally when the entry
 comes from user space. Converting these entries to use FSGSBASE has no
 benefit as SWAPGS is only marginally slower than WRGSBASE and locating and
 retrieving the kernel GSBASE value is not a free operation either. The real
 benefit of RD/WRGSBASE is the avoidance of the MSR reads and writes.
 
 The changes come with appropriate selftests and have held up in field
 testing against the (sanitized) Graphene-SGX driver.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl8pGnoTHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoTYJD/9873GkwvGcc/Vq/dJH1szGTgFftPyZ
 c/Y9gzx7EGBPLo25BS820L+ZlynzXHDxExKfCEaD10TZfe5XIc1vYNR0J74M2NmK
 IBgEDstJeW93ai+rHCFRXIevhpzU4GgGYJ1MeeOgbVMN3aGU1g6HfzMvtF0fPn8Y
 n6fsLZa43wgnoTdjwjjikpDTrzoZbaL1mbODBzBVPAaTbim7IKKTge6r/iCKrOjz
 Uixvm3g9lVzx52zidJ9kWa8esmbOM1j0EPe7/hy3qH9DFo87KxEzjHNH3T6gY5t6
 NJhRAIfY+YyTHpPCUCshj6IkRudE6w/qjEAmKP9kWZxoJrvPCTWOhCzelwsFS9b9
 gxEYfsnaKhsfNhB6fi0PtWlMzPINmEA7SuPza33u5WtQUK7s1iNlgHfvMbjstbwg
 MSETn4SG2/ZyzUrSC06lVwV8kh0RgM3cENc/jpFfIHD0vKGI3qfka/1RY94kcOCG
 AeJd0YRSU2RqL7lmxhHyG8tdb8eexns41IzbPCLXX2sF00eKNkVvMRYT2mKfKLFF
 q8v1x7yuwmODdXfFR6NdCkGm9IU7wtL6wuQ8Nhu9UraFmcXo6X6FLJC18FqcvSb9
 jvcRP4XY/8pNjjf44JB8yWfah0xGQsaMIKQGP4yLv4j6Xk1xAQKH1MqcC7l1D2HN
 5Z24GibFqSK/vA==
 =QaAN
 -----END PGP SIGNATURE-----

Merge tag 'x86-fsgsbase-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fsgsbase from Thomas Gleixner:
 "Support for FSGSBASE. Almost 5 years after the first RFC to support
  it, this has been brought into a shape which is maintainable and
  actually works.

  This final version was done by Sasha Levin who took it up after Intel
  dropped the ball. Sasha discovered that the SGX (sic!) offerings out
  there ship rogue kernel modules enabling FSGSBASE behind the kernels
  back which opens an instantanious unpriviledged root hole.

  The FSGSBASE instructions provide a considerable speedup of the
  context switch path and enable user space to write GSBASE without
  kernel interaction. This enablement requires careful handling of the
  exception entries which go through the paranoid entry path as they
  can no longer rely on the assumption that user GSBASE is positive (as
  enforced via prctl() on non FSGSBASE enabled systemn).

  All other entries (syscalls, interrupts and exceptions) can still just
  utilize SWAPGS unconditionally when the entry comes from user space.
  Converting these entries to use FSGSBASE has no benefit as SWAPGS is
  only marginally slower than WRGSBASE and locating and retrieving the
  kernel GSBASE value is not a free operation either. The real benefit
  of RD/WRGSBASE is the avoidance of the MSR reads and writes.

  The changes come with appropriate selftests and have held up in field
  testing against the (sanitized) Graphene-SGX driver"

* tag 'x86-fsgsbase-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86/fsgsbase: Fix Xen PV support
  x86/ptrace: Fix 32-bit PTRACE_SETREGS vs fsbase and gsbase
  selftests/x86/fsgsbase: Add a missing memory constraint
  selftests/x86/fsgsbase: Fix a comment in the ptrace_write_gsbase test
  selftests/x86: Add a syscall_arg_fault_64 test for negative GSBASE
  selftests/x86/fsgsbase: Test ptracer-induced GS base write with FSGSBASE
  selftests/x86/fsgsbase: Test GS selector on ptracer-induced GS base write
  Documentation/x86/64: Add documentation for GS/FS addressing mode
  x86/elf: Enumerate kernel FSGSBASE capability in AT_HWCAP2
  x86/cpu: Enable FSGSBASE on 64bit by default and add a chicken bit
  x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit
  x86/entry/64: Introduce the FIND_PERCPU_BASE macro
  x86/entry/64: Switch CR3 before SWAPGS in paranoid entry
  x86/speculation/swapgs: Check FSGSBASE in enabling SWAPGS mitigation
  x86/process/64: Use FSGSBASE instructions on thread copy and ptrace
  x86/process/64: Use FSBSBASE in switch_to() if available
  x86/process/64: Make save_fsgs_for_kvm() ready for FSGSBASE
  x86/fsgsbase/64: Enable FSGSBASE instructions in helper functions
  x86/fsgsbase/64: Add intrinsics for FSGSBASE instructions
  x86/cpu: Add 'unsafe_fsgsbase' to enable CR4.FSGSBASE
  ...
2020-08-04 21:16:22 -07:00

238 lines
6.1 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
* Copyright (c) 2015 Andrew Lutomirski
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/signal.h>
#include <sys/ucontext.h>
#include <err.h>
#include <setjmp.h>
#include <errno.h>
#include "helpers.h"
/* Our sigaltstack scratch space. */
static unsigned char altstack_data[SIGSTKSZ];
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = handler;
sa.sa_flags = SA_SIGINFO | flags;
sigemptyset(&sa.sa_mask);
if (sigaction(sig, &sa, 0))
err(1, "sigaction");
}
static volatile sig_atomic_t sig_traps;
static sigjmp_buf jmpbuf;
static volatile sig_atomic_t n_errs;
#ifdef __x86_64__
#define REG_AX REG_RAX
#define REG_IP REG_RIP
#else
#define REG_AX REG_EAX
#define REG_IP REG_EIP
#endif
static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
long ax = (long)ctx->uc_mcontext.gregs[REG_AX];
if (ax != -EFAULT && ax != -ENOSYS) {
printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
(unsigned long)ax);
printf("\tIP = 0x%lx\n", (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
n_errs++;
} else {
printf("[OK]\tSeems okay\n");
}
siglongjmp(jmpbuf, 1);
}
static volatile sig_atomic_t sigtrap_consecutive_syscalls;
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
/*
* KVM has some bugs that can cause us to stop making progress.
* detect them and complain, but don't infinite loop or fail the
* test.
*/
ucontext_t *ctx = (ucontext_t*)ctx_void;
unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
if (*ip == 0x340f || *ip == 0x050f) {
/* The trap was on SYSCALL or SYSENTER */
sigtrap_consecutive_syscalls++;
if (sigtrap_consecutive_syscalls > 3) {
printf("[WARN]\tGot stuck single-stepping -- you probably have a KVM bug\n");
siglongjmp(jmpbuf, 1);
}
} else {
sigtrap_consecutive_syscalls = 0;
}
}
static void sigill(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
if (*ip == 0x0b0f) {
/* one of the ud2 instructions faulted */
printf("[OK]\tSYSCALL returned normally\n");
} else {
printf("[SKIP]\tIllegal instruction\n");
}
siglongjmp(jmpbuf, 1);
}
int main()
{
stack_t stack = {
.ss_sp = altstack_data,
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
err(1, "sigaltstack");
sethandler(SIGSEGV, sigsegv_or_sigbus, SA_ONSTACK);
/*
* The actual exception can vary. On Atom CPUs, we get #SS
* instead of #PF when the vDSO fails to access the stack when
* ESP is too close to 2^32, and #SS causes SIGBUS.
*/
sethandler(SIGBUS, sigsegv_or_sigbus, SA_ONSTACK);
sethandler(SIGILL, sigill, SA_ONSTACK);
/*
* Exercise another nasty special case. The 32-bit SYSCALL
* and SYSENTER instructions (even in compat mode) each
* clobber one register. A Linux system call has a syscall
* number and six arguments, and the user stack pointer
* needs to live in some register on return. That means
* that we need eight registers, but SYSCALL and SYSENTER
* only preserve seven registers. As a result, one argument
* ends up on the stack. The stack is user memory, which
* means that the kernel can fail to read it.
*
* The 32-bit fast system calls don't have a defined ABI:
* we're supposed to invoke them through the vDSO. So we'll
* fudge it: we set all regs to invalid pointer values and
* invoke the entry instruction. The return will fail no
* matter what, and we completely lose our program state,
* but we can fix it up with a signal handler.
*/
printf("[RUN]\tSYSENTER with invalid state\n");
if (sigsetjmp(jmpbuf, 1) == 0) {
asm volatile (
"movl $-1, %%eax\n\t"
"movl $-1, %%ebx\n\t"
"movl $-1, %%ecx\n\t"
"movl $-1, %%edx\n\t"
"movl $-1, %%esi\n\t"
"movl $-1, %%edi\n\t"
"movl $-1, %%ebp\n\t"
"movl $-1, %%esp\n\t"
"sysenter"
: : : "memory", "flags");
}
printf("[RUN]\tSYSCALL with invalid state\n");
if (sigsetjmp(jmpbuf, 1) == 0) {
asm volatile (
"movl $-1, %%eax\n\t"
"movl $-1, %%ebx\n\t"
"movl $-1, %%ecx\n\t"
"movl $-1, %%edx\n\t"
"movl $-1, %%esi\n\t"
"movl $-1, %%edi\n\t"
"movl $-1, %%ebp\n\t"
"movl $-1, %%esp\n\t"
"syscall\n\t"
"ud2" /* make sure we recover cleanly */
: : : "memory", "flags");
}
printf("[RUN]\tSYSENTER with TF and invalid state\n");
sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
if (sigsetjmp(jmpbuf, 1) == 0) {
sigtrap_consecutive_syscalls = 0;
set_eflags(get_eflags() | X86_EFLAGS_TF);
asm volatile (
"movl $-1, %%eax\n\t"
"movl $-1, %%ebx\n\t"
"movl $-1, %%ecx\n\t"
"movl $-1, %%edx\n\t"
"movl $-1, %%esi\n\t"
"movl $-1, %%edi\n\t"
"movl $-1, %%ebp\n\t"
"movl $-1, %%esp\n\t"
"sysenter"
: : : "memory", "flags");
}
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
printf("[RUN]\tSYSCALL with TF and invalid state\n");
if (sigsetjmp(jmpbuf, 1) == 0) {
sigtrap_consecutive_syscalls = 0;
set_eflags(get_eflags() | X86_EFLAGS_TF);
asm volatile (
"movl $-1, %%eax\n\t"
"movl $-1, %%ebx\n\t"
"movl $-1, %%ecx\n\t"
"movl $-1, %%edx\n\t"
"movl $-1, %%esi\n\t"
"movl $-1, %%edi\n\t"
"movl $-1, %%ebp\n\t"
"movl $-1, %%esp\n\t"
"syscall\n\t"
"ud2" /* make sure we recover cleanly */
: : : "memory", "flags");
}
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
#ifdef __x86_64__
printf("[RUN]\tSYSENTER with TF, invalid state, and GSBASE < 0\n");
if (sigsetjmp(jmpbuf, 1) == 0) {
sigtrap_consecutive_syscalls = 0;
asm volatile ("wrgsbase %%rax\n\t"
:: "a" (0xffffffffffff0000UL));
set_eflags(get_eflags() | X86_EFLAGS_TF);
asm volatile (
"movl $-1, %%eax\n\t"
"movl $-1, %%ebx\n\t"
"movl $-1, %%ecx\n\t"
"movl $-1, %%edx\n\t"
"movl $-1, %%esi\n\t"
"movl $-1, %%edi\n\t"
"movl $-1, %%ebp\n\t"
"movl $-1, %%esp\n\t"
"sysenter"
: : : "memory", "flags");
}
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
#endif
return 0;
}