4da9f33026
this has been brought into a shape which is maintainable and actually works. This final version was done by Sasha Levin who took it up after Intel dropped the ball. Sasha discovered that the SGX (sic!) offerings out there ship rogue kernel modules enabling FSGSBASE behind the kernel's back which opens an instantaneous unprivileged root hole. The FSGSBASE instructions provide a considerable speedup of the context switch path and enable user space to write GSBASE without kernel interaction. This enablement requires careful handling of the exception entries which go through the paranoid entry path as they can no longer rely on the assumption that user GSBASE is positive (as enforced via prctl() on non-FSGSBASE-enabled systems). All other entries (syscalls, interrupts and exceptions) can still just utilize SWAPGS unconditionally when the entry comes from user space. Converting these entries to use FSGSBASE has no benefit as SWAPGS is only marginally slower than WRGSBASE and locating and retrieving the kernel GSBASE value is not a free operation either. The real benefit of RD/WRGSBASE is the avoidance of the MSR reads and writes. The changes come with appropriate selftests and have held up in field testing against the (sanitized) Graphene-SGX driver.
-----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAl8pGnoTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoTYJD/9873GkwvGcc/Vq/dJH1szGTgFftPyZ c/Y9gzx7EGBPLo25BS820L+ZlynzXHDxExKfCEaD10TZfe5XIc1vYNR0J74M2NmK IBgEDstJeW93ai+rHCFRXIevhpzU4GgGYJ1MeeOgbVMN3aGU1g6HfzMvtF0fPn8Y n6fsLZa43wgnoTdjwjjikpDTrzoZbaL1mbODBzBVPAaTbim7IKKTge6r/iCKrOjz Uixvm3g9lVzx52zidJ9kWa8esmbOM1j0EPe7/hy3qH9DFo87KxEzjHNH3T6gY5t6 NJhRAIfY+YyTHpPCUCshj6IkRudE6w/qjEAmKP9kWZxoJrvPCTWOhCzelwsFS9b9 gxEYfsnaKhsfNhB6fi0PtWlMzPINmEA7SuPza33u5WtQUK7s1iNlgHfvMbjstbwg MSETn4SG2/ZyzUrSC06lVwV8kh0RgM3cENc/jpFfIHD0vKGI3qfka/1RY94kcOCG AeJd0YRSU2RqL7lmxhHyG8tdb8eexns41IzbPCLXX2sF00eKNkVvMRYT2mKfKLFF q8v1x7yuwmODdXfFR6NdCkGm9IU7wtL6wuQ8Nhu9UraFmcXo6X6FLJC18FqcvSb9 jvcRP4XY/8pNjjf44JB8yWfah0xGQsaMIKQGP4yLv4j6Xk1xAQKH1MqcC7l1D2HN 5Z24GibFqSK/vA== =QaAN -----END PGP SIGNATURE----- Merge tag 'x86-fsgsbase-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 fsgsbase from Thomas Gleixner: "Support for FSGSBASE. Almost 5 years after the first RFC to support it, this has been brought into a shape which is maintainable and actually works. This final version was done by Sasha Levin who took it up after Intel dropped the ball. Sasha discovered that the SGX (sic!) offerings out there ship rogue kernel modules enabling FSGSBASE behind the kernels back which opens an instantanious unpriviledged root hole. The FSGSBASE instructions provide a considerable speedup of the context switch path and enable user space to write GSBASE without kernel interaction. This enablement requires careful handling of the exception entries which go through the paranoid entry path as they can no longer rely on the assumption that user GSBASE is positive (as enforced via prctl() on non FSGSBASE enabled systemn). All other entries (syscalls, interrupts and exceptions) can still just utilize SWAPGS unconditionally when the entry comes from user space. 
Converting these entries to use FSGSBASE has no benefit as SWAPGS is only marginally slower than WRGSBASE and locating and retrieving the kernel GSBASE value is not a free operation either. The real benefit of RD/WRGSBASE is the avoidance of the MSR reads and writes. The changes come with appropriate selftests and have held up in field testing against the (sanitized) Graphene-SGX driver" * tag 'x86-fsgsbase-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits) x86/fsgsbase: Fix Xen PV support x86/ptrace: Fix 32-bit PTRACE_SETREGS vs fsbase and gsbase selftests/x86/fsgsbase: Add a missing memory constraint selftests/x86/fsgsbase: Fix a comment in the ptrace_write_gsbase test selftests/x86: Add a syscall_arg_fault_64 test for negative GSBASE selftests/x86/fsgsbase: Test ptracer-induced GS base write with FSGSBASE selftests/x86/fsgsbase: Test GS selector on ptracer-induced GS base write Documentation/x86/64: Add documentation for GS/FS addressing mode x86/elf: Enumerate kernel FSGSBASE capability in AT_HWCAP2 x86/cpu: Enable FSGSBASE on 64bit by default and add a chicken bit x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit x86/entry/64: Introduce the FIND_PERCPU_BASE macro x86/entry/64: Switch CR3 before SWAPGS in paranoid entry x86/speculation/swapgs: Check FSGSBASE in enabling SWAPGS mitigation x86/process/64: Use FSGSBASE instructions on thread copy and ptrace x86/process/64: Use FSBSBASE in switch_to() if available x86/process/64: Make save_fsgs_for_kvm() ready for FSGSBASE x86/fsgsbase/64: Enable FSGSBASE instructions in helper functions x86/fsgsbase/64: Add intrinsics for FSGSBASE instructions x86/cpu: Add 'unsafe_fsgsbase' to enable CR4.FSGSBASE ...
238 lines
6.1 KiB
C
238 lines
6.1 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* syscall_arg_fault.c - tests faults on 32-bit fast syscall stack args
|
|
* Copyright (c) 2015 Andrew Lutomirski
|
|
*/
|
|
|
|
#define _GNU_SOURCE
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <sys/signal.h>
|
|
#include <sys/ucontext.h>
|
|
#include <err.h>
|
|
#include <setjmp.h>
|
|
#include <errno.h>
|
|
|
|
#include "helpers.h"
|
|
|
|
/* Our sigaltstack scratch space. */
|
|
/*
 * Handed to sigaltstack() in main() as the alternate signal stack.
 * NOTE(review): a file-scope array bound must be an integer constant
 * expression; some newer libcs reportedly turn SIGSTKSZ into a runtime
 * value when _GNU_SOURCE is defined -- confirm against the target toolchain.
 */
static unsigned char altstack_data[SIGSTKSZ];
|
|
|
|
/*
 * Install @handler as the SA_SIGINFO-style handler for signal @sig.
 *
 * @flags is OR-ed with SA_SIGINFO to form sa_flags, and the handler's
 * signal mask is left empty.  If sigaction() fails the process exits
 * through err().
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction action;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_flags = flags | SA_SIGINFO;
	action.sa_sigaction = handler;

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
|
|
|
|
/*
 * NOTE(review): nothing in the visible code reads or writes this counter;
 * it may be a leftover -- confirm before removing.
 */
static volatile sig_atomic_t sig_traps;
|
|
/*
 * Recovery point for the test cases: main() arms it with sigsetjmp()
 * before each case and the signal handlers come back to it with
 * siglongjmp().
 */
static sigjmp_buf jmpbuf;
|
|
|
|
/*
 * Number of [FAIL] results; incremented by sigsegv_or_sigbus() when AX
 * holds an unexpected value.
 */
static volatile sig_atomic_t n_errs;
|
|
|
|
/*
 * Word-size-neutral names for the gregs[] indices of the accumulator and
 * the instruction pointer in a signal context.
 */
#if defined(__x86_64__)
# define REG_IP	REG_RIP
# define REG_AX	REG_RAX
#else
# define REG_IP	REG_EIP
# define REG_AX	REG_EAX
#endif
|
|
|
|
static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
|
|
{
|
|
ucontext_t *ctx = (ucontext_t*)ctx_void;
|
|
long ax = (long)ctx->uc_mcontext.gregs[REG_AX];
|
|
|
|
if (ax != -EFAULT && ax != -ENOSYS) {
|
|
printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
|
|
(unsigned long)ax);
|
|
printf("\tIP = 0x%lx\n", (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
|
|
n_errs++;
|
|
} else {
|
|
printf("[OK]\tSeems okay\n");
|
|
}
|
|
|
|
siglongjmp(jmpbuf, 1);
|
|
}
|
|
|
|
/*
 * Number of SIGTRAPs in a row whose IP pointed at a SYSCALL or SYSENTER
 * instruction.  main() zeroes it before each single-step case; sigtrap()
 * bumps or resets it.
 */
static volatile sig_atomic_t sigtrap_consecutive_syscalls;
|
|
|
|
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
|
|
{
|
|
/*
|
|
* KVM has some bugs that can cause us to stop making progress.
|
|
* detect them and complain, but don't infinite loop or fail the
|
|
* test.
|
|
*/
|
|
|
|
ucontext_t *ctx = (ucontext_t*)ctx_void;
|
|
unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
|
|
|
|
if (*ip == 0x340f || *ip == 0x050f) {
|
|
/* The trap was on SYSCALL or SYSENTER */
|
|
sigtrap_consecutive_syscalls++;
|
|
if (sigtrap_consecutive_syscalls > 3) {
|
|
printf("[WARN]\tGot stuck single-stepping -- you probably have a KVM bug\n");
|
|
siglongjmp(jmpbuf, 1);
|
|
}
|
|
} else {
|
|
sigtrap_consecutive_syscalls = 0;
|
|
}
|
|
}
|
|
|
|
static void sigill(int sig, siginfo_t *info, void *ctx_void)
|
|
{
|
|
ucontext_t *ctx = (ucontext_t*)ctx_void;
|
|
unsigned short *ip = (unsigned short *)ctx->uc_mcontext.gregs[REG_IP];
|
|
|
|
if (*ip == 0x0b0f) {
|
|
/* one of the ud2 instructions faulted */
|
|
printf("[OK]\tSYSCALL returned normally\n");
|
|
} else {
|
|
printf("[SKIP]\tIllegal instruction\n");
|
|
}
|
|
siglongjmp(jmpbuf, 1);
|
|
}
|
|
|
|
int main()
|
|
{
|
|
stack_t stack = {
|
|
.ss_sp = altstack_data,
|
|
.ss_size = SIGSTKSZ,
|
|
};
|
|
if (sigaltstack(&stack, NULL) != 0)
|
|
err(1, "sigaltstack");
|
|
|
|
sethandler(SIGSEGV, sigsegv_or_sigbus, SA_ONSTACK);
|
|
/*
|
|
* The actual exception can vary. On Atom CPUs, we get #SS
|
|
* instead of #PF when the vDSO fails to access the stack when
|
|
* ESP is too close to 2^32, and #SS causes SIGBUS.
|
|
*/
|
|
sethandler(SIGBUS, sigsegv_or_sigbus, SA_ONSTACK);
|
|
sethandler(SIGILL, sigill, SA_ONSTACK);
|
|
|
|
/*
|
|
* Exercise another nasty special case. The 32-bit SYSCALL
|
|
* and SYSENTER instructions (even in compat mode) each
|
|
* clobber one register. A Linux system call has a syscall
|
|
* number and six arguments, and the user stack pointer
|
|
* needs to live in some register on return. That means
|
|
* that we need eight registers, but SYSCALL and SYSENTER
|
|
* only preserve seven registers. As a result, one argument
|
|
* ends up on the stack. The stack is user memory, which
|
|
* means that the kernel can fail to read it.
|
|
*
|
|
* The 32-bit fast system calls don't have a defined ABI:
|
|
* we're supposed to invoke them through the vDSO. So we'll
|
|
* fudge it: we set all regs to invalid pointer values and
|
|
* invoke the entry instruction. The return will fail no
|
|
* matter what, and we completely lose our program state,
|
|
* but we can fix it up with a signal handler.
|
|
*/
|
|
|
|
printf("[RUN]\tSYSENTER with invalid state\n");
|
|
if (sigsetjmp(jmpbuf, 1) == 0) {
|
|
asm volatile (
|
|
"movl $-1, %%eax\n\t"
|
|
"movl $-1, %%ebx\n\t"
|
|
"movl $-1, %%ecx\n\t"
|
|
"movl $-1, %%edx\n\t"
|
|
"movl $-1, %%esi\n\t"
|
|
"movl $-1, %%edi\n\t"
|
|
"movl $-1, %%ebp\n\t"
|
|
"movl $-1, %%esp\n\t"
|
|
"sysenter"
|
|
: : : "memory", "flags");
|
|
}
|
|
|
|
printf("[RUN]\tSYSCALL with invalid state\n");
|
|
if (sigsetjmp(jmpbuf, 1) == 0) {
|
|
asm volatile (
|
|
"movl $-1, %%eax\n\t"
|
|
"movl $-1, %%ebx\n\t"
|
|
"movl $-1, %%ecx\n\t"
|
|
"movl $-1, %%edx\n\t"
|
|
"movl $-1, %%esi\n\t"
|
|
"movl $-1, %%edi\n\t"
|
|
"movl $-1, %%ebp\n\t"
|
|
"movl $-1, %%esp\n\t"
|
|
"syscall\n\t"
|
|
"ud2" /* make sure we recover cleanly */
|
|
: : : "memory", "flags");
|
|
}
|
|
|
|
printf("[RUN]\tSYSENTER with TF and invalid state\n");
|
|
sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
|
|
|
|
if (sigsetjmp(jmpbuf, 1) == 0) {
|
|
sigtrap_consecutive_syscalls = 0;
|
|
set_eflags(get_eflags() | X86_EFLAGS_TF);
|
|
asm volatile (
|
|
"movl $-1, %%eax\n\t"
|
|
"movl $-1, %%ebx\n\t"
|
|
"movl $-1, %%ecx\n\t"
|
|
"movl $-1, %%edx\n\t"
|
|
"movl $-1, %%esi\n\t"
|
|
"movl $-1, %%edi\n\t"
|
|
"movl $-1, %%ebp\n\t"
|
|
"movl $-1, %%esp\n\t"
|
|
"sysenter"
|
|
: : : "memory", "flags");
|
|
}
|
|
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
|
|
|
|
printf("[RUN]\tSYSCALL with TF and invalid state\n");
|
|
if (sigsetjmp(jmpbuf, 1) == 0) {
|
|
sigtrap_consecutive_syscalls = 0;
|
|
set_eflags(get_eflags() | X86_EFLAGS_TF);
|
|
asm volatile (
|
|
"movl $-1, %%eax\n\t"
|
|
"movl $-1, %%ebx\n\t"
|
|
"movl $-1, %%ecx\n\t"
|
|
"movl $-1, %%edx\n\t"
|
|
"movl $-1, %%esi\n\t"
|
|
"movl $-1, %%edi\n\t"
|
|
"movl $-1, %%ebp\n\t"
|
|
"movl $-1, %%esp\n\t"
|
|
"syscall\n\t"
|
|
"ud2" /* make sure we recover cleanly */
|
|
: : : "memory", "flags");
|
|
}
|
|
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
|
|
|
|
#ifdef __x86_64__
|
|
printf("[RUN]\tSYSENTER with TF, invalid state, and GSBASE < 0\n");
|
|
|
|
if (sigsetjmp(jmpbuf, 1) == 0) {
|
|
sigtrap_consecutive_syscalls = 0;
|
|
|
|
asm volatile ("wrgsbase %%rax\n\t"
|
|
:: "a" (0xffffffffffff0000UL));
|
|
|
|
set_eflags(get_eflags() | X86_EFLAGS_TF);
|
|
asm volatile (
|
|
"movl $-1, %%eax\n\t"
|
|
"movl $-1, %%ebx\n\t"
|
|
"movl $-1, %%ecx\n\t"
|
|
"movl $-1, %%edx\n\t"
|
|
"movl $-1, %%esi\n\t"
|
|
"movl $-1, %%edi\n\t"
|
|
"movl $-1, %%ebp\n\t"
|
|
"movl $-1, %%esp\n\t"
|
|
"sysenter"
|
|
: : : "memory", "flags");
|
|
}
|
|
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
|
|
#endif
|
|
|
|
return 0;
|
|
}
|