932562a604
We're trying to get sched.h down to more or less just types only, not code - rseq can live in its own header. This helps us kill the dependency on preempt.h in sched.h. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
413 lines
11 KiB
C
413 lines
11 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
|
|
*
|
|
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
|
|
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
|
|
* 2000-2002 x86-64 support by Andi Kleen
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/kstrtox.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/wait.h>
|
|
#include <linux/unistd.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/user-return-notifier.h>
|
|
#include <linux/uprobes.h>
|
|
#include <linux/context_tracking.h>
|
|
#include <linux/entry-common.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/rseq.h>
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/ucontext.h>
|
|
#include <asm/fpu/signal.h>
|
|
#include <asm/fpu/xstate.h>
|
|
#include <asm/vdso.h>
|
|
#include <asm/mce.h>
|
|
#include <asm/sighandling.h>
|
|
#include <asm/vm86.h>
|
|
|
|
#include <asm/syscall.h>
|
|
#include <asm/sigframe.h>
|
|
#include <asm/signal.h>
|
|
#include <asm/shstk.h>
|
|
|
|
static inline int is_ia32_compat_frame(struct ksignal *ksig)
|
|
{
|
|
return IS_ENABLED(CONFIG_IA32_EMULATION) &&
|
|
ksig->ka.sa.sa_flags & SA_IA32_ABI;
|
|
}
|
|
|
|
static inline int is_ia32_frame(struct ksignal *ksig)
|
|
{
|
|
return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig);
|
|
}
|
|
|
|
static inline int is_x32_frame(struct ksignal *ksig)
|
|
{
|
|
return IS_ENABLED(CONFIG_X86_X32_ABI) &&
|
|
ksig->ka.sa.sa_flags & SA_X32_ABI;
|
|
}
|
|
|
|
/*
|
|
* Set up a signal frame.
|
|
*/
|
|
|
|
/* The x86 ABI requires signal frames to be aligned to 16 bytes. */
#define FRAME_ALIGNMENT	16UL

/* Largest number of bytes that aligning to FRAME_ALIGNMENT can consume. */
#define MAX_FRAME_PADDING	(FRAME_ALIGNMENT - 1)
|
|
|
|
/*
|
|
* Determine which stack to use..
|
|
*/
|
|
void __user *
|
|
get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size,
|
|
void __user **fpstate)
|
|
{
|
|
struct k_sigaction *ka = &ksig->ka;
|
|
int ia32_frame = is_ia32_frame(ksig);
|
|
/* Default to using normal stack */
|
|
bool nested_altstack = on_sig_stack(regs->sp);
|
|
bool entering_altstack = false;
|
|
unsigned long math_size = 0;
|
|
unsigned long sp = regs->sp;
|
|
unsigned long buf_fx = 0;
|
|
|
|
/* redzone */
|
|
if (!ia32_frame)
|
|
sp -= 128;
|
|
|
|
/* This is the X/Open sanctioned signal stack switching. */
|
|
if (ka->sa.sa_flags & SA_ONSTACK) {
|
|
/*
|
|
* This checks nested_altstack via sas_ss_flags(). Sensible
|
|
* programs use SS_AUTODISARM, which disables that check, and
|
|
* programs that don't use SS_AUTODISARM get compatible.
|
|
*/
|
|
if (sas_ss_flags(sp) == 0) {
|
|
sp = current->sas_ss_sp + current->sas_ss_size;
|
|
entering_altstack = true;
|
|
}
|
|
} else if (ia32_frame &&
|
|
!nested_altstack &&
|
|
regs->ss != __USER_DS &&
|
|
!(ka->sa.sa_flags & SA_RESTORER) &&
|
|
ka->sa.sa_restorer) {
|
|
/* This is the legacy signal stack switching. */
|
|
sp = (unsigned long) ka->sa.sa_restorer;
|
|
entering_altstack = true;
|
|
}
|
|
|
|
sp = fpu__alloc_mathframe(sp, ia32_frame, &buf_fx, &math_size);
|
|
*fpstate = (void __user *)sp;
|
|
|
|
sp -= frame_size;
|
|
|
|
if (ia32_frame)
|
|
/*
|
|
* Align the stack pointer according to the i386 ABI,
|
|
* i.e. so that on function entry ((sp + 4) & 15) == 0.
|
|
*/
|
|
sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4;
|
|
else
|
|
sp = round_down(sp, FRAME_ALIGNMENT) - 8;
|
|
|
|
/*
|
|
* If we are on the alternate signal stack and would overflow it, don't.
|
|
* Return an always-bogus address instead so we will die with SIGSEGV.
|
|
*/
|
|
if (unlikely((nested_altstack || entering_altstack) &&
|
|
!__on_sig_stack(sp))) {
|
|
|
|
if (show_unhandled_signals && printk_ratelimit())
|
|
pr_info("%s[%d] overflowed sigaltstack\n",
|
|
current->comm, task_pid_nr(current));
|
|
|
|
return (void __user *)-1L;
|
|
}
|
|
|
|
/* save i387 and extended state */
|
|
if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size))
|
|
return (void __user *)-1L;
|
|
|
|
return (void __user *)sp;
|
|
}
|
|
|
|
/*
 * Signal frames come in four struct flavours: sigframe_ia32,
 * rt_sigframe_ia32, rt_sigframe_x32 and rt_sigframe. Account for the worst
 * case, i.e. the largest one. 64-bit applications are told a slightly larger
 * size than they strictly need, but this keeps the code simple.
 */
#if !defined(CONFIG_X86_32) && !defined(CONFIG_IA32_EMULATION)
# define MAX_FRAME_SIGINFO_UCTXT_SIZE	sizeof(struct rt_sigframe)
#else
# define MAX_FRAME_SIGINFO_UCTXT_SIZE	sizeof(struct sigframe_ia32)
#endif
|
|
|
|
/*
 * The FP state frame holds an XSAVE buffer, which has to be 64-byte aligned.
 * A signal frame that starts at an unaligned address therefore needs extra
 * room. This is the conservative upper bound for that alignment padding.
 */
#define MAX_XSAVE_PADDING	63UL
|
|
|
|
/*
 * The frame data is composed of the following areas and laid out as:
 *
 * -------------------------
 * | alignment padding     |
 * -------------------------
 * | (f)xsave frame        |
 * -------------------------
 * | fsave header          |
 * -------------------------
 * | alignment padding     |
 * -------------------------
 * | siginfo + ucontext    |
 * -------------------------
 */
|
|
|
|
/* max_frame_size tells userspace the worst case signal stack size. */
|
|
static unsigned long __ro_after_init max_frame_size;
|
|
static unsigned int __ro_after_init fpu_default_state_size;
|
|
|
|
/*
 * Compute the worst case signal frame size once at boot and log it.
 *
 * The total is the siginfo/ucontext part plus the FP state, each with its
 * maximum alignment padding, rounded up to FRAME_ALIGNMENT.
 * Always returns 0.
 */
static int __init init_sigframe_size(void)
{
	unsigned long size;

	fpu_default_state_size = fpu__get_fpstate_size();

	size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING;
	size += fpu_default_state_size + MAX_XSAVE_PADDING;

	/* Userspace expects an aligned size. */
	max_frame_size = round_up(size, FRAME_ALIGNMENT);

	pr_info("max sigframe size: %lu\n", max_frame_size);
	return 0;
}
early_initcall(init_sigframe_size);
|
|
|
|
unsigned long get_sigframe_size(void)
|
|
{
|
|
return max_frame_size;
|
|
}
|
|
|
|
static int
|
|
setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
|
|
{
|
|
/* Perform fixup for the pre-signal frame. */
|
|
rseq_signal_deliver(ksig, regs);
|
|
|
|
/* Set up the stack frame */
|
|
if (is_ia32_frame(ksig)) {
|
|
if (ksig->ka.sa.sa_flags & SA_SIGINFO)
|
|
return ia32_setup_rt_frame(ksig, regs);
|
|
else
|
|
return ia32_setup_frame(ksig, regs);
|
|
} else if (is_x32_frame(ksig)) {
|
|
return x32_setup_rt_frame(ksig, regs);
|
|
} else {
|
|
return x64_setup_rt_frame(ksig, regs);
|
|
}
|
|
}
|
|
|
|
static void
|
|
handle_signal(struct ksignal *ksig, struct pt_regs *regs)
|
|
{
|
|
bool stepping, failed;
|
|
struct fpu *fpu = ¤t->thread.fpu;
|
|
|
|
if (v8086_mode(regs))
|
|
save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);
|
|
|
|
/* Are we from a system call? */
|
|
if (syscall_get_nr(current, regs) != -1) {
|
|
/* If so, check system call restarting.. */
|
|
switch (syscall_get_error(current, regs)) {
|
|
case -ERESTART_RESTARTBLOCK:
|
|
case -ERESTARTNOHAND:
|
|
regs->ax = -EINTR;
|
|
break;
|
|
|
|
case -ERESTARTSYS:
|
|
if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
|
|
regs->ax = -EINTR;
|
|
break;
|
|
}
|
|
fallthrough;
|
|
case -ERESTARTNOINTR:
|
|
regs->ax = regs->orig_ax;
|
|
regs->ip -= 2;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If TF is set due to a debugger (TIF_FORCED_TF), clear TF now
|
|
* so that register information in the sigcontext is correct and
|
|
* then notify the tracer before entering the signal handler.
|
|
*/
|
|
stepping = test_thread_flag(TIF_SINGLESTEP);
|
|
if (stepping)
|
|
user_disable_single_step(current);
|
|
|
|
failed = (setup_rt_frame(ksig, regs) < 0);
|
|
if (!failed) {
|
|
/*
|
|
* Clear the direction flag as per the ABI for function entry.
|
|
*
|
|
* Clear RF when entering the signal handler, because
|
|
* it might disable possible debug exception from the
|
|
* signal handler.
|
|
*
|
|
* Clear TF for the case when it wasn't set by debugger to
|
|
* avoid the recursive send_sigtrap() in SIGTRAP handler.
|
|
*/
|
|
regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
|
|
/*
|
|
* Ensure the signal handler starts with the new fpu state.
|
|
*/
|
|
fpu__clear_user_states(fpu);
|
|
}
|
|
signal_setup_done(failed, ksig, stepping);
|
|
}
|
|
|
|
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
|
|
{
|
|
#ifdef CONFIG_IA32_EMULATION
|
|
if (current->restart_block.arch_data & TS_COMPAT)
|
|
return __NR_ia32_restart_syscall;
|
|
#endif
|
|
#ifdef CONFIG_X86_X32_ABI
|
|
return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
|
|
#else
|
|
return __NR_restart_syscall;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* Note that 'init' is a special process: it doesn't get signals it doesn't
|
|
* want to handle. Thus you cannot kill init even with a SIGKILL even by
|
|
* mistake.
|
|
*/
|
|
void arch_do_signal_or_restart(struct pt_regs *regs)
|
|
{
|
|
struct ksignal ksig;
|
|
|
|
if (get_signal(&ksig)) {
|
|
/* Whee! Actually deliver the signal. */
|
|
handle_signal(&ksig, regs);
|
|
return;
|
|
}
|
|
|
|
/* Did we come from a system call? */
|
|
if (syscall_get_nr(current, regs) != -1) {
|
|
/* Restart the system call - no handlers present */
|
|
switch (syscall_get_error(current, regs)) {
|
|
case -ERESTARTNOHAND:
|
|
case -ERESTARTSYS:
|
|
case -ERESTARTNOINTR:
|
|
regs->ax = regs->orig_ax;
|
|
regs->ip -= 2;
|
|
break;
|
|
|
|
case -ERESTART_RESTARTBLOCK:
|
|
regs->ax = get_nr_restart_syscall(regs);
|
|
regs->ip -= 2;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If there's no signal to deliver, we just put the saved sigmask
|
|
* back.
|
|
*/
|
|
restore_saved_sigmask();
|
|
}
|
|
|
|
/*
 * Report a bad signal frame and send SIGSEGV to the current task.
 *
 * @regs:  user register state, used for the ip/sp/orig_ax in the log line
 * @frame: user address of the offending frame
 * @where: caller-supplied description of where the bad frame was found
 *
 * The log line is only emitted when show_unhandled_signals is set and the
 * printk ratelimit allows it; SIGSEGV is forced either way.
 */
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
	struct task_struct *me = current;

	if (show_unhandled_signals && printk_ratelimit()) {
		/* KERN_EMERG when task_pid_nr() is 1 or less, else KERN_INFO. */
		const char *level = task_pid_nr(current) > 1 ?
				    KERN_INFO : KERN_EMERG;

		printk("%s"
		       "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
		       level, me->comm, me->pid, where, frame,
		       regs->ip, regs->sp, regs->orig_ax);
		print_vma_addr(KERN_CONT " in ", regs->ip);
		pr_cont("\n");
	}

	force_sig(SIGSEGV);
}
|
|
|
|
#ifdef CONFIG_DYNAMIC_SIGFRAME
|
|
#ifdef CONFIG_STRICT_SIGALTSTACK_SIZE
|
|
static bool strict_sigaltstack_size __ro_after_init = true;
|
|
#else
|
|
static bool strict_sigaltstack_size __ro_after_init = false;
|
|
#endif
|
|
|
|
static int __init strict_sas_size(char *arg)
|
|
{
|
|
return kstrtobool(arg, &strict_sigaltstack_size) == 0;
|
|
}
|
|
__setup("strict_sas_size", strict_sas_size);
|
|
|
|
/*
|
|
* MINSIGSTKSZ is 2048 and can't be changed despite the fact that AVX512
|
|
* exceeds that size already. As such programs might never use the
|
|
* sigaltstack they just continued to work. While always checking against
|
|
* the real size would be correct, this might be considered a regression.
|
|
*
|
|
* Therefore avoid the sanity check, unless enforced by kernel
|
|
* configuration or command line option.
|
|
*
|
|
* When dynamic FPU features are supported, the check is also enforced when
|
|
* the task has permissions to use dynamic features. Tasks which have no
|
|
* permission are checked against the size of the non-dynamic feature set
|
|
* if strict checking is enabled. This avoids forcing all tasks on the
|
|
* system to allocate large sigaltstacks even if they are never going
|
|
* to use a dynamic feature. As this is serialized via sighand::siglock
|
|
* any permission request for a dynamic feature either happened already
|
|
* or will see the newly install sigaltstack size in the permission checks.
|
|
*/
|
|
bool sigaltstack_size_valid(size_t ss_size)
|
|
{
|
|
unsigned long fsize = max_frame_size - fpu_default_state_size;
|
|
u64 mask;
|
|
|
|
lockdep_assert_held(¤t->sighand->siglock);
|
|
|
|
if (!fpu_state_size_dynamic() && !strict_sigaltstack_size)
|
|
return true;
|
|
|
|
fsize += current->group_leader->thread.fpu.perm.__user_state_size;
|
|
if (likely(ss_size > fsize))
|
|
return true;
|
|
|
|
if (strict_sigaltstack_size)
|
|
return ss_size > fsize;
|
|
|
|
mask = current->group_leader->thread.fpu.perm.__state_perm;
|
|
if (mask & XFEATURE_MASK_USER_DYNAMIC)
|
|
return ss_size > fsize;
|
|
|
|
return true;
|
|
}
|
|
#endif /* CONFIG_DYNAMIC_SIGFRAME */
|