f0cd5ac1e4
Exceptions which can be taken at (almost) any time are considered to be NMIs. On arm64 that includes: * SDEI events * GICv3 Pseudo-NMIs * Kernel stack overflows * Unexpected/unhandled exceptions ... but currently debug exceptions (BRKs, breakpoints, watchpoints, single-step) are not considered NMIs. As these can be taken at any time, kernel features (lockdep, RCU, ftrace) may not be in a consistent kernel state. For example, we may take an NMI from the idle code or partway through an entry/exit path. While nmi_enter() and nmi_exit() handle most of this state, notably they don't save/restore the lockdep state across an NMI being taken and handled. When interrupts are enabled and an NMI is taken, lockdep may see interrupts become disabled within the NMI code, but not see interrupts become enabled when returning from the NMI, leaving lockdep believing interrupts are disabled when they are actually enabled. The x86 code handles this in idtentry_{enter,exit}_nmi(), which will shortly be moved to the generic entry code. As we can't use either yet, we copy the x86 approach in arm64-specific helpers. All the NMI entrypoints are marked as noinstr to prevent any instrumentation handling code being invoked before the state has been corrected. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-11-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org>
240 lines
5.9 KiB
C
240 lines
5.9 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
// Copyright (C) 2017 Arm Ltd.
|
|
#define pr_fmt(fmt) "sdei: " fmt
|
|
|
|
#include <linux/arm-smccc.h>
|
|
#include <linux/arm_sdei.h>
|
|
#include <linux/hardirq.h>
|
|
#include <linux/irqflags.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <asm/alternative.h>
|
|
#include <asm/exception.h>
|
|
#include <asm/kprobes.h>
|
|
#include <asm/mmu.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/stacktrace.h>
|
|
#include <asm/sysreg.h>
|
|
#include <asm/vmap_stack.h>
|
|
|
|
/* Written by sdei_arch_get_entry_point(): SDEI_EXIT_HVC or SDEI_EXIT_SMC, chosen from the conduit. */
unsigned long sdei_exit_mode;
|
|
|
|
/*
|
|
* VMAP'd stacks checking for stack overflow on exception using sp as a scratch
|
|
* register, meaning SDEI has to switch to its own stack. We need two stacks as
|
|
* a critical event may interrupt a normal event that has just taken a
|
|
* synchronous exception, and is using sp as scratch register. For a critical
|
|
* event interrupting a normal event, we can't reliably tell if we were on the
|
|
* sdei stack.
|
|
* For now, we allocate stacks when the driver is probed.
|
|
*/
|
|
/*
 * Per-cpu pointers to the normal and critical SDEI stacks. They are declared
 * unconditionally, but storage is only defined when CONFIG_VMAP_STACK is set.
 */
DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
|
|
DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
|
|
|
|
#ifdef CONFIG_VMAP_STACK
|
|
DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
|
|
DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
|
|
#endif
|
|
|
|
/*
 * Release the stack recorded for @cpu in the per-cpu slot @ptr, if there is
 * one. The slot is reset to NULL before the memory is passed to vfree().
 */
static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
{
	unsigned long *stack = per_cpu(*ptr, cpu);

	/* Nothing recorded for this CPU. */
	if (!stack)
		return;

	per_cpu(*ptr, cpu) = NULL;
	vfree(stack);
}
|
|
|
|
static void free_sdei_stacks(void)
|
|
{
|
|
int cpu;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
_free_sdei_stack(&sdei_stack_normal_ptr, cpu);
|
|
_free_sdei_stack(&sdei_stack_critical_ptr, cpu);
|
|
}
|
|
}
|
|
|
|
/*
 * Allocate one SDEI_STACK_SIZE stack on @cpu's node and record it in the
 * per-cpu slot @ptr.
 *
 * Returns 0 on success, or -ENOMEM if arch_alloc_vmap_stack() returned NULL
 * (in which case the slot is not written).
 */
static int _init_sdei_stack(unsigned long * __percpu *ptr, int cpu)
{
	int node = cpu_to_node(cpu);
	unsigned long *stack = arch_alloc_vmap_stack(SDEI_STACK_SIZE, node);

	if (stack) {
		per_cpu(*ptr, cpu) = stack;
		return 0;
	}

	return -ENOMEM;
}
|
|
|
|
static int init_sdei_stacks(void)
|
|
{
|
|
int cpu;
|
|
int err = 0;
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
|
|
if (err)
|
|
break;
|
|
err = _init_sdei_stack(&sdei_stack_critical_ptr, cpu);
|
|
if (err)
|
|
break;
|
|
}
|
|
|
|
if (err)
|
|
free_sdei_stacks();
|
|
|
|
return err;
|
|
}
|
|
|
|
static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
|
|
{
|
|
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
|
|
unsigned long high = low + SDEI_STACK_SIZE;
|
|
|
|
return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info);
|
|
}
|
|
|
|
static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
|
|
{
|
|
unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
|
|
unsigned long high = low + SDEI_STACK_SIZE;
|
|
|
|
return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info);
|
|
}
|
|
|
|
bool _on_sdei_stack(unsigned long sp, struct stack_info *info)
|
|
{
|
|
if (!IS_ENABLED(CONFIG_VMAP_STACK))
|
|
return false;
|
|
|
|
if (on_sdei_critical_stack(sp, info))
|
|
return true;
|
|
|
|
if (on_sdei_normal_stack(sp, info))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
unsigned long sdei_arch_get_entry_point(int conduit)
|
|
{
|
|
/*
|
|
* SDEI works between adjacent exception levels. If we booted at EL1 we
|
|
* assume a hypervisor is marshalling events. If we booted at EL2 and
|
|
* dropped to EL1 because we don't support VHE, then we can't support
|
|
* SDEI.
|
|
*/
|
|
if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
|
|
pr_err("Not supported on this hardware/boot configuration\n");
|
|
return 0;
|
|
}
|
|
|
|
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
|
|
if (init_sdei_stacks())
|
|
return 0;
|
|
}
|
|
|
|
sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
|
|
|
|
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
|
|
if (arm64_kernel_unmapped_at_el0()) {
|
|
unsigned long offset;
|
|
|
|
offset = (unsigned long)__sdei_asm_entry_trampoline -
|
|
(unsigned long)__entry_tramp_text_start;
|
|
return TRAMP_VALIAS + offset;
|
|
} else
|
|
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
|
|
return (unsigned long)__sdei_asm_handler;
|
|
|
|
}
|
|
|
|
/*
|
|
* __sdei_handler() returns one of:
|
|
* SDEI_EV_HANDLED - success, return to the interrupted context.
|
|
* SDEI_EV_FAILED - failure, return this error code to firmare.
|
|
* virtual-address - success, return to this address.
|
|
*/
|
|
static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
|
|
struct sdei_registered_event *arg)
|
|
{
|
|
u32 mode;
|
|
int i, err = 0;
|
|
int clobbered_registers = 4;
|
|
u64 elr = read_sysreg(elr_el1);
|
|
u32 kernel_mode = read_sysreg(CurrentEL) | 1; /* +SPSel */
|
|
unsigned long vbar = read_sysreg(vbar_el1);
|
|
|
|
if (arm64_kernel_unmapped_at_el0())
|
|
clobbered_registers++;
|
|
|
|
/* Retrieve the missing registers values */
|
|
for (i = 0; i < clobbered_registers; i++) {
|
|
/* from within the handler, this call always succeeds */
|
|
sdei_api_event_context(i, ®s->regs[i]);
|
|
}
|
|
|
|
/*
|
|
* We didn't take an exception to get here, set PAN. UAO will be cleared
|
|
* by sdei_event_handler()s force_uaccess_begin() call.
|
|
*/
|
|
__uaccess_enable_hw_pan();
|
|
|
|
err = sdei_event_handler(regs, arg);
|
|
if (err)
|
|
return SDEI_EV_FAILED;
|
|
|
|
if (elr != read_sysreg(elr_el1)) {
|
|
/*
|
|
* We took a synchronous exception from the SDEI handler.
|
|
* This could deadlock, and if you interrupt KVM it will
|
|
* hyp-panic instead.
|
|
*/
|
|
pr_warn("unsafe: exception during handler\n");
|
|
}
|
|
|
|
mode = regs->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK);
|
|
|
|
/*
|
|
* If we interrupted the kernel with interrupts masked, we always go
|
|
* back to wherever we came from.
|
|
*/
|
|
if (mode == kernel_mode && !interrupts_enabled(regs))
|
|
return SDEI_EV_HANDLED;
|
|
|
|
/*
|
|
* Otherwise, we pretend this was an IRQ. This lets user space tasks
|
|
* receive signals before we return to them, and KVM to invoke it's
|
|
* world switch to do the same.
|
|
*
|
|
* See DDI0487B.a Table D1-7 'Vector offsets from vector table base
|
|
* address'.
|
|
*/
|
|
if (mode == kernel_mode)
|
|
return vbar + 0x280;
|
|
else if (mode & PSR_MODE32_BIT)
|
|
return vbar + 0x680;
|
|
|
|
return vbar + 0x480;
|
|
}
|
|
|
|
|
|
/*
 * noinstr entry point for SDEI events: brackets _sdei_handler() with
 * arm64_enter_nmi()/arm64_exit_nmi() and returns whatever the inner handler
 * produced.
 */
asmlinkage noinstr unsigned long
__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
{
	unsigned long outcome;

	arm64_enter_nmi(regs);
	outcome = _sdei_handler(regs, arg);
	arm64_exit_nmi(regs);

	return outcome;
}
|