linux/arch/arm64/kernel/process.c
Linus Torvalds 0ca2ce81eb arm64 updates for 5.11:
- Expose tag address bits in siginfo. The original arm64 ABI did not
   expose any of the bits 63:56 of a tagged address in siginfo. In the
   presence of user ASAN or MTE, this information may be useful. The
   implementation is generic to other architectures supporting tags (like
   SPARC ADI, subject to wiring up the arch code). The user will have to
   opt in via sigaction(SA_EXPOSE_TAGBITS) so that the extra bits, if
   available, become visible in si_addr.
 
 - Default to 32-bit wide ZONE_DMA. Previously, ZONE_DMA was set to the
   lowest 1GB to cope with the Raspberry Pi 4 limitations, to the
   detriment of other platforms. With these changes, the kernel scans the
   Device Tree dma-ranges and the ACPI IORT information before deciding
   on a smaller ZONE_DMA.
 
 - Strengthen READ_ONCE() to acquire when CONFIG_LTO=y. When building
   with LTO, there is an increased risk of the compiler converting an
   address dependency headed by a READ_ONCE() invocation into a control
   dependency and consequently allowing for harmful reordering by the
   CPU.
 
 - Add CPPC FFH support using arm64 AMU counters.
 
 - set_fs() removal on arm64. This renders the User Access Override (UAO)
   ARMv8 feature unnecessary.
 
 - Perf updates: PMU driver for the ARM DMC-620 memory controller, sysfs
   identifier file for SMMUv3, stop event counters support for i.MX8MP,
   enable the perf events-based hard lockup detector.
 
 - Reorganise the kernel VA space slightly so that 52-bit VA
   configurations can use more virtual address space.
 
 - Improve the robustness of the arm64 memory offline event notifier.
 
 - Pad the Image header to 64K following the EFI header definition
   updated recently to increase the section alignment to 64K.
 
 - Support CONFIG_CMDLINE_EXTEND on arm64.
 
 - Do not use tagged PC in the kernel (TCR_EL1.TBID1==1), freeing up 8
   bits for PtrAuth.
 
 - Switch to vmapped shadow call stacks.
 
 - Miscellaneous clean-ups.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE5RElWfyWxS+3PLO2a9axLQDIXvEFAl/XcSgACgkQa9axLQDI
 XvGkwg//SLknimELD/cphf2UzZm5RFuCU0x1UnIXs9XYo5BrOpgVLLA//+XkCrKN
 0GLAdtBDfw1axWJudzgMBiHrv6wSGh4p3YWjLIW06u/PJu3m3U8oiiolvvF8d7Yq
 UKDseKGQnQkrl97J0SyA+Da/u8D11GEzp52SWL5iRxzt6vInEC27iTOp9n1yoaoP
 f3y7qdp9kv831ryUM3rXFYpc8YuMWXk+JpBSNaxqmjlvjMzipA5PhzBLmNzfc657
 XcrRX5qsgjEeJW8UUnWUVNB42j7tVzN77yraoUpoVVCzZZeWOQxqq5EscKPfIhRt
 AjtSIQNOs95ZVE0SFCTjXnUUb823coUs4dMCdftqlE62JNRwdR+3bkfa+QjPTg1F
 O9ohW1AzX0/JB19QBxMaOgbheB8GFXh3DVJ6pizTgxJgyPvQQtFuEhT1kq8Cst0U
 Pe+pEWsg9t41bUXNz+/l9tUWKWpeCfFNMTrBXLmXrNlTLeOvDh/0UiF0+2lYJYgf
 YAboibQ5eOv2wGCcSDEbNMJ6B2/6GtubDJxH4du680F6Emb6pCSw0ntPwB7mSGLG
 5dXz+9FJxDLjmxw7BXxQgc5MoYIrt5JQtaOQ6UxU8dPy53/+py4Ck6tXNkz0+Ap7
 gPPaGGy1GqobQFu3qlHtOK1VleQi/sWcrpmPHrpiiFUf6N7EmcY=
 =zXFk
 -----END PGP SIGNATURE-----

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:

 - Expose tag address bits in siginfo. The original arm64 ABI did not
   expose any of the bits 63:56 of a tagged address in siginfo. In the
   presence of user ASAN or MTE, this information may be useful. The
   implementation is generic to other architectures supporting tags
   (like SPARC ADI, subject to wiring up the arch code). The user will
   have to opt in via sigaction(SA_EXPOSE_TAGBITS) so that the extra
   bits, if available, become visible in si_addr.

 - Default to 32-bit wide ZONE_DMA. Previously, ZONE_DMA was set to the
   lowest 1GB to cope with the Raspberry Pi 4 limitations, to the
   detriment of other platforms. With these changes, the kernel scans
   the Device Tree dma-ranges and the ACPI IORT information before
   deciding on a smaller ZONE_DMA.

 - Strengthen READ_ONCE() to acquire when CONFIG_LTO=y. When building
   with LTO, there is an increased risk of the compiler converting an
   address dependency headed by a READ_ONCE() invocation into a control
   dependency and consequently allowing for harmful reordering by the
   CPU.

 - Add CPPC FFH support using arm64 AMU counters.

 - set_fs() removal on arm64. This renders the User Access Override
   (UAO) ARMv8 feature unnecessary.

 - Perf updates: PMU driver for the ARM DMC-620 memory controller, sysfs
   identifier file for SMMUv3, stop event counters support for i.MX8MP,
   enable the perf events-based hard lockup detector.

 - Reorganise the kernel VA space slightly so that 52-bit VA
   configurations can use more virtual address space.

 - Improve the robustness of the arm64 memory offline event notifier.

 - Pad the Image header to 64K following the EFI header definition
   updated recently to increase the section alignment to 64K.

 - Support CONFIG_CMDLINE_EXTEND on arm64.

 - Do not use tagged PC in the kernel (TCR_EL1.TBID1==1), freeing up 8
   bits for PtrAuth.

 - Switch to vmapped shadow call stacks.

 - Miscellaneous clean-ups.

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (78 commits)
  perf/imx_ddr: Add system PMU identifier for userspace
  bindings: perf: imx-ddr: add compatible string
  arm64: Fix build failure when HARDLOCKUP_DETECTOR_PERF is enabled
  arm64: mte: fix prctl(PR_GET_TAGGED_ADDR_CTRL) if TCF0=NONE
  arm64: mark __system_matches_cap as __maybe_unused
  arm64: uaccess: remove vestigal UAO support
  arm64: uaccess: remove redundant PAN toggling
  arm64: uaccess: remove addr_limit_user_check()
  arm64: uaccess: remove set_fs()
  arm64: uaccess cleanup macro naming
  arm64: uaccess: split user/kernel routines
  arm64: uaccess: refactor __{get,put}_user
  arm64: uaccess: simplify __copy_user_flushcache()
  arm64: uaccess: rename privileged uaccess routines
  arm64: sdei: explicitly simulate PAN/UAO entry
  arm64: sdei: move uaccess logic to arch/arm64/
  arm64: head.S: always initialize PSTATE
  arm64: head.S: cleanup SCTLR_ELx initialization
  arm64: head.S: rename el2_setup -> init_kernel_el
  arm64: add C wrappers for SET_PSTATE_*()
  ...
2020-12-14 16:24:30 -08:00

736 lines
18 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Based on arch/arm/kernel/process.c
*
* Original Copyright (C) 1995 Linus Torvalds
* Copyright (C) 1996-2000 Russell King - Converted to ARM.
* Copyright (C) 2012 ARM Ltd.
*/
#include <stdarg.h>
#include <linux/compat.h>
#include <linux/efi.h>
#include <linux/elf.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/nospec.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/unistd.h>
#include <linux/user.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/elfcore.h>
#include <linux/pm.h>
#include <linux/tick.h>
#include <linux/utsname.h>
#include <linux/uaccess.h>
#include <linux/random.h>
#include <linux/hw_breakpoint.h>
#include <linux/personality.h>
#include <linux/notifier.h>
#include <trace/events/power.h>
#include <linux/percpu.h>
#include <linux/thread_info.h>
#include <linux/prctl.h>
#include <asm/alternative.h>
#include <asm/arch_gicv3.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
#include <asm/exec.h>
#include <asm/fpsimd.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
#include <asm/stacktrace.h>
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
/*
* Function pointers to optional machine specific functions
*/
void (*pm_power_off)(void);
EXPORT_SYMBOL_GPL(pm_power_off);
void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
static void noinstr __cpu_do_idle(void)
{
dsb(sy);
wfi();
}
static void noinstr __cpu_do_idle_irqprio(void)
{
unsigned long pmr;
unsigned long daif_bits;
daif_bits = read_sysreg(daif);
write_sysreg(daif_bits | PSR_I_BIT, daif);
/*
* Unmask PMR before going idle to make sure interrupts can
* be raised.
*/
pmr = gic_read_pmr();
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
__cpu_do_idle();
gic_write_pmr(pmr);
write_sysreg(daif_bits, daif);
}
/*
* cpu_do_idle()
*
* Idle the processor (wait for interrupt).
*
* If the CPU supports priority masking we must do additional work to
* ensure that interrupts are not masked at the PMR (because the core will
* not wake up if we block the wake up signal in the interrupt controller).
*/
void noinstr cpu_do_idle(void)
{
if (system_uses_irq_prio_masking())
__cpu_do_idle_irqprio();
else
__cpu_do_idle();
}
/*
* This is our default idle handler.
*/
void noinstr arch_cpu_idle(void)
{
/*
* This should do all the clock switching and wait for interrupt
* tricks
*/
cpu_do_idle();
raw_local_irq_enable();
}
#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
{
cpu_die();
}
#endif
/*
* Called by kexec, immediately prior to machine_kexec().
*
* This must completely disable all secondary CPUs; simply causing those CPUs
* to execute e.g. a RAM-based pin loop is not sufficient. This allows the
* kexec'd kernel to use any and all RAM as it sees fit, without having to
* avoid any code or data used by any SW CPU pin loop. The CPU hotplug
* functionality embodied in smpt_shutdown_nonboot_cpus() to achieve this.
*/
void machine_shutdown(void)
{
smp_shutdown_nonboot_cpus(reboot_cpu);
}
/*
* Halting simply requires that the secondary CPUs stop performing any
* activity (executing tasks, handling interrupts). smp_send_stop()
* achieves this.
*/
void machine_halt(void)
{
local_irq_disable();
smp_send_stop();
while (1);
}
/*
* Power-off simply requires that the secondary CPUs stop performing any
* activity (executing tasks, handling interrupts). smp_send_stop()
* achieves this. When the system power is turned off, it will take all CPUs
* with it.
*/
void machine_power_off(void)
{
local_irq_disable();
smp_send_stop();
if (pm_power_off)
pm_power_off();
}
/*
* Restart requires that the secondary CPUs stop performing any activity
* while the primary CPU resets the system. Systems with multiple CPUs must
* provide a HW restart implementation, to ensure that all CPUs reset at once.
* This is required so that any code running after reset on the primary CPU
* doesn't have to co-ordinate with other CPUs to ensure they aren't still
* executing pre-reset code, and using RAM that the primary CPU's code wishes
* to use. Implementing such co-ordination would be essentially impossible.
*/
void machine_restart(char *cmd)
{
/* Disable interrupts first */
local_irq_disable();
smp_send_stop();
/*
* UpdateCapsule() depends on the system being reset via
* ResetSystem().
*/
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_reboot(reboot_mode, NULL);
/* Now call the architecture specific reboot code. */
if (arm_pm_restart)
arm_pm_restart(reboot_mode, cmd);
else
do_kernel_restart(cmd);
/*
* Whoops - the architecture was unable to reboot.
*/
printk("Reboot failed -- System halted\n");
while (1);
}
#define bstr(suffix, str) [PSR_BTYPE_ ## suffix >> PSR_BTYPE_SHIFT] = str
static const char *const btypes[] = {
bstr(NONE, "--"),
bstr( JC, "jc"),
bstr( C, "-c"),
bstr( J , "j-")
};
#undef bstr
static void print_pstate(struct pt_regs *regs)
{
u64 pstate = regs->pstate;
if (compat_user_mode(regs)) {
printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n",
pstate,
pstate & PSR_AA32_N_BIT ? 'N' : 'n',
pstate & PSR_AA32_Z_BIT ? 'Z' : 'z',
pstate & PSR_AA32_C_BIT ? 'C' : 'c',
pstate & PSR_AA32_V_BIT ? 'V' : 'v',
pstate & PSR_AA32_Q_BIT ? 'Q' : 'q',
pstate & PSR_AA32_T_BIT ? "T32" : "A32",
pstate & PSR_AA32_E_BIT ? "BE" : "LE",
pstate & PSR_AA32_A_BIT ? 'A' : 'a',
pstate & PSR_AA32_I_BIT ? 'I' : 'i',
pstate & PSR_AA32_F_BIT ? 'F' : 'f');
} else {
const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >>
PSR_BTYPE_SHIFT];
printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n",
pstate,
pstate & PSR_N_BIT ? 'N' : 'n',
pstate & PSR_Z_BIT ? 'Z' : 'z',
pstate & PSR_C_BIT ? 'C' : 'c',
pstate & PSR_V_BIT ? 'V' : 'v',
pstate & PSR_D_BIT ? 'D' : 'd',
pstate & PSR_A_BIT ? 'A' : 'a',
pstate & PSR_I_BIT ? 'I' : 'i',
pstate & PSR_F_BIT ? 'F' : 'f',
pstate & PSR_PAN_BIT ? '+' : '-',
pstate & PSR_UAO_BIT ? '+' : '-',
pstate & PSR_TCO_BIT ? '+' : '-',
btype_str);
}
}
void __show_regs(struct pt_regs *regs)
{
int i, top_reg;
u64 lr, sp;
if (compat_user_mode(regs)) {
lr = regs->compat_lr;
sp = regs->compat_sp;
top_reg = 12;
} else {
lr = regs->regs[30];
sp = regs->sp;
top_reg = 29;
}
show_regs_print_info(KERN_DEFAULT);
print_pstate(regs);
if (!user_mode(regs)) {
printk("pc : %pS\n", (void *)regs->pc);
printk("lr : %pS\n", (void *)ptrauth_strip_insn_pac(lr));
} else {
printk("pc : %016llx\n", regs->pc);
printk("lr : %016llx\n", lr);
}
printk("sp : %016llx\n", sp);
if (system_uses_irq_prio_masking())
printk("pmr_save: %08llx\n", regs->pmr_save);
i = top_reg;
while (i >= 0) {
printk("x%-2d: %016llx ", i, regs->regs[i]);
i--;
if (i % 2 == 0) {
pr_cont("x%-2d: %016llx ", i, regs->regs[i]);
i--;
}
pr_cont("\n");
}
}
void show_regs(struct pt_regs * regs)
{
__show_regs(regs);
dump_backtrace(regs, NULL, KERN_DEFAULT);
}
static void tls_thread_flush(void)
{
write_sysreg(0, tpidr_el0);
if (is_compat_task()) {
current->thread.uw.tp_value = 0;
/*
* We need to ensure ordering between the shadow state and the
* hardware state, so that we don't corrupt the hardware state
* with a stale shadow state during context switch.
*/
barrier();
write_sysreg(0, tpidrro_el0);
}
}
static void flush_tagged_addr_state(void)
{
if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI))
clear_thread_flag(TIF_TAGGED_ADDR);
}
void flush_thread(void)
{
fpsimd_flush_thread();
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
flush_mte_state();
}
void release_thread(struct task_struct *dead_task)
{
}
void arch_release_task_struct(struct task_struct *tsk)
{
fpsimd_release_task(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
if (current->mm)
fpsimd_preserve_current_state();
*dst = *src;
/* We rely on the above assignment to initialize dst's thread_flags: */
BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK));
/*
* Detach src's sve_state (if any) from dst so that it does not
* get erroneously used or freed prematurely. dst's sve_state
* will be allocated on demand later on if dst uses SVE.
* For consistency, also clear TIF_SVE here: this could be done
* later in copy_process(), but to avoid tripping up future
* maintainers it is best not to leave TIF_SVE and sve_state in
* an inconsistent state, even temporarily.
*/
dst->thread.sve_state = NULL;
clear_tsk_thread_flag(dst, TIF_SVE);
/* clear any pending asynchronous tag fault raised by the parent */
clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
return 0;
}
asmlinkage void ret_from_fork(void) asm("ret_from_fork");
int copy_thread(unsigned long clone_flags, unsigned long stack_start,
unsigned long stk_sz, struct task_struct *p, unsigned long tls)
{
struct pt_regs *childregs = task_pt_regs(p);
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
/*
* In case p was allocated the same task_struct pointer as some
* other recently-exited task, make sure p is disassociated from
* any cpu that may have run that now-exited task recently.
* Otherwise we could erroneously skip reloading the FPSIMD
* registers for p.
*/
fpsimd_flush_task_state(p);
ptrauth_thread_init_kernel(p);
if (likely(!(p->flags & PF_KTHREAD))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
/*
* Read the current TLS pointer from tpidr_el0 as it may be
* out-of-sync with the saved value.
*/
*task_user_tls(p) = read_sysreg(tpidr_el0);
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
childregs->compat_sp = stack_start;
else
childregs->sp = stack_start;
}
/*
* If a TLS pointer was passed to clone, use it for the new
* thread.
*/
if (clone_flags & CLONE_SETTLS)
p->thread.uw.tp_value = tls;
} else {
/*
* A kthread has no context to ERET to, so ensure any buggy
* ERET is treated as an illegal exception return.
*
* When a user task is created from a kthread, childregs will
* be initialized by start_thread() or start_compat_thread().
*/
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;
p->thread.cpu_context.x19 = stack_start;
p->thread.cpu_context.x20 = stk_sz;
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
ptrace_hw_copy_thread(p);
return 0;
}
void tls_preserve_current_state(void)
{
*task_user_tls(current) = read_sysreg(tpidr_el0);
}
static void tls_thread_switch(struct task_struct *next)
{
tls_preserve_current_state();
if (is_compat_thread(task_thread_info(next)))
write_sysreg(next->thread.uw.tp_value, tpidrro_el0);
else if (!arm64_kernel_unmapped_at_el0())
write_sysreg(0, tpidrro_el0);
write_sysreg(*task_user_tls(next), tpidr_el0);
}
/*
* Force SSBS state on context-switch, since it may be lost after migrating
* from a CPU which treats the bit as RES0 in a heterogeneous system.
*/
static void ssbs_thread_switch(struct task_struct *next)
{
/*
* Nothing to do for kernel threads, but 'regs' may be junk
* (e.g. idle task) so check the flags and bail early.
*/
if (unlikely(next->flags & PF_KTHREAD))
return;
/*
* If all CPUs implement the SSBS extension, then we just need to
* context-switch the PSTATE field.
*/
if (cpus_have_const_cap(ARM64_SSBS))
return;
spectre_v4_enable_task_mitigation(next);
}
/*
* We store our current task in sp_el0, which is clobbered by userspace. Keep a
* shadow copy so that we can restore this upon entry from userspace.
*
* This is *only* for exception entry from EL0, and is not valid until we
* __switch_to() a user task.
*/
DEFINE_PER_CPU(struct task_struct *, __entry_task);
static void entry_task_switch(struct task_struct *next)
{
__this_cpu_write(__entry_task, next);
}
/*
* ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
* Assuming the virtual counter is enabled at the beginning of times:
*
* - disable access when switching from a 64bit task to a 32bit task
* - enable access when switching from a 32bit task to a 64bit task
*/
static void erratum_1418040_thread_switch(struct task_struct *prev,
struct task_struct *next)
{
bool prev32, next32;
u64 val;
if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040))
return;
prev32 = is_compat_thread(task_thread_info(prev));
next32 = is_compat_thread(task_thread_info(next));
if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
return;
val = read_sysreg(cntkctl_el1);
if (!next32)
val |= ARCH_TIMER_USR_VCT_ACCESS_EN;
else
val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN;
write_sysreg(val, cntkctl_el1);
}
/*
* Thread switching.
*/
__notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next)
{
struct task_struct *last;
fpsimd_thread_switch(next);
tls_thread_switch(next);
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
entry_task_switch(next);
ssbs_thread_switch(next);
erratum_1418040_thread_switch(prev, next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
* the thread migrates to a different CPU.
* This full barrier is also required by the membarrier system
* call.
*/
dsb(ish);
/*
* MTE thread switching must happen after the DSB above to ensure that
* any asynchronous tag check faults have been logged in the TFSR*_EL1
* registers.
*/
mte_thread_switch(next);
/* the actual thread switch */
last = cpu_switch_to(prev, next);
return last;
}
unsigned long get_wchan(struct task_struct *p)
{
struct stackframe frame;
unsigned long stack_page, ret = 0;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
stack_page = (unsigned long)try_get_task_stack(p);
if (!stack_page)
return 0;
start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p));
do {
if (unwind_frame(p, &frame))
goto out;
if (!in_sched_functions(frame.pc)) {
ret = frame.pc;
goto out;
}
} while (count ++ < 16);
out:
put_task_stack(p);
return ret;
}
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_int() & ~PAGE_MASK;
return sp & ~0xf;
}
/*
* Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
*/
void arch_setup_new_exec(void)
{
current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
ptrauth_thread_init_user(current);
if (task_spec_ssb_noexec(current)) {
arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
PR_SPEC_ENABLE);
}
}
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
/*
* Control the relaxed ABI allowing tagged user addresses into the kernel.
*/
static unsigned int tagged_addr_disabled;
long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
{
unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
struct thread_info *ti = task_thread_info(task);
if (is_compat_thread(ti))
return -EINVAL;
if (system_supports_mte())
valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
if (arg & ~valid_mask)
return -EINVAL;
/*
* Do not allow the enabling of the tagged address ABI if globally
* disabled via sysctl abi.tagged_addr_disabled.
*/
if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
return -EINVAL;
if (set_mte_ctrl(task, arg) != 0)
return -EINVAL;
update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
return 0;
}
long get_tagged_addr_ctrl(struct task_struct *task)
{
long ret = 0;
struct thread_info *ti = task_thread_info(task);
if (is_compat_thread(ti))
return -EINVAL;
if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR))
ret = PR_TAGGED_ADDR_ENABLE;
ret |= get_mte_ctrl(task);
return ret;
}
/*
* Global sysctl to disable the tagged user addresses support. This control
* only prevents the tagged address ABI enabling via prctl() and does not
* disable it for tasks that already opted in to the relaxed ABI.
*/
static struct ctl_table tagged_addr_sysctl_table[] = {
{
.procname = "tagged_addr_disabled",
.mode = 0644,
.data = &tagged_addr_disabled,
.maxlen = sizeof(int),
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{ }
};
static int __init tagged_addr_init(void)
{
if (!register_sysctl("abi", tagged_addr_sysctl_table))
return -EINVAL;
return 0;
}
core_initcall(tagged_addr_init);
#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
asmlinkage void __sched arm64_preempt_schedule_irq(void)
{
lockdep_assert_irqs_disabled();
/*
* Preempting a task from an IRQ means we leave copies of PSTATE
* on the stack. cpufeature's enable calls may modify PSTATE, but
* resuming one of these preempted tasks would undo those changes.
*
* Only allow a task to be preempted once cpufeatures have been
* enabled.
*/
if (system_capabilities_finalized())
preempt_schedule_irq();
}
#ifdef CONFIG_BINFMT_ELF
int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
bool has_interp, bool is_interp)
{
/*
* For dynamically linked executables the interpreter is
* responsible for setting PROT_BTI on everything except
* itself.
*/
if (is_interp != has_interp)
return prot;
if (!(state->flags & ARM64_ELF_BTI))
return prot;
if (prot & PROT_EXEC)
prot |= PROT_BTI;
return prot;
}
#endif