18fd049731
- arm64 perf: DDR PMU driver for Alibaba's T-Head Yitian 710 SoC, SVE vector granule register added to the user regs together with SVE perf extensions documentation. - SVE updates: add HWCAP for SVE EBF16, update the SVE ABI documentation to match the actual kernel behaviour (zeroing the registers on syscall rather than "zeroed or preserved" previously). - More conversions to automatic system registers generation. - vDSO: use self-synchronising virtual counter access in gettimeofday() if the architecture supports it. - arm64 stacktrace cleanups and improvements. - arm64 atomics improvements: always inline assembly, remove LL/SC trampolines. - Improve the reporting of EL1 exceptions: rework BTI and FPAC exception handling, better EL1 undefs reporting. - Cortex-A510 erratum 2658417: remove BF16 support due to incorrect result. - arm64 defconfig updates: build CoreSight as a module, enable options necessary for docker, memory hotplug/hotremove, enable all PMUs provided by Arm. - arm64 ptrace() support for TPIDR2_EL0 (register provided with the SME extensions). - arm64 ftraces updates/fixes: fix module PLTs with mcount, remove unused function. - kselftest updates for arm64: simple HWCAP validation, FP stress test improvements, validation of ZA regs in signal handlers, include larger SVE and SME vector lengths in signal tests, various cleanups. - arm64 alternatives (code patching) improvements to robustness and consistency: replace cpucap static branches with equivalent alternatives, associate callback alternatives with a cpucap. - Miscellaneous updates: optimise kprobe performance of patching single-step slots, simplify uaccess_mask_ptr(), move MTE registers initialisation to C, support huge vmalloc() mappings, run softirqs on the per-CPU IRQ stack, compat (arm32) misalignment fixups for multiword accesses. -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE5RElWfyWxS+3PLO2a9axLQDIXvEFAmM9W4cACgkQa9axLQDI XvEy3w/+LJ3KCFowWiz5gTAWikjv+UVssHjLMJixn47V7hsEFQ26Xnam/438rTMI kE95u6DHUpw2SMIxKzFRO7oI5cQtP+cWGwTtOUnjVO+U1oN+HqDOIbO9DbylWDcU eeeqMMmawMfTPuZrYklpOhXscsorbrKIvYBg7wHYOcwBYV3EPhWr89lwMvTVRuyJ qpX628KlkGMaBcONNhv3nS3qZcAOs0oHQCAVS4C8czLDL+vtJlumXUS3xr1Mqm72 xtFe7sje8Djr2kZ8mzh0GbFiZEBoBD3F/l7ayq8gVRaVpToUt8sk36Stjs4LojF1 6imuAfji/5TItkScq5KhGqj6MIugwp/eUVbRN74OLNTYx7msF1ZADNFQ+Q0UuY0H SYK13KvmOji0xjS8qAfhqrwNB79sk3fb+zF9LjETbdz4ZJCgg9gcFbSUTY0DvMfS MXZk/jVeB07olA8xYbjh0BRt4UV9xU628FPQzK5k7e4Nzl4jSvgtJZCZanfuVtjy /ZS1vbN8o7tQLBAlVnw+Exi/VedkKxkkMgm8tPKsMgERTFDx0Pc4Gs72hRpDnPWT MRbeCCGleAf3JQ5vF0coBDNOCEVvweQgShHOyHTz0GyhWXLCFx3RJICo5I4EIpps LLUk4JK0fO3LVrf1AEpu5ZP4+Sact0zfsH3gB7qyLPYFDmjDXD8= =jl3Z -----END PGP SIGNATURE----- Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux Pull arm64 updates from Catalin Marinas: - arm64 perf: DDR PMU driver for Alibaba's T-Head Yitian 710 SoC, SVE vector granule register added to the user regs together with SVE perf extensions documentation. - SVE updates: add HWCAP for SVE EBF16, update the SVE ABI documentation to match the actual kernel behaviour (zeroing the registers on syscall rather than "zeroed or preserved" previously). - More conversions to automatic system registers generation. - vDSO: use self-synchronising virtual counter access in gettimeofday() if the architecture supports it. - arm64 stacktrace cleanups and improvements. - arm64 atomics improvements: always inline assembly, remove LL/SC trampolines. - Improve the reporting of EL1 exceptions: rework BTI and FPAC exception handling, better EL1 undefs reporting. - Cortex-A510 erratum 2658417: remove BF16 support due to incorrect result. - arm64 defconfig updates: build CoreSight as a module, enable options necessary for docker, memory hotplug/hotremove, enable all PMUs provided by Arm. - arm64 ptrace() support for TPIDR2_EL0 (register provided with the SME extensions). - arm64 ftraces updates/fixes: fix module PLTs with mcount, remove unused function. - kselftest updates for arm64: simple HWCAP validation, FP stress test improvements, validation of ZA regs in signal handlers, include larger SVE and SME vector lengths in signal tests, various cleanups. - arm64 alternatives (code patching) improvements to robustness and consistency: replace cpucap static branches with equivalent alternatives, associate callback alternatives with a cpucap. - Miscellaneous updates: optimise kprobe performance of patching single-step slots, simplify uaccess_mask_ptr(), move MTE registers initialisation to C, support huge vmalloc() mappings, run softirqs on the per-CPU IRQ stack, compat (arm32) misalignment fixups for multiword accesses. * tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (126 commits) arm64: alternatives: Use vdso/bits.h instead of linux/bits.h arm64/kprobe: Optimize the performance of patching single-step slot arm64: defconfig: Add Coresight as module kselftest/arm64: Handle EINTR while reading data from children kselftest/arm64: Flag fp-stress as exiting when we begin finishing up kselftest/arm64: Don't repeat termination handler for fp-stress ARM64: reloc_test: add __init/__exit annotations to module init/exit funcs arm64/mm: fold check for KFENCE into can_set_direct_map() arm64: ftrace: fix module PLTs with mcount arm64: module: Remove unused plt_entry_is_initialized() arm64: module: Make plt_equals_entry() static arm64: fix the build with binutils 2.27 kselftest/arm64: Don't enable v8.5 for MTE selftest builds arm64: uaccess: simplify uaccess_mask_ptr() arm64: asm/perf_regs.h: Avoid C++-style comment in UAPI header kselftest/arm64: Fix typo in hwcap check arm64: mte: move register initialization to C arm64: mm: handle ARM64_KERNEL_USES_PMD_MAPS in vmemmap_populate() arm64: dma: Drop cache invalidation from arch_dma_prep_coherent() arm64/sve: Add Perf extensions documentation ...
329 lines
9.0 KiB
C
329 lines
9.0 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* arch/arm64/kernel/ftrace.c
|
|
*
|
|
* Copyright (C) 2013 Linaro Limited
|
|
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
|
|
*/
|
|
|
|
#include <linux/ftrace.h>
|
|
#include <linux/module.h>
|
|
#include <linux/swab.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/debug-monitors.h>
|
|
#include <asm/ftrace.h>
|
|
#include <asm/insn.h>
|
|
#include <asm/patching.h>
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE
|
|
/*
|
|
* Replace a single instruction, which may be a branch or NOP.
|
|
* If @validate == true, a replaced instruction is checked against 'old'.
|
|
*/
|
|
static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
|
|
bool validate)
|
|
{
|
|
u32 replaced;
|
|
|
|
/*
|
|
* Note:
|
|
* We are paranoid about modifying text, as if a bug were to happen, it
|
|
* could cause us to read or write to someplace that could cause harm.
|
|
* Carefully read and modify the code with aarch64_insn_*() which uses
|
|
* probe_kernel_*(), and make sure what we read is what we expected it
|
|
* to be before modifying it.
|
|
*/
|
|
if (validate) {
|
|
if (aarch64_insn_read((void *)pc, &replaced))
|
|
return -EFAULT;
|
|
|
|
if (replaced != old)
|
|
return -EINVAL;
|
|
}
|
|
if (aarch64_insn_patch_text_nosync((void *)pc, new))
|
|
return -EPERM;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Replace tracer function in ftrace_caller()
|
|
*/
|
|
int ftrace_update_ftrace_func(ftrace_func_t func)
|
|
{
|
|
unsigned long pc;
|
|
u32 new;
|
|
|
|
pc = (unsigned long)ftrace_call;
|
|
new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func,
|
|
AARCH64_INSN_BRANCH_LINK);
|
|
|
|
return ftrace_modify_code(pc, 0, new, false);
|
|
}
|
|
|
|
static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
|
|
{
|
|
#ifdef CONFIG_ARM64_MODULE_PLTS
|
|
struct plt_entry *plt = mod->arch.ftrace_trampolines;
|
|
|
|
if (addr == FTRACE_ADDR)
|
|
return &plt[FTRACE_PLT_IDX];
|
|
if (addr == FTRACE_REGS_ADDR &&
|
|
IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
|
|
return &plt[FTRACE_REGS_PLT_IDX];
|
|
#endif
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Find the address the callsite must branch to in order to reach '*addr'.
|
|
*
|
|
* Due to the limited range of 'BL' instructions, modules may be placed too far
|
|
* away to branch directly and must use a PLT.
|
|
*
|
|
* Returns true when '*addr' contains a reachable target address, or has been
|
|
* modified to contain a PLT address. Returns false otherwise.
|
|
*/
|
|
static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
|
|
struct module *mod,
|
|
unsigned long *addr)
|
|
{
|
|
unsigned long pc = rec->ip;
|
|
long offset = (long)*addr - (long)pc;
|
|
struct plt_entry *plt;
|
|
|
|
/*
|
|
* When the target is within range of the 'BL' instruction, use 'addr'
|
|
* as-is and branch to that directly.
|
|
*/
|
|
if (offset >= -SZ_128M && offset < SZ_128M)
|
|
return true;
|
|
|
|
/*
|
|
* When the target is outside of the range of a 'BL' instruction, we
|
|
* must use a PLT to reach it. We can only place PLTs for modules, and
|
|
* only when module PLT support is built-in.
|
|
*/
|
|
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
|
return false;
|
|
|
|
/*
|
|
* 'mod' is only set at module load time, but if we end up
|
|
* dealing with an out-of-range condition, we can assume it
|
|
* is due to a module being loaded far away from the kernel.
|
|
*
|
|
* NOTE: __module_text_address() must be called with preemption
|
|
* disabled, but we can rely on ftrace_lock to ensure that 'mod'
|
|
* retains its validity throughout the remainder of this code.
|
|
*/
|
|
if (!mod) {
|
|
preempt_disable();
|
|
mod = __module_text_address(pc);
|
|
preempt_enable();
|
|
}
|
|
|
|
if (WARN_ON(!mod))
|
|
return false;
|
|
|
|
plt = get_ftrace_plt(mod, *addr);
|
|
if (!plt) {
|
|
pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
|
|
return false;
|
|
}
|
|
|
|
*addr = (unsigned long)plt;
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Turn on the call to ftrace_caller() in instrumented function
|
|
*/
|
|
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
|
|
{
|
|
unsigned long pc = rec->ip;
|
|
u32 old, new;
|
|
|
|
if (!ftrace_find_callable_addr(rec, NULL, &addr))
|
|
return -EINVAL;
|
|
|
|
old = aarch64_insn_gen_nop();
|
|
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
|
|
|
return ftrace_modify_code(pc, old, new, true);
|
|
}
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
|
|
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
|
|
unsigned long addr)
|
|
{
|
|
unsigned long pc = rec->ip;
|
|
u32 old, new;
|
|
|
|
if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
|
|
return -EINVAL;
|
|
if (!ftrace_find_callable_addr(rec, NULL, &addr))
|
|
return -EINVAL;
|
|
|
|
old = aarch64_insn_gen_branch_imm(pc, old_addr,
|
|
AARCH64_INSN_BRANCH_LINK);
|
|
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
|
|
|
return ftrace_modify_code(pc, old, new, true);
|
|
}
|
|
|
|
/*
|
|
* The compiler has inserted two NOPs before the regular function prologue.
|
|
* All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live,
|
|
* and x9-x18 are free for our use.
|
|
*
|
|
* At runtime we want to be able to swing a single NOP <-> BL to enable or
|
|
* disable the ftrace call. The BL requires us to save the original LR value,
|
|
* so here we insert a <MOV X9, LR> over the first NOP so the instructions
|
|
* before the regular prologue are:
|
|
*
|
|
* | Compiled | Disabled | Enabled |
|
|
* +----------+------------+------------+
|
|
* | NOP | MOV X9, LR | MOV X9, LR |
|
|
* | NOP | NOP | BL <entry> |
|
|
*
|
|
* The LR value will be recovered by ftrace_regs_entry, and restored into LR
|
|
* before returning to the regular function prologue. When a function is not
|
|
* being traced, the MOV is not harmful given x9 is not live per the AAPCS.
|
|
*
|
|
* Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of
|
|
* the BL.
|
|
*/
|
|
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
|
|
{
|
|
unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
|
|
u32 old, new;
|
|
|
|
old = aarch64_insn_gen_nop();
|
|
new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
|
|
AARCH64_INSN_REG_LR,
|
|
AARCH64_INSN_VARIANT_64BIT);
|
|
return ftrace_modify_code(pc, old, new, true);
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Turn off the call to ftrace_caller() in instrumented function
|
|
*/
|
|
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
|
|
unsigned long addr)
|
|
{
|
|
unsigned long pc = rec->ip;
|
|
u32 old = 0, new;
|
|
|
|
new = aarch64_insn_gen_nop();
|
|
|
|
/*
|
|
* When using mcount, callsites in modules may have been initalized to
|
|
* call an arbitrary module PLT (which redirects to the _mcount stub)
|
|
* rather than the ftrace PLT we'll use at runtime (which redirects to
|
|
* the ftrace trampoline). We can ignore the old PLT when initializing
|
|
* the callsite.
|
|
*
|
|
* Note: 'mod' is only set at module load time.
|
|
*/
|
|
if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
|
|
IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) {
|
|
return aarch64_insn_patch_text_nosync((void *)pc, new);
|
|
}
|
|
|
|
if (!ftrace_find_callable_addr(rec, mod, &addr))
|
|
return -EINVAL;
|
|
|
|
old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
|
|
|
return ftrace_modify_code(pc, old, new, true);
|
|
}
|
|
|
|
void arch_ftrace_update_code(int command)
|
|
{
|
|
command |= FTRACE_MAY_SLEEP;
|
|
ftrace_modify_all_code(command);
|
|
}
|
|
#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
/*
|
|
* function_graph tracer expects ftrace_return_to_handler() to be called
|
|
* on the way back to parent. For this purpose, this function is called
|
|
* in _mcount() or ftrace_caller() to replace return address (*parent) on
|
|
* the call stack to return_to_handler.
|
|
*/
|
|
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
|
|
unsigned long frame_pointer)
|
|
{
|
|
unsigned long return_hooker = (unsigned long)&return_to_handler;
|
|
unsigned long old;
|
|
|
|
if (unlikely(atomic_read(¤t->tracing_graph_pause)))
|
|
return;
|
|
|
|
/*
|
|
* Note:
|
|
* No protection against faulting at *parent, which may be seen
|
|
* on other archs. It's unlikely on AArch64.
|
|
*/
|
|
old = *parent;
|
|
|
|
if (!function_graph_enter(old, self_addr, frame_pointer,
|
|
(void *)frame_pointer)) {
|
|
*parent = return_hooker;
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
|
|
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
|
struct ftrace_ops *op, struct ftrace_regs *fregs)
|
|
{
|
|
/*
|
|
* When DYNAMIC_FTRACE_WITH_REGS is selected, `fregs` can never be NULL
|
|
* and arch_ftrace_get_regs(fregs) will always give a non-NULL pt_regs
|
|
* in which we can safely modify the LR.
|
|
*/
|
|
struct pt_regs *regs = arch_ftrace_get_regs(fregs);
|
|
unsigned long *parent = (unsigned long *)&procedure_link_pointer(regs);
|
|
|
|
prepare_ftrace_return(ip, parent, frame_pointer(regs));
|
|
}
|
|
#else
|
|
/*
|
|
* Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
|
|
* depending on @enable.
|
|
*/
|
|
static int ftrace_modify_graph_caller(bool enable)
|
|
{
|
|
unsigned long pc = (unsigned long)&ftrace_graph_call;
|
|
u32 branch, nop;
|
|
|
|
branch = aarch64_insn_gen_branch_imm(pc,
|
|
(unsigned long)ftrace_graph_caller,
|
|
AARCH64_INSN_BRANCH_NOLINK);
|
|
nop = aarch64_insn_gen_nop();
|
|
|
|
if (enable)
|
|
return ftrace_modify_code(pc, nop, branch, true);
|
|
else
|
|
return ftrace_modify_code(pc, branch, nop, true);
|
|
}
|
|
|
|
int ftrace_enable_ftrace_graph_caller(void)
|
|
{
|
|
return ftrace_modify_graph_caller(true);
|
|
}
|
|
|
|
int ftrace_disable_ftrace_graph_caller(void)
|
|
{
|
|
return ftrace_modify_graph_caller(false);
|
|
}
|
|
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
|
|
#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|