c6d3cd32fd
When CONFIG_FUNCTION_GRAPH_TRACER is selected and the function graph tracer is in use, unwind_frame() may erroneously associate a traced function with an incorrect return address. This can happen when starting an unwind from a pt_regs, or when unwinding across an exception boundary. This can be seen when recording with perf while the function graph tracer is in use. For example: | # echo function_graph > /sys/kernel/debug/tracing/current_tracer | # perf record -g -e raw_syscalls:sys_enter:k /bin/true | # perf report ... reports the callchain erroneously as: | el0t_64_sync | el0t_64_sync_handler | el0_svc_common.constprop.0 | perf_callchain | get_perf_callchain | syscall_trace_enter | syscall_trace_enter ... whereas when the function graph tracer is not in use, it reports: | el0t_64_sync | el0t_64_sync_handler | el0_svc | do_el0_svc | el0_svc_common.constprop.0 | syscall_trace_enter | syscall_trace_enter The underlying problem is that ftrace_graph_get_ret_stack() takes an index offset from the most recent entry added to the fgraph return stack. We start an unwind at offset 0, and increment the offset each time we encounter a rewritten return address (i.e. when we see `return_to_handler`). This is broken in two cases: 1) Between creating a pt_regs and starting the unwind, function calls may place entries on the stack, leaving an arbitrary offset which we can only determine by performing a full unwind from the caller of the unwind code (and relying on none of the unwind code being instrumented). This can result in erroneous entries being reported in a backtrace recorded by perf or kfence when the function graph tracer is in use. Currently show_regs() is unaffected as dump_backtrace() performs an initial unwind. 2) When unwinding across an exception boundary (whether continuing an unwind or starting a new unwind from regs), we currently always skip the LR of the interrupted context. Where this was live and contained a rewritten address, we won't consume the corresponding fgraph ret stack entry, leaving subsequent entries off-by-one. This can result in erroneous entries being reported in a backtrace performed by any in-kernel unwinder when that backtrace crosses an exception boundary, with entries after the boundary being reported incorrectly. This includes perf, kfence, show_regs(), panic(), etc. To fix this, we need to be able to uniquely identify each rewritten return address such that we can map this back to the original return address. We can use HAVE_FUNCTION_GRAPH_RET_ADDR_PTR to associate each rewritten return address with a unique location on the stack. As the return address is passed in the LR (and so is not guaranteed a unique location in memory), we use the FP upon entry to the function (i.e. the address of the caller's frame record) as the return address pointer. Any nested call will have a different FP value as the caller must create its own frame record and update FP to point to this. Since ftrace_graph_ret_addr() requires the return address with the PAC stripped, the stripping of the PAC is moved before the fixup of the rewritten address. As we would unconditionally strip the PAC, moving this earlier is not harmful, and we can avoid a redundant strip in the return address fixup code. I've tested this with the perf case above, the ftrace selftests, and a number of ad-hoc unwinder tests. The tests all pass, and I have seen no unexpected behaviour as a result of this change. I've tested with pointer authentication under QEMU TCG where magic-sysrq+l correctly recovers the original return addresses. Note that this doesn't fix the issue of skipping a live LR at an exception boundary, which is a more general problem and requires more substantial rework. Were we to consume the LR in all cases this would result in warnings where the interrupted context's LR contains `return_to_handler`, but the FP has been altered, e.g. | func: | <--- ftrace entry ---> // logs FP & LR, rewrites LR | STP FP, LR, [SP, #-16]! | MOV FP, SP | <--- INTERRUPT ---> ... as ftrace_graph_get_ret_stack() fill not find a matching entry, triggering the WARN_ON_ONCE() in unwind_frame(). Link: https://lore.kernel.org/r/20211025164925.GB2001@C02TD0UTHF1T.local Link: https://lore.kernel.org/r/20211027132529.30027-1-mark.rutland@arm.com Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com> Cc: Mark Brown <broonie@kernel.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Will Deacon <will@kernel.org> Reviewed-by: Mark Brown <broonie@kernel.org> Link: https://lore.kernel.org/r/20211029162245.39761-1-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org>
302 lines
8.0 KiB
C
302 lines
8.0 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* arch/arm64/kernel/ftrace.c
|
|
*
|
|
* Copyright (C) 2013 Linaro Limited
|
|
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
|
|
*/
|
|
|
|
#include <linux/ftrace.h>
|
|
#include <linux/module.h>
|
|
#include <linux/swab.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/debug-monitors.h>
|
|
#include <asm/ftrace.h>
|
|
#include <asm/insn.h>
|
|
#include <asm/patching.h>
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE
|
|
/*
|
|
* Replace a single instruction, which may be a branch or NOP.
|
|
* If @validate == true, a replaced instruction is checked against 'old'.
|
|
*/
|
|
static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
|
|
bool validate)
|
|
{
|
|
u32 replaced;
|
|
|
|
/*
|
|
* Note:
|
|
* We are paranoid about modifying text, as if a bug were to happen, it
|
|
* could cause us to read or write to someplace that could cause harm.
|
|
* Carefully read and modify the code with aarch64_insn_*() which uses
|
|
* probe_kernel_*(), and make sure what we read is what we expected it
|
|
* to be before modifying it.
|
|
*/
|
|
if (validate) {
|
|
if (aarch64_insn_read((void *)pc, &replaced))
|
|
return -EFAULT;
|
|
|
|
if (replaced != old)
|
|
return -EINVAL;
|
|
}
|
|
if (aarch64_insn_patch_text_nosync((void *)pc, new))
|
|
return -EPERM;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Replace tracer function in ftrace_caller()
|
|
*/
|
|
int ftrace_update_ftrace_func(ftrace_func_t func)
|
|
{
|
|
unsigned long pc;
|
|
u32 new;
|
|
|
|
pc = (unsigned long)function_nocfi(ftrace_call);
|
|
new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func,
|
|
AARCH64_INSN_BRANCH_LINK);
|
|
|
|
return ftrace_modify_code(pc, 0, new, false);
|
|
}
|
|
|
|
static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
|
|
{
|
|
#ifdef CONFIG_ARM64_MODULE_PLTS
|
|
struct plt_entry *plt = mod->arch.ftrace_trampolines;
|
|
|
|
if (addr == FTRACE_ADDR)
|
|
return &plt[FTRACE_PLT_IDX];
|
|
if (addr == FTRACE_REGS_ADDR &&
|
|
IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
|
|
return &plt[FTRACE_REGS_PLT_IDX];
|
|
#endif
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Turn on the call to ftrace_caller() in instrumented function
|
|
*/
|
|
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
|
|
{
|
|
unsigned long pc = rec->ip;
|
|
u32 old, new;
|
|
long offset = (long)pc - (long)addr;
|
|
|
|
if (offset < -SZ_128M || offset >= SZ_128M) {
|
|
struct module *mod;
|
|
struct plt_entry *plt;
|
|
|
|
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* On kernels that support module PLTs, the offset between the
|
|
* branch instruction and its target may legally exceed the
|
|
* range of an ordinary relative 'bl' opcode. In this case, we
|
|
* need to branch via a trampoline in the module.
|
|
*
|
|
* NOTE: __module_text_address() must be called with preemption
|
|
* disabled, but we can rely on ftrace_lock to ensure that 'mod'
|
|
* retains its validity throughout the remainder of this code.
|
|
*/
|
|
preempt_disable();
|
|
mod = __module_text_address(pc);
|
|
preempt_enable();
|
|
|
|
if (WARN_ON(!mod))
|
|
return -EINVAL;
|
|
|
|
plt = get_ftrace_plt(mod, addr);
|
|
if (!plt) {
|
|
pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
|
|
return -EINVAL;
|
|
}
|
|
|
|
addr = (unsigned long)plt;
|
|
}
|
|
|
|
old = aarch64_insn_gen_nop();
|
|
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
|
|
|
return ftrace_modify_code(pc, old, new, true);
|
|
}
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
|
|
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
|
|
unsigned long addr)
|
|
{
|
|
unsigned long pc = rec->ip;
|
|
u32 old, new;
|
|
|
|
old = aarch64_insn_gen_branch_imm(pc, old_addr,
|
|
AARCH64_INSN_BRANCH_LINK);
|
|
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
|
|
|
|
return ftrace_modify_code(pc, old, new, true);
|
|
}
|
|
|
|
/*
|
|
* The compiler has inserted two NOPs before the regular function prologue.
|
|
* All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live,
|
|
* and x9-x18 are free for our use.
|
|
*
|
|
* At runtime we want to be able to swing a single NOP <-> BL to enable or
|
|
* disable the ftrace call. The BL requires us to save the original LR value,
|
|
* so here we insert a <MOV X9, LR> over the first NOP so the instructions
|
|
* before the regular prologue are:
|
|
*
|
|
* | Compiled | Disabled | Enabled |
|
|
* +----------+------------+------------+
|
|
* | NOP | MOV X9, LR | MOV X9, LR |
|
|
* | NOP | NOP | BL <entry> |
|
|
*
|
|
* The LR value will be recovered by ftrace_regs_entry, and restored into LR
|
|
* before returning to the regular function prologue. When a function is not
|
|
* being traced, the MOV is not harmful given x9 is not live per the AAPCS.
|
|
*
|
|
* Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of
|
|
* the BL.
|
|
*/
|
|
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
|
|
{
|
|
unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
|
|
u32 old, new;
|
|
|
|
old = aarch64_insn_gen_nop();
|
|
new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
|
|
AARCH64_INSN_REG_LR,
|
|
AARCH64_INSN_VARIANT_64BIT);
|
|
return ftrace_modify_code(pc, old, new, true);
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Turn off the call to ftrace_caller() in instrumented function
|
|
*/
|
|
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
|
|
unsigned long addr)
|
|
{
|
|
unsigned long pc = rec->ip;
|
|
bool validate = true;
|
|
u32 old = 0, new;
|
|
long offset = (long)pc - (long)addr;
|
|
|
|
if (offset < -SZ_128M || offset >= SZ_128M) {
|
|
u32 replaced;
|
|
|
|
if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* 'mod' is only set at module load time, but if we end up
|
|
* dealing with an out-of-range condition, we can assume it
|
|
* is due to a module being loaded far away from the kernel.
|
|
*/
|
|
if (!mod) {
|
|
preempt_disable();
|
|
mod = __module_text_address(pc);
|
|
preempt_enable();
|
|
|
|
if (WARN_ON(!mod))
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* The instruction we are about to patch may be a branch and
|
|
* link instruction that was redirected via a PLT entry. In
|
|
* this case, the normal validation will fail, but we can at
|
|
* least check that we are dealing with a branch and link
|
|
* instruction that points into the right module.
|
|
*/
|
|
if (aarch64_insn_read((void *)pc, &replaced))
|
|
return -EFAULT;
|
|
|
|
if (!aarch64_insn_is_bl(replaced) ||
|
|
!within_module(pc + aarch64_get_branch_offset(replaced),
|
|
mod))
|
|
return -EINVAL;
|
|
|
|
validate = false;
|
|
} else {
|
|
old = aarch64_insn_gen_branch_imm(pc, addr,
|
|
AARCH64_INSN_BRANCH_LINK);
|
|
}
|
|
|
|
new = aarch64_insn_gen_nop();
|
|
|
|
return ftrace_modify_code(pc, old, new, validate);
|
|
}
|
|
|
|
void arch_ftrace_update_code(int command)
|
|
{
|
|
command |= FTRACE_MAY_SLEEP;
|
|
ftrace_modify_all_code(command);
|
|
}
|
|
#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
/*
|
|
* function_graph tracer expects ftrace_return_to_handler() to be called
|
|
* on the way back to parent. For this purpose, this function is called
|
|
* in _mcount() or ftrace_caller() to replace return address (*parent) on
|
|
* the call stack to return_to_handler.
|
|
*/
|
|
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
|
|
unsigned long frame_pointer)
|
|
{
|
|
unsigned long return_hooker = (unsigned long)&return_to_handler;
|
|
unsigned long old;
|
|
|
|
if (unlikely(atomic_read(¤t->tracing_graph_pause)))
|
|
return;
|
|
|
|
/*
|
|
* Note:
|
|
* No protection against faulting at *parent, which may be seen
|
|
* on other archs. It's unlikely on AArch64.
|
|
*/
|
|
old = *parent;
|
|
|
|
if (!function_graph_enter(old, self_addr, frame_pointer,
|
|
(void *)frame_pointer)) {
|
|
*parent = return_hooker;
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_DYNAMIC_FTRACE
|
|
/*
|
|
* Turn on/off the call to ftrace_graph_caller() in ftrace_caller()
|
|
* depending on @enable.
|
|
*/
|
|
static int ftrace_modify_graph_caller(bool enable)
|
|
{
|
|
unsigned long pc = (unsigned long)&ftrace_graph_call;
|
|
u32 branch, nop;
|
|
|
|
branch = aarch64_insn_gen_branch_imm(pc,
|
|
(unsigned long)ftrace_graph_caller,
|
|
AARCH64_INSN_BRANCH_NOLINK);
|
|
nop = aarch64_insn_gen_nop();
|
|
|
|
if (enable)
|
|
return ftrace_modify_code(pc, nop, branch, true);
|
|
else
|
|
return ftrace_modify_code(pc, branch, nop, true);
|
|
}
|
|
|
|
int ftrace_enable_ftrace_graph_caller(void)
|
|
{
|
|
return ftrace_modify_graph_caller(true);
|
|
}
|
|
|
|
int ftrace_disable_ftrace_graph_caller(void)
|
|
{
|
|
return ftrace_modify_graph_caller(false);
|
|
}
|
|
#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|