0c32706dac
When the function_graph tracer is in use, arch_stack_walk() may unwind the stack incorrectly, erroneously reporting itself, missing the final entry which is being traced, and reporting all traced entries between these off-by-one from where they should be. When ftrace hooks a function return, the original return address is saved to the fgraph ret_stack, and the return address in the LR (or the function's frame record) is replaced with `return_to_handler`. When arm64's unwinder encounters frames returning to `return_to_handler`, it finds the associated original return address from the fgraph ret stack, assuming the most recent `return_to_handler` entry on the stack corresponds to the most recent entry in the fgraph ret stack, and so on. When arch_stack_walk() is used to dump the current task's stack, it starts from the caller of arch_stack_walk(). However, arch_stack_walk() can be traced, and so may push an entry on to the fgraph ret stack, leaving the fgraph ret stack offset by one from the expected position. This can be seen when dumping the stack via /proc/self/stack, where enabling the graph tracer results in an unexpected `stack_trace_save_tsk` entry at the start of the trace, and `el0_svc` missing from the end of the trace. This patch fixes this by marking arch_stack_walk() as notrace, as we do for all other functions on the path to ftrace_graph_get_ret_stack(). While a few helper functions are not marked notrace, their calls/returns are balanced, and will have no observable effect when examining the fgraph ret stack. It is possible for an exception boundary to cause a similar offset if the return address of the interrupted context was in the LR. Fixing those cases will require some more substantial rework, and is left for subsequent patches. 
Before: | # cat /proc/self/stack | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0_svc+0x2c/0x54 | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c | # echo function_graph > /sys/kernel/tracing/current_tracer | # cat /proc/self/stack | [<0>] stack_trace_save_tsk+0xa4/0x110 | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c After: | # cat /proc/self/stack | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0_svc+0x2c/0x54 | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c | # echo function_graph > /sys/kernel/tracing/current_tracer | # cat /proc/self/stack | [<0>] proc_pid_stack+0xc4/0x140 | [<0>] proc_single_show+0x6c/0x120 | [<0>] seq_read_iter+0x240/0x4e0 | [<0>] seq_read+0xe8/0x140 | [<0>] vfs_read+0xb8/0x1e4 | [<0>] ksys_read+0x74/0x100 | [<0>] __arm64_sys_read+0x28/0x3c | [<0>] invoke_syscall+0x50/0x120 | [<0>] el0_svc_common.constprop.0+0xc4/0xd4 | [<0>] do_el0_svc+0x30/0x9c | [<0>] el0_svc+0x2c/0x54 | [<0>] el0t_64_sync_handler+0x1a8/0x1b0 | [<0>] el0t_64_sync+0x198/0x19c Cc: 
<stable@vger.kernel.org> Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Madhavan T. Venkataraman <madvenka@linux.microsoft.com> Cc: Mark Brown <broonie@kernel.org> Cc: Will Deacon <will@kernel.org> Reviewed-by: Mark Brown <broonie@kernel.org> Link: https://lore.kernel.org/r/20210802164845.45506-3-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org>
241 lines
5.8 KiB
C
241 lines
5.8 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Stack tracing support
|
|
*
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/ftrace.h>
|
|
#include <linux/kprobes.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/debug.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/stacktrace.h>
|
|
|
|
#include <asm/irq.h>
|
|
#include <asm/pointer_auth.h>
|
|
#include <asm/stack_pointer.h>
|
|
#include <asm/stacktrace.h>
|
|
|
|
/*
|
|
* AArch64 PCS assigns the frame pointer to x29.
|
|
*
|
|
* A simple function prologue looks like this:
|
|
* sub sp, sp, #0x10
|
|
* stp x29, x30, [sp]
|
|
* mov x29, sp
|
|
*
|
|
* A simple function epilogue looks like this:
|
|
* mov sp, x29
|
|
* ldp x29, x30, [sp]
|
|
* add sp, sp, #0x10
|
|
*/
|
|
|
|
|
|
void start_backtrace(struct stackframe *frame, unsigned long fp,
|
|
unsigned long pc)
|
|
{
|
|
frame->fp = fp;
|
|
frame->pc = pc;
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
frame->graph = 0;
|
|
#endif
|
|
|
|
/*
|
|
* Prime the first unwind.
|
|
*
|
|
* In unwind_frame() we'll check that the FP points to a valid stack,
|
|
* which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
|
|
* treated as a transition to whichever stack that happens to be. The
|
|
* prev_fp value won't be used, but we set it to 0 such that it is
|
|
* definitely not an accessible stack address.
|
|
*/
|
|
bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
|
|
frame->prev_fp = 0;
|
|
frame->prev_type = STACK_TYPE_UNKNOWN;
|
|
}
|
|
|
|
/*
|
|
* Unwind from one frame record (A) to the next frame record (B).
|
|
*
|
|
* We terminate early if the location of B indicates a malformed chain of frame
|
|
* records (e.g. a cycle), determined based on the location and fp value of A
|
|
* and the location (but not the fp value) of B.
|
|
*/
|
|
int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
|
|
{
|
|
unsigned long fp = frame->fp;
|
|
struct stack_info info;
|
|
|
|
if (!tsk)
|
|
tsk = current;
|
|
|
|
/* Final frame; nothing to unwind */
|
|
if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
|
|
return -ENOENT;
|
|
|
|
if (fp & 0x7)
|
|
return -EINVAL;
|
|
|
|
if (!on_accessible_stack(tsk, fp, 16, &info))
|
|
return -EINVAL;
|
|
|
|
if (test_bit(info.type, frame->stacks_done))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* As stacks grow downward, any valid record on the same stack must be
|
|
* at a strictly higher address than the prior record.
|
|
*
|
|
* Stacks can nest in several valid orders, e.g.
|
|
*
|
|
* TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
|
|
* TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
|
|
*
|
|
* ... but the nesting itself is strict. Once we transition from one
|
|
* stack to another, it's never valid to unwind back to that first
|
|
* stack.
|
|
*/
|
|
if (info.type == frame->prev_type) {
|
|
if (fp <= frame->prev_fp)
|
|
return -EINVAL;
|
|
} else {
|
|
set_bit(frame->prev_type, frame->stacks_done);
|
|
}
|
|
|
|
/*
|
|
* Record this frame record's values and location. The prev_fp and
|
|
* prev_type are only meaningful to the next unwind_frame() invocation.
|
|
*/
|
|
frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
|
|
frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
|
|
frame->prev_fp = fp;
|
|
frame->prev_type = info.type;
|
|
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
if (tsk->ret_stack &&
|
|
(ptrauth_strip_insn_pac(frame->pc) == (unsigned long)return_to_handler)) {
|
|
struct ftrace_ret_stack *ret_stack;
|
|
/*
|
|
* This is a case where function graph tracer has
|
|
* modified a return address (LR) in a stack frame
|
|
* to hook a function return.
|
|
* So replace it to an original value.
|
|
*/
|
|
ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
|
|
if (WARN_ON_ONCE(!ret_stack))
|
|
return -EINVAL;
|
|
frame->pc = ret_stack->ret;
|
|
}
|
|
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
|
|
|
frame->pc = ptrauth_strip_insn_pac(frame->pc);
|
|
|
|
return 0;
|
|
}
|
|
NOKPROBE_SYMBOL(unwind_frame);
|
|
|
|
/*
 * Report each pc of the walk described by @frame to @fn, passing @data
 * through unchanged. The walk ends as soon as @fn returns false or
 * unwind_frame() reports an error (a negative value).
 */
void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
			     bool (*fn)(void *, unsigned long), void *data)
{
	do {
		/* The callback sees the current entry before we step past it. */
		if (!fn(data, frame->pc))
			return;
	} while (unwind_frame(tsk, frame) >= 0);
}
NOKPROBE_SYMBOL(walk_stackframe);
|
|
|
|
/* Print a single backtrace line for the address @where, prefixed by @loglvl. */
static void dump_backtrace_entry(unsigned long where, const char *loglvl)
{
	void *addr = (void *)where;

	printk("%s %pSb\n", loglvl, addr);
}
|
|
|
|
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
|
|
const char *loglvl)
|
|
{
|
|
struct stackframe frame;
|
|
int skip = 0;
|
|
|
|
pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
|
|
|
|
if (regs) {
|
|
if (user_mode(regs))
|
|
return;
|
|
skip = 1;
|
|
}
|
|
|
|
if (!tsk)
|
|
tsk = current;
|
|
|
|
if (!try_get_task_stack(tsk))
|
|
return;
|
|
|
|
if (tsk == current) {
|
|
start_backtrace(&frame,
|
|
(unsigned long)__builtin_frame_address(0),
|
|
(unsigned long)dump_backtrace);
|
|
} else {
|
|
/*
|
|
* task blocked in __switch_to
|
|
*/
|
|
start_backtrace(&frame,
|
|
thread_saved_fp(tsk),
|
|
thread_saved_pc(tsk));
|
|
}
|
|
|
|
printk("%sCall trace:\n", loglvl);
|
|
do {
|
|
/* skip until specified stack frame */
|
|
if (!skip) {
|
|
dump_backtrace_entry(frame.pc, loglvl);
|
|
} else if (frame.fp == regs->regs[29]) {
|
|
skip = 0;
|
|
/*
|
|
* Mostly, this is the case where this function is
|
|
* called in panic/abort. As exception handler's
|
|
* stack frame does not contain the corresponding pc
|
|
* at which an exception has taken place, use regs->pc
|
|
* instead.
|
|
*/
|
|
dump_backtrace_entry(regs->pc, loglvl);
|
|
}
|
|
} while (!unwind_frame(tsk, &frame));
|
|
|
|
put_task_stack(tsk);
|
|
}
|
|
|
|
void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
|
|
{
|
|
dump_backtrace(NULL, tsk, loglvl);
|
|
barrier();
|
|
}
|
|
|
|
#ifdef CONFIG_STACKTRACE

/*
 * Walk a stack and hand every pc to @consume_entry along with @cookie.
 *
 * The starting point is chosen as follows:
 *  - @regs given:        the fp (x29) and pc recorded in @regs;
 *  - @task is current:   the caller of arch_stack_walk();
 *  - any other @task:    the fp/pc saved for that task.
 *
 * This function is notrace: if it were traced by the function graph tracer it
 * would push its own entry onto the fgraph ret stack while the walk starts at
 * its caller, leaving unwind_frame()'s ret stack lookups off by one.
 */
noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
			      void *cookie, struct task_struct *task,
			      struct pt_regs *regs)
{
	struct stackframe frame;
	unsigned long fp, pc;

	if (regs) {
		fp = regs->regs[29];
		pc = regs->pc;
	} else if (task == current) {
		/* Skip our own frame: begin with whoever called us. */
		fp = (unsigned long)__builtin_frame_address(1);
		pc = (unsigned long)__builtin_return_address(0);
	} else {
		fp = thread_saved_fp(task);
		pc = thread_saved_pc(task);
	}

	start_backtrace(&frame, fp, pc);
	walk_stackframe(task, &frame, consume_entry, cookie);
}

#endif
|