a230883688
It seems like other architectures, namely x86 and arm64 and riscv at least, include the running function as top entry when saving stack trace with save_stack_trace_regs(). Functionnalities like KFENCE expect it. Do the same on powerpc, it allows KFENCE and other users to properly identify the faulting function as depicted below. Before the patch KFENCE was identifying finish_task_switch.isra as the faulting function. [ 14.937370] ================================================================== [ 14.948692] BUG: KFENCE: invalid read in test_invalid_access+0x54/0x108 [ 14.948692] [ 14.956814] Invalid read at 0xdf98800a: [ 14.960664] test_invalid_access+0x54/0x108 [ 14.964876] finish_task_switch.isra.0+0x54/0x23c [ 14.969606] kunit_try_run_case+0x5c/0xd0 [ 14.973658] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 14.979079] kthread+0x15c/0x174 [ 14.982342] ret_from_kernel_thread+0x14/0x1c [ 14.986731] [ 14.988236] CPU: 0 PID: 111 Comm: kunit_try_catch Tainted: G B 5.12.0-rc1-01537-g95f6e2088d7e-dirty #4682 [ 14.999795] NIP: c016ec2c LR: c02f517c CTR: c016ebd8 [ 15.004851] REGS: e2449d90 TRAP: 0301 Tainted: G B (5.12.0-rc1-01537-g95f6e2088d7e-dirty) [ 15.015274] MSR: 00009032 <EE,ME,IR,DR,RI> CR: 22000004 XER: 00000000 [ 15.022043] DAR: df98800a DSISR: 20000000 [ 15.022043] GPR00: c02f517c e2449e50 c1142080 e100dd24 c084b13c 00000008 c084b32b c016ebd8 [ 15.022043] GPR08: c0850000 df988000 c0d10000 e2449eb0 22000288 [ 15.040581] NIP [c016ec2c] test_invalid_access+0x54/0x108 [ 15.046010] LR [c02f517c] kunit_try_run_case+0x5c/0xd0 [ 15.051181] Call Trace: [ 15.053637] [e2449e50] [c005a68c] finish_task_switch.isra.0+0x54/0x23c (unreliable) [ 15.061338] [e2449eb0] [c02f517c] kunit_try_run_case+0x5c/0xd0 [ 15.067215] [e2449ed0] [c02f648c] kunit_generic_run_threadfn_adapter+0x24/0x30 [ 15.074472] [e2449ef0] [c004e7b0] kthread+0x15c/0x174 [ 15.079571] [e2449f30] [c001317c] ret_from_kernel_thread+0x14/0x1c [ 15.085798] Instruction dump: [ 15.088784] 8129d608 38e7ebd8 81020280 911f004c 39000000 995f0024 907f0028 90ff001c [ 15.096613] 3949000a 915f0020 3d40c0d1 3d00c085 <8929000a> 3908adb0 812a4b98 3d40c02f [ 15.104612] ================================================================== Fixes: 35de3b1aa168 ("powerpc: Implement save_stack_trace_regs() to enable kprobe stack tracing") Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Acked-by: Marco Elver <elver@google.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/21324f9e2f21d1640c8397b4d1d857a9355a2283.1615881400.git.christophe.leroy@csgroup.eu
214 lines
5.4 KiB
C
214 lines
5.4 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
* Stack trace utility functions etc.
|
|
*
|
|
* Copyright 2008 Christoph Hellwig, IBM Corp.
|
|
* Copyright 2018 SUSE Linux GmbH
|
|
* Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
|
|
*/
|
|
|
|
#include <linux/export.h>
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/module.h>
|
|
#include <linux/nmi.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/sched/debug.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#include <linux/stacktrace.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/processor.h>
|
|
#include <linux/ftrace.h>
|
|
#include <asm/kprobes.h>
|
|
|
|
#include <asm/paca.h>
|
|
|
|
void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
|
|
struct task_struct *task, struct pt_regs *regs)
|
|
{
|
|
unsigned long sp;
|
|
|
|
if (regs && !consume_entry(cookie, regs->nip))
|
|
return;
|
|
|
|
if (regs)
|
|
sp = regs->gpr[1];
|
|
else if (task == current)
|
|
sp = current_stack_frame();
|
|
else
|
|
sp = task->thread.ksp;
|
|
|
|
for (;;) {
|
|
unsigned long *stack = (unsigned long *) sp;
|
|
unsigned long newsp, ip;
|
|
|
|
if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD))
|
|
return;
|
|
|
|
newsp = stack[0];
|
|
ip = stack[STACK_FRAME_LR_SAVE];
|
|
|
|
if (!consume_entry(cookie, ip))
|
|
return;
|
|
|
|
sp = newsp;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* This function returns an error if it detects any unreliable features of the
|
|
* stack. Otherwise it guarantees that the stack trace is reliable.
|
|
*
|
|
* If the task is not 'current', the caller *must* ensure the task is inactive.
|
|
*/
|
|
int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
|
|
void *cookie, struct task_struct *task)
|
|
{
|
|
unsigned long sp;
|
|
unsigned long newsp;
|
|
unsigned long stack_page = (unsigned long)task_stack_page(task);
|
|
unsigned long stack_end;
|
|
int graph_idx = 0;
|
|
bool firstframe;
|
|
|
|
stack_end = stack_page + THREAD_SIZE;
|
|
if (!is_idle_task(task)) {
|
|
/*
|
|
* For user tasks, this is the SP value loaded on
|
|
* kernel entry, see "PACAKSAVE(r13)" in _switch() and
|
|
* system_call_common()/EXCEPTION_PROLOG_COMMON().
|
|
*
|
|
* Likewise for non-swapper kernel threads,
|
|
* this also happens to be the top of the stack
|
|
* as setup by copy_thread().
|
|
*
|
|
* Note that stack backlinks are not properly setup by
|
|
* copy_thread() and thus, a forked task() will have
|
|
* an unreliable stack trace until it's been
|
|
* _switch()'ed to for the first time.
|
|
*/
|
|
stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
|
|
} else {
|
|
/*
|
|
* idle tasks have a custom stack layout,
|
|
* c.f. cpu_idle_thread_init().
|
|
*/
|
|
stack_end -= STACK_FRAME_OVERHEAD;
|
|
}
|
|
|
|
if (task == current)
|
|
sp = current_stack_frame();
|
|
else
|
|
sp = task->thread.ksp;
|
|
|
|
if (sp < stack_page + sizeof(struct thread_struct) ||
|
|
sp > stack_end - STACK_FRAME_MIN_SIZE) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
for (firstframe = true; sp != stack_end;
|
|
firstframe = false, sp = newsp) {
|
|
unsigned long *stack = (unsigned long *) sp;
|
|
unsigned long ip;
|
|
|
|
/* sanity check: ABI requires SP to be aligned 16 bytes. */
|
|
if (sp & 0xF)
|
|
return -EINVAL;
|
|
|
|
newsp = stack[0];
|
|
/* Stack grows downwards; unwinder may only go up. */
|
|
if (newsp <= sp)
|
|
return -EINVAL;
|
|
|
|
if (newsp != stack_end &&
|
|
newsp > stack_end - STACK_FRAME_MIN_SIZE) {
|
|
return -EINVAL; /* invalid backlink, too far up. */
|
|
}
|
|
|
|
/*
|
|
* We can only trust the bottom frame's backlink, the
|
|
* rest of the frame may be uninitialized, continue to
|
|
* the next.
|
|
*/
|
|
if (firstframe)
|
|
continue;
|
|
|
|
/* Mark stacktraces with exception frames as unreliable. */
|
|
if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
|
|
stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Examine the saved LR: it must point into kernel code. */
|
|
ip = stack[STACK_FRAME_LR_SAVE];
|
|
if (!__kernel_text_address(ip))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* FIXME: IMHO these tests do not belong in
|
|
* arch-dependent code, they are generic.
|
|
*/
|
|
ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack);
|
|
#ifdef CONFIG_KPROBES
|
|
/*
|
|
* Mark stacktraces with kretprobed functions on them
|
|
* as unreliable.
|
|
*/
|
|
if (ip == (unsigned long)kretprobe_trampoline)
|
|
return -EINVAL;
|
|
#endif
|
|
|
|
if (!consume_entry(cookie, ip))
|
|
return -EINVAL;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
|
|
static void handle_backtrace_ipi(struct pt_regs *regs)
|
|
{
|
|
nmi_cpu_backtrace(regs);
|
|
}
|
|
|
|
static void raise_backtrace_ipi(cpumask_t *mask)
|
|
{
|
|
unsigned int cpu;
|
|
|
|
for_each_cpu(cpu, mask) {
|
|
if (cpu == smp_processor_id())
|
|
handle_backtrace_ipi(NULL);
|
|
else
|
|
smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
|
|
}
|
|
|
|
for_each_cpu(cpu, mask) {
|
|
struct paca_struct *p = paca_ptrs[cpu];
|
|
|
|
cpumask_clear_cpu(cpu, mask);
|
|
|
|
pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
|
|
if (!virt_addr_valid(p)) {
|
|
pr_warn("paca pointer appears corrupt? (%px)\n", p);
|
|
continue;
|
|
}
|
|
|
|
pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
|
|
p->irq_soft_mask, p->in_mce, p->in_nmi);
|
|
|
|
if (virt_addr_valid(p->__current))
|
|
pr_cont(" current: %d (%s)\n", p->__current->pid,
|
|
p->__current->comm);
|
|
else
|
|
pr_cont(" current pointer corrupt? (%px)\n", p->__current);
|
|
|
|
pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
|
|
show_stack(p->__current, (unsigned long *)p->saved_r1, KERN_WARNING);
|
|
}
|
|
}
|
|
|
|
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
|
|
{
|
|
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
|
|
}
|
|
#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
|