8b91cee5ea
Hash faults are not resoved in NMI context, instead causing the access to fail. This is done because perf interrupts can get backtraces including walking the user stack, and taking a hash fault on those could deadlock on the HPTE lock if the perf interrupt hits while the same HPTE lock is being held by the hash fault code. The user-access for the stack walking will notice the access failed and deal with that in the perf code. The reason to allow perf interrupts in is to better profile hash faults. The problem with this is any hash fault on a kernel access that happens in NMI context will crash, because kernel accesses must not fail. Hard lockups, system reset, machine checks that access vmalloc space including modules and including stack backtracing and symbol lookup in modules, per-cpu data, etc could all run into this problem. Fix this by disallowing perf interrupts in the hash fault code (the direct hash fault is covered by MSR[EE]=0 so the PMI disable just needs to extend to the preload case). This simplifies the tricky logic in hash faults and perf, at the cost of reduced profiling of hash faults. perf can still latch addresses when interrupts are disabled, it just won't get the stack trace at that point, so it would still find hot spots, just sometimes with confusing stack chains. An alternative could be to allow perf interrupts here but always do the slowpath stack walk if we are in nmi context, but that slows down all perf interrupt stack walking on hash though and it does not remove as much tricky code. Reported-by: Laurent Dufour <ldufour@linux.ibm.com> Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Tested-by: Laurent Dufour <ldufour@linux.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220204035348.545435-1-npiggin@gmail.com
121 lines
3.2 KiB
C
121 lines
3.2 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Performance counter callchain support - powerpc architecture code
|
|
*
|
|
* Copyright © 2009 Paul Mackerras, IBM Corporation.
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/mm.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/sigcontext.h>
|
|
#include <asm/ucontext.h>
|
|
#include <asm/vdso.h>
|
|
#include <asm/pte-walk.h>
|
|
|
|
#include "callchain.h"
|
|
|
|
static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret)
|
|
{
|
|
return __read_user_stack(ptr, ret, sizeof(*ret));
|
|
}
|
|
|
|
/*
|
|
* 64-bit user processes use the same stack frame for RT and non-RT signals.
|
|
*/
|
|
struct signal_frame_64 {
|
|
char dummy[__SIGNAL_FRAMESIZE];
|
|
struct ucontext uc;
|
|
unsigned long unused[2];
|
|
unsigned int tramp[6];
|
|
struct siginfo *pinfo;
|
|
void *puc;
|
|
struct siginfo info;
|
|
char abigap[288];
|
|
};
|
|
|
|
static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
|
|
{
|
|
if (nip == fp + offsetof(struct signal_frame_64, tramp))
|
|
return 1;
|
|
if (current->mm->context.vdso &&
|
|
nip == VDSO64_SYMBOL(current->mm->context.vdso, sigtramp_rt64))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Do some sanity checking on the signal frame pointed to by sp.
|
|
* We check the pinfo and puc pointers in the frame.
|
|
*/
|
|
static int sane_signal_64_frame(unsigned long sp)
|
|
{
|
|
struct signal_frame_64 __user *sf;
|
|
unsigned long pinfo, puc;
|
|
|
|
sf = (struct signal_frame_64 __user *) sp;
|
|
if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
|
|
read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
|
|
return 0;
|
|
return pinfo == (unsigned long) &sf->info &&
|
|
puc == (unsigned long) &sf->uc;
|
|
}
|
|
|
|
void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
|
|
struct pt_regs *regs)
|
|
{
|
|
unsigned long sp, next_sp;
|
|
unsigned long next_ip;
|
|
unsigned long lr;
|
|
long level = 0;
|
|
struct signal_frame_64 __user *sigframe;
|
|
unsigned long __user *fp, *uregs;
|
|
|
|
next_ip = perf_instruction_pointer(regs);
|
|
lr = regs->link;
|
|
sp = regs->gpr[1];
|
|
perf_callchain_store(entry, next_ip);
|
|
|
|
while (entry->nr < entry->max_stack) {
|
|
fp = (unsigned long __user *) sp;
|
|
if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp))
|
|
return;
|
|
if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
|
|
return;
|
|
|
|
/*
|
|
* Note: the next_sp - sp >= signal frame size check
|
|
* is true when next_sp < sp, which can happen when
|
|
* transitioning from an alternate signal stack to the
|
|
* normal stack.
|
|
*/
|
|
if (next_sp - sp >= sizeof(struct signal_frame_64) &&
|
|
(is_sigreturn_64_address(next_ip, sp) ||
|
|
(level <= 1 && is_sigreturn_64_address(lr, sp))) &&
|
|
sane_signal_64_frame(sp)) {
|
|
/*
|
|
* This looks like an signal frame
|
|
*/
|
|
sigframe = (struct signal_frame_64 __user *) sp;
|
|
uregs = sigframe->uc.uc_mcontext.gp_regs;
|
|
if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
|
|
read_user_stack_64(&uregs[PT_LNK], &lr) ||
|
|
read_user_stack_64(&uregs[PT_R1], &sp))
|
|
return;
|
|
level = 0;
|
|
perf_callchain_store_context(entry, PERF_CONTEXT_USER);
|
|
perf_callchain_store(entry, next_ip);
|
|
continue;
|
|
}
|
|
|
|
if (level == 0)
|
|
next_ip = lr;
|
|
perf_callchain_store(entry, next_ip);
|
|
++level;
|
|
sp = next_sp;
|
|
}
|
|
}
|