ftrace: Add recording of functions that caused recursion
This adds CONFIG_FTRACE_RECORD_RECURSION that will record to a file "recursed_functions" all the functions that caused recursion while a callback to the function tracer was running. Link: https://lkml.kernel.org/r/20201106023548.102375687@goodmis.org Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Guo Ren <guoren@kernel.org> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Helge Deller <deller@gmx.de> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Borislav Petkov <bp@alien8.de> Cc: x86@kernel.org Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Kees Cook <keescook@chromium.org> Cc: Anton Vorontsov <anton@enomsg.org> Cc: Colin Cross <ccross@android.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Jiri Kosina <jikos@kernel.org> Cc: Miroslav Benes <mbenes@suse.cz> Cc: Petr Mladek <pmladek@suse.com> Cc: Joe Lawrence <joe.lawrence@redhat.com> Cc: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com> Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-csky@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Cc: live-patching@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
This commit is contained in:
parent
a25d036d93
commit
773c167050
@ -118,7 +118,7 @@ can help in this regard. If you start your code with:
|
||||
|
||||
int bit;
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
@ -130,7 +130,9 @@ The code in between will be safe to use, even if it ends up calling a
|
||||
function that the callback is tracing. Note, on success,
|
||||
ftrace_test_recursion_trylock() will disable preemption, and the
|
||||
ftrace_test_recursion_unlock() will enable it again (if it was previously
|
||||
enabled).
|
||||
enabled). The instruction pointer (ip) and its parent (parent_ip) are passed to
|
||||
ftrace_test_recursion_trylock() to record where the recursion happened
|
||||
(if CONFIG_FTRACE_RECORD_RECURSION is set).
|
||||
|
||||
Alternatively, if the FTRACE_OPS_FL_RECURSION flag is set on the ftrace_ops
|
||||
(as explained below), then a helper trampoline will be used to test
|
||||
|
@ -18,7 +18,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
|
||||
struct kprobe *p;
|
||||
struct kprobe_ctlblk *kcb;
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
|
@ -210,7 +210,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
|
||||
struct kprobe *p;
|
||||
int bit;
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
|
@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
|
||||
struct kprobe_ctlblk *kcb;
|
||||
int bit;
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(nip, parent_nip);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
|
@ -204,7 +204,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
|
||||
struct kprobe *p;
|
||||
int bit;
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
|
@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
|
||||
struct kprobe_ctlblk *kcb;
|
||||
int bit;
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
|
@ -41,7 +41,7 @@ static void notrace pstore_ftrace_call(unsigned long ip,
|
||||
if (unlikely(oops_in_progress))
|
||||
return;
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
|
@ -91,6 +91,9 @@ enum {
|
||||
* not be correct. Allow for a single recursion to cover this case.
|
||||
*/
|
||||
TRACE_TRANSITION_BIT,
|
||||
|
||||
/* Used to prevent recursion recording from recursing. */
|
||||
TRACE_RECORD_RECURSION_BIT,
|
||||
};
|
||||
|
||||
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
|
||||
@ -142,7 +145,22 @@ static __always_inline int trace_get_context_bit(void)
|
||||
pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ;
|
||||
}
|
||||
|
||||
static __always_inline int trace_test_and_set_recursion(int start, int max)
|
||||
/*
 * do_ftrace_record_recursion(ip, pip) - report a detected recursion.
 *
 * With CONFIG_FTRACE_RECORD_RECURSION set, this hands ip/pip to
 * ftrace_record_recursion(). TRACE_RECORD_RECURSION_BIT is set in
 * current->trace_recursion for the duration of that call and tested first,
 * so a recursion detected while the recorder itself is running is not
 * recorded again. Without the config option the macro expands to an empty
 * statement.
 */
#ifdef CONFIG_FTRACE_RECORD_RECURSION
|
||||
extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
|
||||
# define do_ftrace_record_recursion(ip, pip) \
|
||||
do { \
|
||||
if (!trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \
|
||||
trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \
|
||||
ftrace_record_recursion(ip, pip); \
|
||||
trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
# define do_ftrace_record_recursion(ip, pip) do { } while (0)
|
||||
#endif
|
||||
|
||||
static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
|
||||
int start, int max)
|
||||
{
|
||||
unsigned int val = current->trace_recursion;
|
||||
int bit;
|
||||
@ -158,8 +176,10 @@ static __always_inline int trace_test_and_set_recursion(int start, int max)
|
||||
* a switch between contexts. Allow for a single recursion.
|
||||
*/
|
||||
bit = TRACE_TRANSITION_BIT;
|
||||
if (trace_recursion_test(bit))
|
||||
if (trace_recursion_test(bit)) {
|
||||
do_ftrace_record_recursion(ip, pip);
|
||||
return -1;
|
||||
}
|
||||
trace_recursion_set(bit);
|
||||
barrier();
|
||||
return bit + 1;
|
||||
@ -199,9 +219,10 @@ static __always_inline void trace_clear_recursion(int bit)
|
||||
* Returns: -1 if a recursion happened.
|
||||
* >= 0 if no recursion
|
||||
*/
|
||||
static __always_inline int ftrace_test_recursion_trylock(void)
|
||||
static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
|
||||
unsigned long parent_ip)
|
||||
{
|
||||
return trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
|
||||
return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -49,7 +49,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
|
||||
|
||||
ops = container_of(fops, struct klp_ops, fops);
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (WARN_ON_ONCE(bit < 0))
|
||||
return;
|
||||
/*
|
||||
|
@ -727,6 +727,31 @@ config TRACE_EVAL_MAP_FILE
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config FTRACE_RECORD_RECURSION
|
||||
bool "Record functions that recurse in function tracing"
|
||||
depends on FUNCTION_TRACER
|
||||
help
|
||||
All callbacks that attach to the function tracing have some sort
|
||||
of protection against recursion. Even though the protection exists,
|
||||
it adds overhead. This option will create a file in the tracefs
|
||||
file system called "recursed_functions" that will list the functions
|
||||
that triggered a recursion.
|
||||
|
||||
This will add more overhead to cases that have recursion.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config FTRACE_RECORD_RECURSION_SIZE
|
||||
int "Max number of recursed functions to record"
|
||||
default 128
|
||||
depends on FTRACE_RECORD_RECURSION
|
||||
help
|
||||
This defines the limit of number of functions that can be
|
||||
listed in the "recursed_functions" file, that lists all
|
||||
the functions that caused a recursion to happen.
|
||||
This file can be reset, but the limit can not change in
|
||||
size at runtime.
|
||||
|
||||
config GCOV_PROFILE_FTRACE
|
||||
bool "Enable GCOV profiling on ftrace subsystem"
|
||||
depends on GCOV_KERNEL
|
||||
|
@ -92,6 +92,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
|
||||
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
|
||||
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
|
||||
obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
|
||||
obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
|
||||
|
||||
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
|
||||
|
||||
|
@ -6918,7 +6918,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op;
|
||||
int bit;
|
||||
|
||||
bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
|
||||
bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
@ -6993,7 +6993,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
|
||||
{
|
||||
int bit;
|
||||
|
||||
bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
|
||||
bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
|
@ -447,7 +447,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
|
||||
if ((unsigned long)ops->private != smp_processor_id())
|
||||
return;
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
|
@ -141,7 +141,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
|
||||
if (unlikely(!tr->function_enabled))
|
||||
return;
|
||||
|
||||
bit = ftrace_test_recursion_trylock();
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
|
@ -353,8 +353,8 @@ static inline const char *kretprobed(const char *name)
|
||||
}
|
||||
#endif /* CONFIG_KRETPROBES */
|
||||
|
||||
static void
|
||||
seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
|
||||
void
|
||||
trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
|
||||
{
|
||||
#ifdef CONFIG_KALLSYMS
|
||||
char str[KSYM_SYMBOL_LEN];
|
||||
@ -420,7 +420,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
|
||||
goto out;
|
||||
}
|
||||
|
||||
seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
|
||||
trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
|
||||
|
||||
if (sym_flags & TRACE_ITER_SYM_ADDR)
|
||||
trace_seq_printf(s, " <" IP_FMT ">", ip);
|
||||
|
@ -16,6 +16,7 @@ extern int
|
||||
seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
|
||||
unsigned long sym_flags);
|
||||
|
||||
extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset);
|
||||
extern int trace_print_context(struct trace_iterator *iter);
|
||||
extern int trace_print_lat_context(struct trace_iterator *iter);
|
||||
|
||||
|
236
kernel/trace/trace_recursion_record.c
Normal file
236
kernel/trace/trace_recursion_record.c
Normal file
@ -0,0 +1,236 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include "trace_output.h"
|
||||
|
||||
/*
 * One recorded recursion, as passed to ftrace_record_recursion().
 * An ip of zero marks a slot that has not been claimed yet.
 */
struct recursed_functions {
|
||||
unsigned long ip;
|
||||
unsigned long parent_ip;
|
||||
};
|
||||
|
||||
/* Fixed-size table of records; its size is set at build time by Kconfig. */
static struct recursed_functions recursed_functions[CONFIG_FTRACE_RECORD_RECURSION_SIZE];
|
||||
/* Number of slots in use, or -1 while the table is being cleared. */
static atomic_t nr_records;
|
||||
|
||||
/*
|
||||
* Cache the last found function. Yes, updates to this are racy, but
|
||||
* so is memory cache ;-)
|
||||
*/
|
||||
static unsigned long cached_function;
|
||||
|
||||
void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip)
|
||||
{
|
||||
int index = 0;
|
||||
int i;
|
||||
unsigned long old;
|
||||
|
||||
again:
|
||||
/* First check the last one recorded */
|
||||
if (ip == cached_function)
|
||||
return;
|
||||
|
||||
i = atomic_read(&nr_records);
|
||||
/* nr_records is -1 when clearing records */
|
||||
smp_mb__after_atomic();
|
||||
if (i < 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If there's two writers and this writer comes in second,
|
||||
* the cmpxchg() below to update the ip will fail. Then this
|
||||
* writer will try again. It is possible that index will now
|
||||
* be greater than nr_records. This is because the writer
|
||||
* that succeeded has not updated the nr_records yet.
|
||||
* This writer could keep trying again until the other writer
|
||||
* updates nr_records. But if the other writer takes an
|
||||
* interrupt, and that interrupt locks up that CPU, we do
|
||||
* not want this CPU to lock up due to the recursion protection,
|
||||
* and have a bug report showing this CPU as the cause of
|
||||
* locking up the computer. To not lose this record, this
|
||||
* writer will simply use the next position to update the
|
||||
* recursed_functions, and it will update the nr_records
|
||||
* accordingly.
|
||||
*/
|
||||
if (index < i)
|
||||
index = i;
|
||||
if (index >= CONFIG_FTRACE_RECORD_RECURSION_SIZE)
|
||||
return;
|
||||
|
||||
for (i = index - 1; i >= 0; i--) {
|
||||
if (recursed_functions[i].ip == ip) {
|
||||
cached_function = ip;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
cached_function = ip;
|
||||
|
||||
/*
|
||||
* We only want to add a function if it hasn't been added before.
|
||||
* Add to the current location before incrementing the count.
|
||||
* If it fails to add, then increment the index (save in i)
|
||||
* and try again.
|
||||
*/
|
||||
old = cmpxchg(&recursed_functions[index].ip, 0, ip);
|
||||
if (old != 0) {
|
||||
/* Did something else already added this for us? */
|
||||
if (old == ip)
|
||||
return;
|
||||
/* Try the next location (use i for the next index) */
|
||||
index++;
|
||||
goto again;
|
||||
}
|
||||
|
||||
recursed_functions[index].parent_ip = parent_ip;
|
||||
|
||||
/*
|
||||
* It's still possible that we could race with the clearing
|
||||
* CPU0 CPU1
|
||||
* ---- ----
|
||||
* ip = func
|
||||
* nr_records = -1;
|
||||
* recursed_functions[0] = 0;
|
||||
* i = -1
|
||||
* if (i < 0)
|
||||
* nr_records = 0;
|
||||
* (new recursion detected)
|
||||
* recursed_functions[0] = func
|
||||
* cmpxchg(recursed_functions[0],
|
||||
* func, 0)
|
||||
*
|
||||
* But the worse that could happen is that we get a zero in
|
||||
* the recursed_functions array, and it's likely that "func" will
|
||||
* be recorded again.
|
||||
*/
|
||||
i = atomic_read(&nr_records);
|
||||
smp_mb__after_atomic();
|
||||
if (i < 0)
|
||||
cmpxchg(&recursed_functions[index].ip, ip, 0);
|
||||
else if (i <= index)
|
||||
atomic_cmpxchg(&nr_records, i, index + 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ftrace_record_recursion);
|
||||
|
||||
/*
 * Taken in recursed_function_seq_start() and recursed_function_open(), so a
 * read pass over the table and a truncating open exclude each other.
 */
static DEFINE_MUTEX(recursed_function_lock);
|
||||
/* Scratch buffer for formatting one line; allocated in ->start, freed in ->stop. */
static struct trace_seq *tseq;
|
||||
|
||||
/*
 * seq_file ->start() callback.
 *
 * Takes recursed_function_lock, allocates the formatting buffer, and
 * returns the record at *pos, or NULL when *pos is at or beyond the number
 * of recorded entries. Returns ERR_PTR(-ENOMEM) if the buffer cannot be
 * allocated. The lock is still held on every return path;
 * recursed_function_seq_stop() is what releases it.
 */
static void *recursed_function_seq_start(struct seq_file *m, loff_t *pos)
{
	void *record = NULL;
	int nr;

	mutex_lock(&recursed_function_lock);

	nr = atomic_read(&nr_records);
	if (*pos < nr)
		record = &recursed_functions[*pos];

	tseq = kzalloc(sizeof(*tseq), GFP_KERNEL);
	if (tseq == NULL)
		return ERR_PTR(-ENOMEM);
	trace_seq_init(tseq);

	return record;
}
|
||||
|
||||
/*
 * seq_file ->next() callback: advance *pos by one and return the record at
 * the new position, or NULL once it reaches the current number of records.
 */
static void *recursed_function_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	int nr = atomic_read(&nr_records);
	int next;

	next = ++(*pos);
	if (next >= nr)
		return NULL;

	return &recursed_functions[next];
}
|
||||
|
||||
static void recursed_function_seq_stop(struct seq_file *m, void *v)
|
||||
{
|
||||
kfree(tseq);
|
||||
mutex_unlock(&recursed_function_lock);
|
||||
}
|
||||
|
||||
static int recursed_function_seq_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct recursed_functions *record = v;
|
||||
int ret = 0;
|
||||
|
||||
if (record) {
|
||||
trace_seq_print_sym(tseq, record->parent_ip, true);
|
||||
trace_seq_puts(tseq, ":\t");
|
||||
trace_seq_print_sym(tseq, record->ip, true);
|
||||
trace_seq_putc(tseq, '\n');
|
||||
ret = trace_print_seq(m, tseq);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct seq_operations recursed_function_seq_ops = {
|
||||
.start = recursed_function_seq_start,
|
||||
.next = recursed_function_seq_next,
|
||||
.stop = recursed_function_seq_stop,
|
||||
.show = recursed_function_seq_show
|
||||
};
|
||||
|
||||
static int recursed_function_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&recursed_function_lock);
|
||||
/* If this file was opened for write, then erase contents */
|
||||
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
|
||||
/* disable updating records */
|
||||
atomic_set(&nr_records, -1);
|
||||
smp_mb__after_atomic();
|
||||
memset(recursed_functions, 0, sizeof(recursed_functions));
|
||||
smp_wmb();
|
||||
/* enable them again */
|
||||
atomic_set(&nr_records, 0);
|
||||
}
|
||||
if (file->f_mode & FMODE_READ)
|
||||
ret = seq_open(file, &recursed_function_seq_ops);
|
||||
mutex_unlock(&recursed_function_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t recursed_function_write(struct file *file,
|
||||
const char __user *buffer,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
return count;
|
||||
}
|
||||
|
||||
static int recursed_function_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
if (file->f_mode & FMODE_READ)
|
||||
seq_release(inode, file);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations recursed_functions_fops = {
|
||||
.open = recursed_function_open,
|
||||
.write = recursed_function_write,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = recursed_function_release,
|
||||
};
|
||||
|
||||
__init static int create_recursed_functions(void)
|
||||
{
|
||||
struct dentry *dentry;
|
||||
|
||||
dentry = trace_create_file("recursed_functions", 0644, NULL, NULL,
|
||||
&recursed_functions_fops);
|
||||
if (!dentry)
|
||||
pr_warn("WARNING: Failed to create recursed_functions\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
fs_initcall(create_recursed_functions);
|
Loading…
Reference in New Issue
Block a user