32c0edaeaa
Now that the trace_event structure is embedded in the ftrace_event_call
structure, there is no need for the ftrace_event_call id field.
The id field is the same as the trace_event type field.
Removing the id field and re-arranging the structure brings the tracepoint
footprint down by another 5K:
   text    data     bss     dec     hex filename
4913961 1088356  861512 6863829  68bbd5 vmlinux.orig
4895024 1023812  861512 6780348  6775bc vmlinux.print
4894944 1018052  861512 6774508  675eec vmlinux.id
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
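To illustrate the idea behind the change, here is a small standalone sketch in plain userspace C (the field layouts are simplified assumptions, not the kernel definitions): once struct trace_event is embedded in struct ftrace_event_call, its type member already carries the event number, so a separate id field only duplicates it.

#include <stdio.h>

/* Simplified stand-ins for the kernel structures (illustrative only). */
struct trace_event {
        int type;                       /* event number */
};

struct ftrace_event_call {
        struct trace_event event;       /* embedded: event.type serves as the id */
        /* int id; -- removed by this patch, it always equaled event.type */
        int perf_refcount;
};

int main(void)
{
        struct ftrace_event_call call = { .event = { .type = 42 } };

        /*
         * Lookups now match on the embedded type instead of a separate id,
         * as perf_trace_enable()/perf_trace_disable() do below.
         */
        printf("event id: %d\n", call.event.type);
        return 0;
}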
/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);

EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);

static char *perf_trace_buf;
static char *perf_trace_buf_nmi;

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
        perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
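
/*
 * First user of this event: allocate the shared per-cpu trace buffers
 * (regular and NMI flavours) if none exist yet, then register the
 * event's perf probe, either through the class reg() callback or
 * directly with the tracepoint. Further users only bump the refcount.
 */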
static int perf_trace_event_enable(struct ftrace_event_call *event)
{
        char *buf;
        int ret = -ENOMEM;

        if (event->perf_refcount++ > 0)
                return 0;

        if (!total_ref_count) {
                buf = (char *)alloc_percpu(perf_trace_t);
                if (!buf)
                        goto fail_buf;

                rcu_assign_pointer(perf_trace_buf, buf);

                buf = (char *)alloc_percpu(perf_trace_t);
                if (!buf)
                        goto fail_buf_nmi;

                rcu_assign_pointer(perf_trace_buf_nmi, buf);
        }

        if (event->class->reg)
                ret = event->class->reg(event, TRACE_REG_PERF_REGISTER);
        else
                ret = tracepoint_probe_register(event->name,
                                                event->class->perf_probe,
                                                event);
        if (!ret) {
                total_ref_count++;
                return 0;
        }

fail_buf_nmi:
        if (!total_ref_count) {
                free_percpu(perf_trace_buf_nmi);
                free_percpu(perf_trace_buf);
                perf_trace_buf_nmi = NULL;
                perf_trace_buf = NULL;
        }
fail_buf:
        event->perf_refcount--;

        return ret;
}
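
/*
 * Find the event whose event.type matches @event_id, take a reference
 * on its owning module and enable perf tracing for it.
 */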
int perf_trace_enable(int event_id)
{
        struct ftrace_event_call *event;
        int ret = -EINVAL;

        mutex_lock(&event_mutex);
        list_for_each_entry(event, &ftrace_events, list) {
                if (event->event.type == event_id &&
                    event->class && event->class->perf_probe &&
                    try_module_get(event->mod)) {
                        ret = perf_trace_event_enable(event);
                        break;
                }
        }
        mutex_unlock(&event_mutex);

        return ret;
}
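
/*
 * Drop one user of @event. On the last one, unregister the perf probe;
 * once no event is in use system wide, wait for in-flight users with
 * synchronize_sched() and free the shared per-cpu buffers.
 */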
static void perf_trace_event_disable(struct ftrace_event_call *event)
{
        char *buf, *nmi_buf;

        if (--event->perf_refcount > 0)
                return;

        if (event->class->reg)
                event->class->reg(event, TRACE_REG_PERF_UNREGISTER);
        else
                tracepoint_probe_unregister(event->name, event->class->perf_probe, event);

        if (!--total_ref_count) {
                buf = perf_trace_buf;
                rcu_assign_pointer(perf_trace_buf, NULL);

                nmi_buf = perf_trace_buf_nmi;
                rcu_assign_pointer(perf_trace_buf_nmi, NULL);

                /*
                 * Ensure all events in profiling have finished before
                 * releasing the buffers
                 */
                synchronize_sched();

                free_percpu(buf);
                free_percpu(nmi_buf);
        }
}
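
/*
 * Counterpart of perf_trace_enable(): find the event whose event.type
 * matches @event_id, disable it and drop the module reference taken at
 * enable time.
 */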
void perf_trace_disable(int event_id)
{
        struct ftrace_event_call *event;

        mutex_lock(&event_mutex);
        list_for_each_entry(event, &ftrace_events, list) {
                if (event->event.type == event_id) {
                        perf_trace_event_disable(event);
                        module_put(event->mod);
                        break;
                }
        }
        mutex_unlock(&event_mutex);
}
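
/*
 * Get the per-cpu perf trace buffer (the NMI variant when called from
 * NMI context) for an entry of @size bytes, fill in the generic
 * trace_entry header and return it, or NULL on failure. On success,
 * interrupts stay disabled and the recursion context stays held for
 * the caller.
 */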
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
                                       int *rctxp, unsigned long *irq_flags)
{
        struct trace_entry *entry;
        char *trace_buf, *raw_data;
        int pc, cpu;

        BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

        pc = preempt_count();

        /* Protect the per cpu buffer, begin the rcu read side */
        local_irq_save(*irq_flags);

        *rctxp = perf_swevent_get_recursion_context();
        if (*rctxp < 0)
                goto err_recursion;

        cpu = smp_processor_id();

        if (in_nmi())
                trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
        else
                trace_buf = rcu_dereference_sched(perf_trace_buf);

        if (!trace_buf)
                goto err;

        raw_data = per_cpu_ptr(trace_buf, cpu);

        /* Zero the dead bytes from alignment to avoid leaking stack to user */
        memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

        entry = (struct trace_entry *)raw_data;
        tracing_generic_entry_update(entry, *irq_flags, pc);
        entry->type = type;

        return raw_data;
err:
        perf_swevent_put_recursion_context(*rctxp);
err_recursion:
        local_irq_restore(*irq_flags);
        return NULL;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);