/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned accesses
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
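
/*
 * Attach p_event to tp_event: take a perf reference on the trace event,
 * allocate its per-cpu list of active perf events on first use, allocate
 * the shared per-context scratch buffers if no trace event was in use
 * yet, and register the perf callback via class->reg().
 * Called under event_mutex.
 */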
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	struct hlist_head __percpu *list;
	int ret = -ENOMEM;
	int cpu;

	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	if (!total_ref_count) {
		char __percpu *buf;
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			buf = (char __percpu *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail;

			perf_trace_buf[i] = buf;
		}
	}

	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;

fail:
	if (!total_ref_count) {
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}

	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}
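
/*
 * Called when perf sets up a tracepoint event: find the trace event
 * whose id matches p_event->attr.config, pin its module and initialize
 * its perf state.  The module reference is dropped here on failure and
 * otherwise in perf_trace_destroy().
 */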
int perf_trace_init(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event;
	int event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    try_module_get(tp_event->mod)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				module_put(tp_event->mod);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}
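
/*
 * pmu::add callback: put p_event on the per-cpu list of active perf
 * events for its trace event.  Unless PERF_EF_START is set, the event
 * starts out in the stopped state.
 */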
int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	struct hlist_head __percpu *pcpu_list;
	struct hlist_head *list;

	pcpu_list = tp_event->perf_events;
	if (WARN_ON_ONCE(!pcpu_list))
		return -EINVAL;

	if (!(flags & PERF_EF_START))
		p_event->hw.state = PERF_HES_STOPPED;

	list = this_cpu_ptr(pcpu_list);
	hlist_add_head_rcu(&p_event->hlist_entry, list);

	return 0;
}
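
/*
 * pmu::del callback: remove p_event from its per-cpu active list.
 */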
void perf_trace_del(struct perf_event *p_event, int flags)
{
	hlist_del_rcu(&p_event->hlist_entry);
}
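
/*
 * Drop the perf reference taken in perf_trace_event_init().  On the
 * last user of the trace event, unregister the perf callback, wait for
 * in-flight probes to finish and free the per-cpu lists; if no trace
 * event remains in use, also free the shared scratch buffers.
 */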
void perf_trace_destroy(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int i;

	mutex_lock(&event_mutex);
	if (--tp_event->perf_refcount > 0)
		goto out;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * will be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
out:
	module_put(tp_event->mod);
	mutex_unlock(&event_mutex);
}
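
/*
 * Hand out the per-context scratch buffer for the current recursion
 * context and fill in the generic trace_entry header.  Returns the raw
 * buffer with *rctxp set, or NULL when recursion is detected.  Callers
 * (the generated perf probes) are expected to fill in the payload and
 * pass the buffer on to perf, typically via perf_trace_buf_submit(),
 * which lives in the trace event headers rather than in this file.
 */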
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       struct pt_regs *regs, int *rctxp)
{
	struct trace_entry *entry;
	unsigned long flags;
	char *raw_data;
	int pc;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	pc = preempt_count();

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		return NULL;

	raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

	/* zero the dead bytes from align to not leak stack to user */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

	entry = (struct trace_entry *)raw_data;
	local_save_flags(flags);
	tracing_generic_entry_update(entry, flags, pc);
	entry->type = type;

	return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);