2009-08-14 00:35:11 +04:00
/*
2009-11-04 03:12:47 +03:00
* Kprobes - based tracing events
2009-08-14 00:35:11 +04:00
*
* Created by Masami Hiramatsu < mhiramat @ redhat . com >
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
# include <linux/module.h>
# include <linux/uaccess.h>
# include <linux/kprobes.h>
# include <linux/seq_file.h>
# include <linux/slab.h>
# include <linux/smp.h>
# include <linux/debugfs.h>
# include <linux/types.h>
# include <linux/string.h>
# include <linux/ctype.h>
# include <linux/ptrace.h>
2009-09-24 01:08:43 +04:00
# include <linux/perf_event.h>
2009-08-14 00:35:11 +04:00
# include "trace.h"
# include "trace_output.h"
2009-08-14 00:35:18 +04:00
# define MAX_TRACE_ARGS 128
2009-08-14 00:35:11 +04:00
# define MAX_ARGSTR_LEN 63
2009-08-14 00:35:26 +04:00
# define MAX_EVENT_NAME_LEN 64
2009-09-11 03:53:53 +04:00
# define KPROBE_EVENT_SYSTEM "kprobes"
2009-08-14 00:35:11 +04:00
2009-10-08 02:28:07 +04:00
/* Reserved field names */
2009-10-08 02:28:14 +04:00
# define FIELD_STRING_IP "__probe_ip"
# define FIELD_STRING_NARGS "__probe_nargs"
# define FIELD_STRING_RETIP "__probe_ret_ip"
# define FIELD_STRING_FUNC "__probe_func"
2009-10-08 02:28:07 +04:00
/*
 * Names that a probe argument is not allowed to use.
 * conflict_field_name() compares candidate names against this table.
 */
const char *reserved_field_names[] = {
	/* common_* names */
	" common_type ", " common_flags ", " common_preempt_count ",
	" common_pid ", " common_tgid ", " common_lock_depth ",
	/* names supplied by the FIELD_STRING_* macros */
	FIELD_STRING_IP, FIELD_STRING_NARGS,
	FIELD_STRING_RETIP, FIELD_STRING_FUNC,
};
2009-08-14 00:35:11 +04:00
/* One fetch operation: a handler and the opaque value handed to it. */
struct fetch_func {
	/* handler; call_fetch() invokes it as func(regs, data) */
	unsigned long (*func)(struct pt_regs *regs, void *data);
	/* second argument for func; its meaning depends on the handler */
	void *data;
};
static __kprobes unsigned long call_fetch ( struct fetch_func * f ,
struct pt_regs * regs )
{
return f - > func ( regs , f - > data ) ;
}
/* fetch handlers */
/*
 * Fetch handler: read a register via regs_get_register().
 * @offset carries an integer (stored in a pointer) selecting the register.
 */
static __kprobes unsigned long fetch_register(struct pt_regs *regs,
					      void *offset)
{
	unsigned int reg = (unsigned int)((unsigned long)offset);

	return regs_get_register(regs, reg);
}
/*
 * Fetch handler: read a stack entry via regs_get_kernel_stack_nth().
 * @num carries the entry index as an integer stored in a pointer.
 */
static __kprobes unsigned long fetch_stack(struct pt_regs *regs,
					   void *num)
{
	unsigned int nth = (unsigned int)((unsigned long)num);

	return regs_get_kernel_stack_nth(regs, nth);
}
/*
 * Fetch handler: read one unsigned long from @addr with
 * probe_kernel_address(); yields 0 when that read reports failure.
 */
static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
{
	unsigned long value;

	return probe_kernel_address(addr, value) ? 0 : value;
}
/*
 * Fetch handler: read a function argument via regs_get_argument_nth().
 * @num carries the argument index as an integer stored in a pointer.
 */
static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
{
	unsigned int nth = (unsigned int)((unsigned long)num);

	return regs_get_argument_nth(regs, nth);
}
/*
 * Fetch handler: yield whatever regs_return_value() reports for @regs.
 * @dummy is not used.
 */
static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
					      void *dummy)
{
	unsigned long value = regs_return_value(regs);

	return value;
}
/*
 * Fetch handler: yield whatever kernel_stack_pointer() reports for @regs.
 * @dummy is not used.
 */
static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs,
						   void *dummy)
{
	unsigned long sp = kernel_stack_pointer(regs);

	return sp;
}
/* Memory fetching by symbol */
/* A symbol name, an offset, and the address computed from them. */
struct symbol_cache {
	char *symbol;		/* kstrdup()ed name, freed by free_symbol_cache() */
	long offset;		/* added to the looked-up address */
	unsigned long addr;	/* lookup result plus offset; 0 if lookup failed */
};
static unsigned long update_symbol_cache ( struct symbol_cache * sc )
{
sc - > addr = ( unsigned long ) kallsyms_lookup_name ( sc - > symbol ) ;
if ( sc - > addr )
sc - > addr + = sc - > offset ;
return sc - > addr ;
}
static void free_symbol_cache ( struct symbol_cache * sc )
{
kfree ( sc - > symbol ) ;
kfree ( sc ) ;
}
static struct symbol_cache * alloc_symbol_cache ( const char * sym , long offset )
{
struct symbol_cache * sc ;
if ( ! sym | | strlen ( sym ) = = 0 )
return NULL ;
sc = kzalloc ( sizeof ( struct symbol_cache ) , GFP_KERNEL ) ;
if ( ! sc )
return NULL ;
sc - > symbol = kstrdup ( sym , GFP_KERNEL ) ;
if ( ! sc - > symbol ) {
kfree ( sc ) ;
return NULL ;
}
sc - > offset = offset ;
update_symbol_cache ( sc ) ;
return sc ;
}
static __kprobes unsigned long fetch_symbol ( struct pt_regs * regs , void * data )
{
struct symbol_cache * sc = data ;
if ( sc - > addr )
return fetch_memory ( regs , ( void * ) sc - > addr ) ;
else
return 0 ;
}
/* Special indirect memory access interface */
/* Data for fetch_indirect(): an inner fetch whose result is dereferenced. */
struct indirect_fetch_data {
	struct fetch_func orig;	/* inner fetch that produces the base address */
	long offset;		/* added to the base before the memory read */
};
static __kprobes unsigned long fetch_indirect ( struct pt_regs * regs , void * data )
{
struct indirect_fetch_data * ind = data ;
unsigned long addr ;
addr = call_fetch ( & ind - > orig , regs ) ;
if ( addr ) {
addr + = ind - > offset ;
return fetch_memory ( regs , ( void * ) addr ) ;
} else
return 0 ;
}
static __kprobes void free_indirect_fetch_data ( struct indirect_fetch_data * data )
{
if ( data - > orig . func = = fetch_indirect )
free_indirect_fetch_data ( data - > orig . data ) ;
else if ( data - > orig . func = = fetch_symbol )
free_symbol_cache ( data - > orig . data ) ;
kfree ( data ) ;
}
/**
2009-11-04 03:12:47 +03:00
* Kprobe event core functions
2009-08-14 00:35:11 +04:00
*/
2009-09-11 03:53:38 +04:00
/* One argument of a probe: how to fetch it and what it is called. */
struct probe_arg {
	struct fetch_func fetch;	/* fetch operation run when the probe hits */
	const char *name;		/* kstrdup()ed name, freed by free_probe_arg() */
};
2009-09-15 00:49:20 +04:00
/* Flags for trace_probe */
# define TP_FLAG_TRACE 1
# define TP_FLAG_PROFILE 2
2009-08-14 00:35:11 +04:00
struct trace_probe {
struct list_head list ;
2009-09-11 07:31:21 +04:00
struct kretprobe rp ; /* Use rp.kp for kprobe use */
2009-08-14 00:35:42 +04:00
unsigned long nhit ;
2009-09-15 00:49:20 +04:00
unsigned int flags ; /* For TP_FLAG_* */
2009-08-14 00:35:11 +04:00
const char * symbol ; /* symbol name */
struct ftrace_event_call call ;
2009-08-14 00:35:34 +04:00
struct trace_event event ;
2009-08-14 00:35:18 +04:00
unsigned int nr_args ;
2009-09-11 03:53:38 +04:00
struct probe_arg args [ ] ;
2009-08-14 00:35:11 +04:00
} ;
2009-08-14 00:35:18 +04:00
/* Bytes needed for a struct trace_probe that carries (n) arguments. */
#define SIZEOF_TRACE_PROBE(n)				\
	(offsetof(struct trace_probe, args) +		\
	 ((n) * sizeof(struct probe_arg)))
2009-08-14 00:35:18 +04:00
2009-08-14 00:35:11 +04:00
static __kprobes int probe_is_return ( struct trace_probe * tp )
{
2009-09-11 07:31:21 +04:00
return tp - > rp . handler ! = NULL ;
2009-08-14 00:35:11 +04:00
}
static __kprobes const char * probe_symbol ( struct trace_probe * tp )
{
return tp - > symbol ? tp - > symbol : " unknown " ;
}
2009-08-21 23:43:51 +04:00
static int probe_arg_string ( char * buf , size_t n , struct fetch_func * ff )
2009-08-14 00:35:11 +04:00
{
int ret = - EINVAL ;
if ( ff - > func = = fetch_argument )
2009-10-08 02:27:59 +04:00
ret = snprintf ( buf , n , " $arg%lu " , ( unsigned long ) ff - > data ) ;
2009-08-14 00:35:11 +04:00
else if ( ff - > func = = fetch_register ) {
const char * name ;
name = regs_query_register_name ( ( unsigned int ) ( ( long ) ff - > data ) ) ;
ret = snprintf ( buf , n , " %%%s " , name ) ;
} else if ( ff - > func = = fetch_stack )
2009-10-08 02:27:59 +04:00
ret = snprintf ( buf , n , " $stack%lu " , ( unsigned long ) ff - > data ) ;
2009-08-14 00:35:11 +04:00
else if ( ff - > func = = fetch_memory )
ret = snprintf ( buf , n , " @0x%p " , ff - > data ) ;
else if ( ff - > func = = fetch_symbol ) {
struct symbol_cache * sc = ff - > data ;
2009-11-25 11:33:15 +03:00
if ( sc - > offset )
ret = snprintf ( buf , n , " @%s%+ld " , sc - > symbol ,
sc - > offset ) ;
else
ret = snprintf ( buf , n , " @%s " , sc - > symbol ) ;
2009-08-14 00:35:11 +04:00
} else if ( ff - > func = = fetch_retvalue )
2009-10-08 02:27:59 +04:00
ret = snprintf ( buf , n , " $retval " ) ;
2009-08-14 00:35:11 +04:00
else if ( ff - > func = = fetch_stack_address )
2009-10-08 02:27:59 +04:00
ret = snprintf ( buf , n , " $stack " ) ;
2009-08-14 00:35:11 +04:00
else if ( ff - > func = = fetch_indirect ) {
struct indirect_fetch_data * id = ff - > data ;
size_t l = 0 ;
ret = snprintf ( buf , n , " %+ld( " , id - > offset ) ;
if ( ret > = n )
goto end ;
l + = ret ;
2009-08-21 23:43:51 +04:00
ret = probe_arg_string ( buf + l , n - l , & id - > orig ) ;
2009-08-14 00:35:11 +04:00
if ( ret < 0 )
goto end ;
l + = ret ;
ret = snprintf ( buf + l , n - l , " ) " ) ;
ret + = l ;
}
end :
if ( ret > = n )
return - ENOSPC ;
return ret ;
}
/* Forward declarations for the event (un)registration helpers. */
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/*
 * probe_list holds every registered trace_probe. probe_lock is taken by
 * register_trace_probe(), cleanup_all_probes() and the seq_file iterators.
 */
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

/* Forward declarations for the handlers installed by alloc_trace_probe(). */
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);
2009-09-11 07:31:21 +04:00
/*
* Allocate new trace_probe and initialize it ( including kprobes ) .
*/
2009-09-11 03:53:53 +04:00
static struct trace_probe * alloc_trace_probe ( const char * group ,
const char * event ,
2009-09-11 07:31:21 +04:00
void * addr ,
const char * symbol ,
unsigned long offs ,
int nargs , int is_return )
2009-08-14 00:35:11 +04:00
{
struct trace_probe * tp ;
2009-08-14 00:35:18 +04:00
tp = kzalloc ( SIZEOF_TRACE_PROBE ( nargs ) , GFP_KERNEL ) ;
2009-08-14 00:35:11 +04:00
if ( ! tp )
return ERR_PTR ( - ENOMEM ) ;
if ( symbol ) {
tp - > symbol = kstrdup ( symbol , GFP_KERNEL ) ;
if ( ! tp - > symbol )
goto error ;
2009-09-11 07:31:21 +04:00
tp - > rp . kp . symbol_name = tp - > symbol ;
tp - > rp . kp . offset = offs ;
} else
tp - > rp . kp . addr = addr ;
if ( is_return )
2009-09-15 00:49:20 +04:00
tp - > rp . handler = kretprobe_dispatcher ;
2009-09-11 07:31:21 +04:00
else
2009-09-15 00:49:20 +04:00
tp - > rp . kp . pre_handler = kprobe_dispatcher ;
2009-09-11 07:31:21 +04:00
2009-08-14 00:35:26 +04:00
if ( ! event )
goto error ;
tp - > call . name = kstrdup ( event , GFP_KERNEL ) ;
if ( ! tp - > call . name )
goto error ;
2009-08-14 00:35:11 +04:00
2009-09-11 03:53:53 +04:00
if ( ! group )
goto error ;
tp - > call . system = kstrdup ( group , GFP_KERNEL ) ;
if ( ! tp - > call . system )
goto error ;
2009-08-14 00:35:11 +04:00
INIT_LIST_HEAD ( & tp - > list ) ;
return tp ;
error :
2009-09-11 03:53:53 +04:00
kfree ( tp - > call . name ) ;
2009-08-14 00:35:11 +04:00
kfree ( tp - > symbol ) ;
kfree ( tp ) ;
return ERR_PTR ( - ENOMEM ) ;
}
2009-09-11 03:53:38 +04:00
static void free_probe_arg ( struct probe_arg * arg )
{
if ( arg - > fetch . func = = fetch_symbol )
free_symbol_cache ( arg - > fetch . data ) ;
else if ( arg - > fetch . func = = fetch_indirect )
free_indirect_fetch_data ( arg - > fetch . data ) ;
kfree ( arg - > name ) ;
}
2009-08-14 00:35:11 +04:00
static void free_trace_probe ( struct trace_probe * tp )
{
int i ;
for ( i = 0 ; i < tp - > nr_args ; i + + )
2009-09-11 03:53:38 +04:00
free_probe_arg ( & tp - > args [ i ] ) ;
2009-08-14 00:35:11 +04:00
2009-09-11 03:53:53 +04:00
kfree ( tp - > call . system ) ;
2009-08-14 00:35:11 +04:00
kfree ( tp - > call . name ) ;
kfree ( tp - > symbol ) ;
kfree ( tp ) ;
}
2009-10-27 23:42:44 +03:00
static struct trace_probe * find_probe_event ( const char * event ,
const char * group )
2009-08-14 00:35:11 +04:00
{
struct trace_probe * tp ;
list_for_each_entry ( tp , & probe_list , list )
2009-10-27 23:42:44 +03:00
if ( strcmp ( tp - > call . name , event ) = = 0 & &
strcmp ( tp - > call . system , group ) = = 0 )
2009-08-14 00:35:11 +04:00
return tp ;
return NULL ;
}
2009-09-15 00:48:56 +04:00
/* Unregister a trace_probe and probe_event: call with locking probe_lock */
static void unregister_trace_probe ( struct trace_probe * tp )
2009-08-14 00:35:11 +04:00
{
if ( probe_is_return ( tp ) )
unregister_kretprobe ( & tp - > rp ) ;
else
2009-09-11 07:31:21 +04:00
unregister_kprobe ( & tp - > rp . kp ) ;
2009-08-14 00:35:11 +04:00
list_del ( & tp - > list ) ;
2009-09-15 00:48:56 +04:00
unregister_probe_event ( tp ) ;
2009-08-14 00:35:11 +04:00
}
/* Register a trace_probe and probe_event */
static int register_trace_probe ( struct trace_probe * tp )
{
struct trace_probe * old_tp ;
int ret ;
mutex_lock ( & probe_lock ) ;
2009-09-15 00:48:56 +04:00
/* register as an event */
2009-10-27 23:42:44 +03:00
old_tp = find_probe_event ( tp - > call . name , tp - > call . system ) ;
2009-09-15 00:48:56 +04:00
if ( old_tp ) {
/* delete old event */
unregister_trace_probe ( old_tp ) ;
free_trace_probe ( old_tp ) ;
}
ret = register_probe_event ( tp ) ;
if ( ret ) {
pr_warning ( " Faild to register probe event(%d) \n " , ret ) ;
goto end ;
}
2009-09-15 00:49:37 +04:00
tp - > rp . kp . flags | = KPROBE_FLAG_DISABLED ;
2009-08-14 00:35:11 +04:00
if ( probe_is_return ( tp ) )
ret = register_kretprobe ( & tp - > rp ) ;
else
2009-09-11 07:31:21 +04:00
ret = register_kprobe ( & tp - > rp . kp ) ;
2009-08-14 00:35:11 +04:00
if ( ret ) {
pr_warning ( " Could not insert probe(%d) \n " , ret ) ;
if ( ret = = - EILSEQ ) {
pr_warning ( " Probing address(0x%p) is not an "
" instruction boundary. \n " ,
2009-09-11 07:31:21 +04:00
tp - > rp . kp . addr ) ;
2009-08-14 00:35:11 +04:00
ret = - EINVAL ;
}
2009-09-15 00:48:56 +04:00
unregister_probe_event ( tp ) ;
} else
list_add_tail ( & tp - > list , & probe_list ) ;
2009-08-14 00:35:11 +04:00
end :
mutex_unlock ( & probe_lock ) ;
return ret ;
}
/* Split symbol and offset. */
2009-09-11 03:53:14 +04:00
static int split_symbol_offset ( char * symbol , unsigned long * offset )
2009-08-14 00:35:11 +04:00
{
char * tmp ;
int ret ;
if ( ! offset )
return - EINVAL ;
tmp = strchr ( symbol , ' + ' ) ;
if ( tmp ) {
/* skip sign because strict_strtol doesn't accept '+' */
2009-09-11 03:53:14 +04:00
ret = strict_strtoul ( tmp + 1 , 0 , offset ) ;
2009-08-14 00:35:11 +04:00
if ( ret )
return ret ;
* tmp = ' \0 ' ;
} else
* offset = 0 ;
return 0 ;
}
# define PARAM_MAX_ARGS 16
# define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
2009-10-08 02:27:40 +04:00
static int parse_probe_vars ( char * arg , struct fetch_func * ff , int is_return )
2009-08-14 00:35:11 +04:00
{
int ret = 0 ;
unsigned long param ;
2009-10-08 02:27:59 +04:00
if ( strcmp ( arg , " retval " ) = = 0 ) {
if ( is_return ) {
2009-08-14 00:35:11 +04:00
ff - > func = fetch_retvalue ;
ff - > data = NULL ;
} else
ret = - EINVAL ;
2009-10-08 02:27:59 +04:00
} else if ( strncmp ( arg , " stack " , 5 ) = = 0 ) {
if ( arg [ 5 ] = = ' \0 ' ) {
2009-08-14 00:35:11 +04:00
ff - > func = fetch_stack_address ;
ff - > data = NULL ;
2009-10-08 02:27:59 +04:00
} else if ( isdigit ( arg [ 5 ] ) ) {
ret = strict_strtoul ( arg + 5 , 10 , & param ) ;
2009-08-14 00:35:11 +04:00
if ( ret | | param > PARAM_MAX_STACK )
ret = - EINVAL ;
else {
ff - > func = fetch_stack ;
ff - > data = ( void * ) param ;
}
2009-10-08 02:27:59 +04:00
} else
ret = - EINVAL ;
} else if ( strncmp ( arg , " arg " , 3 ) = = 0 & & isdigit ( arg [ 3 ] ) ) {
ret = strict_strtoul ( arg + 3 , 10 , & param ) ;
if ( ret | | param > PARAM_MAX_ARGS )
ret = - EINVAL ;
else {
ff - > func = fetch_argument ;
ff - > data = ( void * ) param ;
2009-08-14 00:35:11 +04:00
}
2009-10-08 02:27:59 +04:00
} else
2009-10-08 02:27:40 +04:00
ret = - EINVAL ;
return ret ;
}
2009-12-01 03:19:20 +03:00
/* Recursive argument parser */
static int __parse_probe_arg ( char * arg , struct fetch_func * ff , int is_return )
2009-10-08 02:27:40 +04:00
{
int ret = 0 ;
unsigned long param ;
long offset ;
char * tmp ;
switch ( arg [ 0 ] ) {
case ' $ ' :
ret = parse_probe_vars ( arg + 1 , ff , is_return ) ;
break ;
case ' % ' : /* named register */
ret = regs_query_register_offset ( arg + 1 ) ;
if ( ret > = 0 ) {
ff - > func = fetch_register ;
ff - > data = ( void * ) ( unsigned long ) ret ;
ret = 0 ;
}
break ;
2009-08-14 00:35:11 +04:00
case ' @ ' : /* memory or symbol */
if ( isdigit ( arg [ 1 ] ) ) {
ret = strict_strtoul ( arg + 1 , 0 , & param ) ;
if ( ret )
break ;
ff - > func = fetch_memory ;
ff - > data = ( void * ) param ;
} else {
ret = split_symbol_offset ( arg + 1 , & offset ) ;
if ( ret )
break ;
2009-10-08 02:27:40 +04:00
ff - > data = alloc_symbol_cache ( arg + 1 , offset ) ;
2009-08-14 00:35:11 +04:00
if ( ff - > data )
ff - > func = fetch_symbol ;
else
ret = - EINVAL ;
}
break ;
case ' + ' : /* indirect memory */
case ' - ' :
tmp = strchr ( arg , ' ( ' ) ;
if ( ! tmp ) {
ret = - EINVAL ;
break ;
}
* tmp = ' \0 ' ;
ret = strict_strtol ( arg + 1 , 0 , & offset ) ;
if ( ret )
break ;
if ( arg [ 0 ] = = ' - ' )
offset = - offset ;
arg = tmp + 1 ;
tmp = strrchr ( arg , ' ) ' ) ;
if ( tmp ) {
struct indirect_fetch_data * id ;
* tmp = ' \0 ' ;
id = kzalloc ( sizeof ( struct indirect_fetch_data ) ,
GFP_KERNEL ) ;
if ( ! id )
return - ENOMEM ;
id - > offset = offset ;
2009-12-01 03:19:20 +03:00
ret = __parse_probe_arg ( arg , & id - > orig , is_return ) ;
2009-08-14 00:35:11 +04:00
if ( ret )
kfree ( id ) ;
else {
ff - > func = fetch_indirect ;
ff - > data = ( void * ) id ;
}
} else
ret = - EINVAL ;
break ;
default :
/* TODO: support custom handler */
ret = - EINVAL ;
}
return ret ;
}
2009-12-01 03:19:20 +03:00
/*
 * String length checking wrapper: arguments longer than MAX_ARGSTR_LEN
 * are rejected with -ENOSPC, everything else goes to __parse_probe_arg().
 */
static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
{
	if (strlen(arg) <= MAX_ARGSTR_LEN)
		return __parse_probe_arg(arg, ff, is_return);

	pr_info(" Argument is too long.: %s \n ", arg);
	return -ENOSPC;
}
2009-10-08 02:28:07 +04:00
/*
 * Return 1 if name is reserved or already used by another argument.
 * Only the first @narg entries of @args are examined; 0 means no conflict.
 */
static int conflict_field_name(const char *name,
			       struct probe_arg *args, int narg)
{
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(reserved_field_names); idx++) {
		if (!strcmp(reserved_field_names[idx], name))
			return 1;
	}

	idx = 0;
	while (idx < narg) {
		if (!strcmp(args[idx].name, name))
			return 1;
		idx++;
	}

	return 0;
}
2009-08-14 00:35:11 +04:00
static int create_trace_probe ( int argc , char * * argv )
{
/*
* Argument syntax :
2009-09-11 03:53:53 +04:00
* - Add kprobe : p [ : [ GRP / ] EVENT ] KSYM [ + OFFS ] | KADDR [ FETCHARGS ]
* - Add kretprobe : r [ : [ GRP / ] EVENT ] KSYM [ + 0 ] [ FETCHARGS ]
2009-08-14 00:35:11 +04:00
* Fetch args :
2009-10-08 02:27:59 +04:00
* $ argN : fetch Nth of function argument . ( N : 0 - )
* $ retval : fetch return value
* $ stack : fetch stack address
* $ stackN : fetch Nth of stack ( N : 0 - )
2009-08-14 00:35:11 +04:00
* @ ADDR : fetch memory at ADDR ( ADDR should be in kernel )
* @ SYM [ + | - offs ] : fetch memory at SYM + | - offs ( SYM is a data symbol )
* % REG : fetch register REG
* Indirect memory fetch :
* + | - offs ( ARG ) : fetch memory at ARG + | - offs address .
2009-09-11 03:53:38 +04:00
* Alias name of args :
* NAME = FETCHARG : set NAME as alias of FETCHARG .
2009-08-14 00:35:11 +04:00
*/
struct trace_probe * tp ;
int i , ret = 0 ;
int is_return = 0 ;
2009-09-11 03:53:53 +04:00
char * symbol = NULL , * event = NULL , * arg = NULL , * group = NULL ;
2009-09-11 03:53:14 +04:00
unsigned long offset = 0 ;
2009-08-14 00:35:11 +04:00
void * addr = NULL ;
2009-09-11 07:31:21 +04:00
char buf [ MAX_EVENT_NAME_LEN ] ;
2009-08-14 00:35:11 +04:00
2009-10-17 04:07:28 +04:00
if ( argc < 2 ) {
pr_info ( " Probe point is not specified. \n " ) ;
2009-08-14 00:35:11 +04:00
return - EINVAL ;
2009-10-17 04:07:28 +04:00
}
2009-08-14 00:35:11 +04:00
if ( argv [ 0 ] [ 0 ] = = ' p ' )
is_return = 0 ;
else if ( argv [ 0 ] [ 0 ] = = ' r ' )
is_return = 1 ;
2009-10-17 04:07:28 +04:00
else {
pr_info ( " Probe definition must be started with 'p' or 'r'. \n " ) ;
2009-08-14 00:35:11 +04:00
return - EINVAL ;
2009-10-17 04:07:28 +04:00
}
2009-08-14 00:35:11 +04:00
if ( argv [ 0 ] [ 1 ] = = ' : ' ) {
event = & argv [ 0 ] [ 2 ] ;
2009-09-11 03:53:53 +04:00
if ( strchr ( event , ' / ' ) ) {
group = event ;
event = strchr ( group , ' / ' ) + 1 ;
event [ - 1 ] = ' \0 ' ;
if ( strlen ( group ) = = 0 ) {
pr_info ( " Group name is not specifiled \n " ) ;
return - EINVAL ;
}
}
2009-08-14 00:35:11 +04:00
if ( strlen ( event ) = = 0 ) {
pr_info ( " Event name is not specifiled \n " ) ;
return - EINVAL ;
}
}
if ( isdigit ( argv [ 1 ] [ 0 ] ) ) {
2009-10-17 04:07:28 +04:00
if ( is_return ) {
pr_info ( " Return probe point must be a symbol. \n " ) ;
2009-08-14 00:35:11 +04:00
return - EINVAL ;
2009-10-17 04:07:28 +04:00
}
2009-08-14 00:35:11 +04:00
/* an address specified */
ret = strict_strtoul ( & argv [ 0 ] [ 2 ] , 0 , ( unsigned long * ) & addr ) ;
2009-10-17 04:07:28 +04:00
if ( ret ) {
pr_info ( " Failed to parse address. \n " ) ;
2009-08-14 00:35:11 +04:00
return ret ;
2009-10-17 04:07:28 +04:00
}
2009-08-14 00:35:11 +04:00
} else {
/* a symbol specified */
symbol = argv [ 1 ] ;
/* TODO: support .init module functions */
ret = split_symbol_offset ( symbol , & offset ) ;
2009-10-17 04:07:28 +04:00
if ( ret ) {
pr_info ( " Failed to parse symbol. \n " ) ;
2009-08-14 00:35:11 +04:00
return ret ;
2009-10-17 04:07:28 +04:00
}
if ( offset & & is_return ) {
pr_info ( " Return probe must be used without offset. \n " ) ;
2009-08-14 00:35:11 +04:00
return - EINVAL ;
2009-10-17 04:07:28 +04:00
}
2009-08-14 00:35:11 +04:00
}
2009-08-14 00:35:18 +04:00
argc - = 2 ; argv + = 2 ;
2009-08-14 00:35:11 +04:00
/* setup a probe */
2009-09-11 03:53:53 +04:00
if ( ! group )
group = KPROBE_EVENT_SYSTEM ;
2009-08-14 00:35:26 +04:00
if ( ! event ) {
/* Make a new event name */
if ( symbol )
snprintf ( buf , MAX_EVENT_NAME_LEN , " %c@%s%+ld " ,
is_return ? ' r ' : ' p ' , symbol , offset ) ;
else
snprintf ( buf , MAX_EVENT_NAME_LEN , " %c@0x%p " ,
is_return ? ' r ' : ' p ' , addr ) ;
2009-09-11 07:31:21 +04:00
event = buf ;
}
2009-09-11 03:53:53 +04:00
tp = alloc_trace_probe ( group , event , addr , symbol , offset , argc ,
is_return ) ;
2009-10-17 04:07:28 +04:00
if ( IS_ERR ( tp ) ) {
pr_info ( " Failed to allocate trace_probe.(%d) \n " ,
( int ) PTR_ERR ( tp ) ) ;
2009-08-14 00:35:11 +04:00
return PTR_ERR ( tp ) ;
2009-10-17 04:07:28 +04:00
}
2009-08-14 00:35:11 +04:00
/* parse arguments */
2009-08-14 00:35:18 +04:00
ret = 0 ;
for ( i = 0 ; i < argc & & i < MAX_TRACE_ARGS ; i + + ) {
2009-09-11 03:53:38 +04:00
/* Parse argument name */
arg = strchr ( argv [ i ] , ' = ' ) ;
if ( arg )
* arg + + = ' \0 ' ;
else
arg = argv [ i ] ;
2009-10-08 02:28:07 +04:00
if ( conflict_field_name ( argv [ i ] , tp - > args , i ) ) {
2009-10-17 04:07:28 +04:00
pr_info ( " Argument%d name '%s' conflicts with "
" another field. \n " , i , argv [ i ] ) ;
2009-10-08 02:28:07 +04:00
ret = - EINVAL ;
goto error ;
}
2009-09-11 03:53:38 +04:00
tp - > args [ i ] . name = kstrdup ( argv [ i ] , GFP_KERNEL ) ;
2009-12-01 03:19:20 +03:00
if ( ! tp - > args [ i ] . name ) {
pr_info ( " Failed to allocate argument%d name '%s'. \n " ,
i , argv [ i ] ) ;
ret = - ENOMEM ;
2009-08-14 00:35:11 +04:00
goto error ;
}
2009-12-01 03:19:20 +03:00
/* Parse fetch argument */
2009-09-11 03:53:38 +04:00
ret = parse_probe_arg ( arg , & tp - > args [ i ] . fetch , is_return ) ;
2009-10-17 04:07:28 +04:00
if ( ret ) {
pr_info ( " Parse error at argument%d. (%d) \n " , i , ret ) ;
2009-11-25 11:32:21 +03:00
kfree ( tp - > args [ i ] . name ) ;
2009-08-14 00:35:11 +04:00
goto error ;
2009-10-17 04:07:28 +04:00
}
2009-11-25 11:32:21 +03:00
tp - > nr_args + + ;
2009-08-14 00:35:11 +04:00
}
ret = register_trace_probe ( tp ) ;
if ( ret )
goto error ;
return 0 ;
error :
free_trace_probe ( tp ) ;
return ret ;
}
/* Unregister and free every probe on probe_list, holding probe_lock. */
static void cleanup_all_probes(void)
{
	struct trace_probe *first;

	mutex_lock(&probe_lock);
	/* TODO: Use batch unregistration */
	for (;;) {
		if (list_empty(&probe_list))
			break;
		first = list_entry(probe_list.next, struct trace_probe, list);
		unregister_trace_probe(first);
		free_trace_probe(first);
	}
	mutex_unlock(&probe_lock);
}
/* Probes listing interfaces */
/*
 * seq_file start: take probe_lock and position at element *@pos of
 * probe_list. The lock is dropped in probes_seq_stop().
 */
static void *probes_seq_start(struct seq_file *m, loff_t *pos)
{
	loff_t index = *pos;

	mutex_lock(&probe_lock);
	return seq_list_start(&probe_list, index);
}
/* seq_file next: advance from @v to the following probe_list element. */
static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	void *next = seq_list_next(v, &probe_list, pos);

	return next;
}
/* seq_file stop: release the probe_lock taken by probes_seq_start(). */
static void probes_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&probe_lock);
}
static int probes_seq_show ( struct seq_file * m , void * v )
{
struct trace_probe * tp = v ;
int i , ret ;
char buf [ MAX_ARGSTR_LEN + 1 ] ;
seq_printf ( m , " %c " , probe_is_return ( tp ) ? ' r ' : ' p ' ) ;
2009-11-25 11:32:47 +03:00
seq_printf ( m , " :%s/%s " , tp - > call . system , tp - > call . name ) ;
2009-08-14 00:35:11 +04:00
2009-11-25 11:33:15 +03:00
if ( ! tp - > symbol )
seq_printf ( m , " 0x%p " , tp - > rp . kp . addr ) ;
else if ( tp - > rp . kp . offset )
2009-09-11 07:31:21 +04:00
seq_printf ( m , " %s+%u " , probe_symbol ( tp ) , tp - > rp . kp . offset ) ;
2009-08-14 00:35:11 +04:00
else
2009-11-25 11:33:15 +03:00
seq_printf ( m , " %s " , probe_symbol ( tp ) ) ;
2009-08-14 00:35:11 +04:00
for ( i = 0 ; i < tp - > nr_args ; i + + ) {
2009-09-11 03:53:38 +04:00
ret = probe_arg_string ( buf , MAX_ARGSTR_LEN , & tp - > args [ i ] . fetch ) ;
2009-08-14 00:35:11 +04:00
if ( ret < 0 ) {
pr_warning ( " Argument%d decoding error(%d). \n " , i , ret ) ;
return ret ;
}
2009-09-11 03:53:38 +04:00
seq_printf ( m , " %s=%s " , tp - > args [ i ] . name , buf ) ;
2009-08-14 00:35:11 +04:00
}
seq_printf ( m , " \n " ) ;
return 0 ;
}
/* seq_file iterator that lists the probe definitions. */
static const struct seq_operations probes_seq_op = {
	.start	= probes_seq_start,
	.next	= probes_seq_next,
	.stop	= probes_seq_stop,
	.show	= probes_seq_show,
};
static int probes_open ( struct inode * inode , struct file * file )
{
if ( ( file - > f_mode & FMODE_WRITE ) & &
( file - > f_flags & O_TRUNC ) )
cleanup_all_probes ( ) ;
return seq_open ( file , & probes_seq_op ) ;
}
/*
 * Split @buf into words and, unless it has none, create a probe from them.
 * Returns 0 for an empty command, -ENOMEM if splitting fails, otherwise
 * the result of create_trace_probe().
 */
static int command_trace_probe(const char *buf)
{
	int argc = 0;
	int ret;
	char **argv = argv_split(GFP_KERNEL, buf, &argc);

	if (!argv)
		return -ENOMEM;

	ret = argc ? create_trace_probe(argc, argv) : 0;

	argv_free(argv);
	return ret;
}
#define WRITE_BUFSIZE 128

/*
 * write handler: consume the user buffer one line at a time and run each
 * line as a probe command.
 *
 * At most WRITE_BUFSIZE - 1 bytes are copied per step. A step without a
 * newline is accepted only if it reaches the end of the user data;
 * otherwise the line is too long and -EINVAL is returned. Text from '#'
 * onwards is dropped before the command runs.
 *
 * Returns the number of bytes consumed, or a negative error code from the
 * first failing step.
 */
static ssize_t probes_write(struct file *file, const char __user *buffer,
			    size_t count, loff_t *ppos)
{
	char *line, *mark;
	size_t consumed = 0;
	size_t chunk;
	int ret = 0;

	line = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!line)
		return -ENOMEM;

	while (consumed < count) {
		chunk = count - consumed;
		if (chunk >= WRITE_BUFSIZE)
			chunk = WRITE_BUFSIZE - 1;

		if (copy_from_user(line, buffer + consumed, chunk)) {
			ret = -EFAULT;
			goto out;
		}
		line[chunk] = '\0';

		mark = strchr(line, '\n');
		if (mark) {
			/* consume up to and including the newline */
			*mark = '\0';
			chunk = mark - line + 1;
		} else if (consumed + chunk < count) {
			pr_warning(" Line length is too long: "
				   " Should be less than %d. ", WRITE_BUFSIZE);
			ret = -EINVAL;
			goto out;
		}
		consumed += chunk;

		/* Remove comments */
		mark = strchr(line, '#');
		if (mark)
			*mark = '\0';

		ret = command_trace_probe(line);
		if (ret)
			goto out;
	}
	ret = consumed;
out:
	kfree(line);
	return ret;
}
/* File operations for the probe definition file: seq_file reads, probes_write writes. */
static const struct file_operations kprobe_events_ops = {
	.owner		= THIS_MODULE,
	.open		= probes_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
	.write		= probes_write,
};
2009-08-14 00:35:42 +04:00
/* Probes profiling interfaces */
static int probes_profile_seq_show ( struct seq_file * m , void * v )
{
struct trace_probe * tp = v ;
seq_printf ( m , " %-44s %15lu %15lu \n " , tp - > call . name , tp - > nhit ,
2009-09-11 07:31:21 +04:00
tp - > rp . kp . nmissed ) ;
2009-08-14 00:35:42 +04:00
return 0 ;
}
/* seq_file iterator for the profile listing; shares the probe list walkers. */
static const struct seq_operations profile_seq_op = {
	.start	= probes_seq_start,
	.next	= probes_seq_next,
	.stop	= probes_seq_stop,
	.show	= probes_profile_seq_show,
};
/* open handler for the profile file: attach the profile seq_file iterator. */
static int profile_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &profile_seq_op);

	return ret;
}
/* File operations for the profile file; it has no write handler. */
static const struct file_operations kprobe_profile_ops = {
	.owner		= THIS_MODULE,
	.open		= profile_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
2009-08-14 00:35:11 +04:00
/* Kprobe handler */
static __kprobes int kprobe_trace_func ( struct kprobe * kp , struct pt_regs * regs )
{
2009-09-11 07:31:21 +04:00
struct trace_probe * tp = container_of ( kp , struct trace_probe , rp . kp ) ;
2009-08-14 00:35:11 +04:00
struct kprobe_trace_entry * entry ;
struct ring_buffer_event * event ;
2009-09-11 03:09:23 +04:00
struct ring_buffer * buffer ;
2009-08-14 00:35:11 +04:00
int size , i , pc ;
unsigned long irq_flags ;
2009-08-14 00:35:26 +04:00
struct ftrace_event_call * call = & tp - > call ;
2009-08-14 00:35:11 +04:00
2009-08-14 00:35:42 +04:00
tp - > nhit + + ;
2009-08-14 00:35:11 +04:00
local_save_flags ( irq_flags ) ;
pc = preempt_count ( ) ;
size = SIZEOF_KPROBE_TRACE_ENTRY ( tp - > nr_args ) ;
2009-09-11 03:09:23 +04:00
event = trace_current_buffer_lock_reserve ( & buffer , call - > id , size ,
2009-08-14 00:35:11 +04:00
irq_flags , pc ) ;
if ( ! event )
return 0 ;
entry = ring_buffer_event_data ( event ) ;
entry - > nargs = tp - > nr_args ;
entry - > ip = ( unsigned long ) kp - > addr ;
for ( i = 0 ; i < tp - > nr_args ; i + + )
2009-09-11 03:53:38 +04:00
entry - > args [ i ] = call_fetch ( & tp - > args [ i ] . fetch , regs ) ;
2009-08-14 00:35:11 +04:00
2009-09-11 03:09:23 +04:00
if ( ! filter_current_check_discard ( buffer , call , entry , event ) )
trace_nowake_buffer_unlock_commit ( buffer , event , irq_flags , pc ) ;
2009-08-14 00:35:11 +04:00
return 0 ;
}
/* Kretprobe handler */
static __kprobes int kretprobe_trace_func ( struct kretprobe_instance * ri ,
struct pt_regs * regs )
{
struct trace_probe * tp = container_of ( ri - > rp , struct trace_probe , rp ) ;
struct kretprobe_trace_entry * entry ;
struct ring_buffer_event * event ;
2009-09-11 03:09:23 +04:00
struct ring_buffer * buffer ;
2009-08-14 00:35:11 +04:00
int size , i , pc ;
unsigned long irq_flags ;
2009-08-14 00:35:26 +04:00
struct ftrace_event_call * call = & tp - > call ;
2009-08-14 00:35:11 +04:00
local_save_flags ( irq_flags ) ;
pc = preempt_count ( ) ;
size = SIZEOF_KRETPROBE_TRACE_ENTRY ( tp - > nr_args ) ;
2009-09-11 03:09:23 +04:00
event = trace_current_buffer_lock_reserve ( & buffer , call - > id , size ,
2009-08-14 00:35:11 +04:00
irq_flags , pc ) ;
if ( ! event )
return 0 ;
entry = ring_buffer_event_data ( event ) ;
entry - > nargs = tp - > nr_args ;
2009-09-11 07:31:21 +04:00
entry - > func = ( unsigned long ) tp - > rp . kp . addr ;
2009-08-14 00:35:11 +04:00
entry - > ret_ip = ( unsigned long ) ri - > ret_addr ;
for ( i = 0 ; i < tp - > nr_args ; i + + )
2009-09-11 03:53:38 +04:00
entry - > args [ i ] = call_fetch ( & tp - > args [ i ] . fetch , regs ) ;
2009-08-14 00:35:11 +04:00
2009-09-11 03:09:23 +04:00
if ( ! filter_current_check_discard ( buffer , call , entry , event ) )
trace_nowake_buffer_unlock_commit ( buffer , event , irq_flags , pc ) ;
2009-08-14 00:35:11 +04:00
return 0 ;
}
/* Event entry printers */
enum print_line_t
print_kprobe_event ( struct trace_iterator * iter , int flags )
{
struct kprobe_trace_entry * field ;
struct trace_seq * s = & iter - > seq ;
2009-09-11 03:53:38 +04:00
struct trace_event * event ;
struct trace_probe * tp ;
2009-08-14 00:35:11 +04:00
int i ;
2009-08-14 00:35:34 +04:00
field = ( struct kprobe_trace_entry * ) iter - > ent ;
2009-09-11 03:53:38 +04:00
event = ftrace_find_event ( field - > ent . type ) ;
tp = container_of ( event , struct trace_probe , event ) ;
2009-08-14 00:35:11 +04:00
2009-09-11 03:53:45 +04:00
if ( ! trace_seq_printf ( s , " %s: ( " , tp - > call . name ) )
goto partial ;
2009-08-14 00:35:11 +04:00
if ( ! seq_print_ip_sym ( s , field - > ip , flags | TRACE_ITER_SYM_OFFSET ) )
goto partial ;
2009-09-11 03:53:45 +04:00
if ( ! trace_seq_puts ( s , " ) " ) )
2009-08-14 00:35:11 +04:00
goto partial ;
for ( i = 0 ; i < field - > nargs ; i + + )
2009-09-11 03:53:38 +04:00
if ( ! trace_seq_printf ( s , " %s=%lx " ,
tp - > args [ i ] . name , field - > args [ i ] ) )
2009-08-14 00:35:11 +04:00
goto partial ;
if ( ! trace_seq_puts ( s , " \n " ) )
goto partial ;
return TRACE_TYPE_HANDLED ;
partial :
return TRACE_TYPE_PARTIAL_LINE ;
}
enum print_line_t
print_kretprobe_event ( struct trace_iterator * iter , int flags )
{
struct kretprobe_trace_entry * field ;
struct trace_seq * s = & iter - > seq ;
2009-09-11 03:53:38 +04:00
struct trace_event * event ;
struct trace_probe * tp ;
2009-08-14 00:35:11 +04:00
int i ;
2009-08-14 00:35:34 +04:00
field = ( struct kretprobe_trace_entry * ) iter - > ent ;
2009-09-11 03:53:38 +04:00
event = ftrace_find_event ( field - > ent . type ) ;
tp = container_of ( event , struct trace_probe , event ) ;
2009-08-14 00:35:11 +04:00
2009-09-11 03:53:45 +04:00
if ( ! trace_seq_printf ( s , " %s: ( " , tp - > call . name ) )
goto partial ;
2009-08-14 00:35:11 +04:00
if ( ! seq_print_ip_sym ( s , field - > ret_ip , flags | TRACE_ITER_SYM_OFFSET ) )
goto partial ;
if ( ! trace_seq_puts ( s , " <- " ) )
goto partial ;
if ( ! seq_print_ip_sym ( s , field - > func , flags & ~ TRACE_ITER_SYM_OFFSET ) )
goto partial ;
2009-09-11 03:53:45 +04:00
if ( ! trace_seq_puts ( s , " ) " ) )
2009-08-14 00:35:11 +04:00
goto partial ;
for ( i = 0 ; i < field - > nargs ; i + + )
2009-09-11 03:53:38 +04:00
if ( ! trace_seq_printf ( s , " %s=%lx " ,
tp - > args [ i ] . name , field - > args [ i ] ) )
2009-08-14 00:35:11 +04:00
goto partial ;
if ( ! trace_seq_puts ( s , " \n " ) )
goto partial ;
return TRACE_TYPE_HANDLED ;
partial :
return TRACE_TYPE_PARTIAL_LINE ;
}
static int probe_event_enable ( struct ftrace_event_call * call )
{
struct trace_probe * tp = ( struct trace_probe * ) call - > data ;
2009-09-15 00:49:20 +04:00
tp - > flags | = TP_FLAG_TRACE ;
if ( probe_is_return ( tp ) )
2009-08-14 00:35:11 +04:00
return enable_kretprobe ( & tp - > rp ) ;
2009-09-15 00:49:20 +04:00
else
2009-09-11 07:31:21 +04:00
return enable_kprobe ( & tp - > rp . kp ) ;
2009-08-14 00:35:11 +04:00
}
static void probe_event_disable ( struct ftrace_event_call * call )
{
struct trace_probe * tp = ( struct trace_probe * ) call - > data ;
2009-09-15 00:49:20 +04:00
tp - > flags & = ~ TP_FLAG_TRACE ;
if ( ! ( tp - > flags & ( TP_FLAG_TRACE | TP_FLAG_PROFILE ) ) ) {
if ( probe_is_return ( tp ) )
disable_kretprobe ( & tp - > rp ) ;
else
disable_kprobe ( & tp - > rp . kp ) ;
}
2009-08-14 00:35:11 +04:00
}
static int probe_event_raw_init ( struct ftrace_event_call * event_call )
{
INIT_LIST_HEAD ( & event_call - > fields ) ;
2009-09-11 03:09:23 +04:00
2009-08-14 00:35:11 +04:00
return 0 ;
}
/*
 * DEFINE_FIELD(type, item, name, is_signed) - register one member of the
 * local `field` record with the event through trace_define_field().
 *
 * The macro is not self-contained: its expansion uses the variables
 * `ret`, `event_call` and `field` of the calling function, and it
 * executes `return ret` in that function when trace_define_field()
 * returns non-zero.  `type` is stringified to form the type name; the
 * offset and size are taken from member `item` of `field`, and the
 * filter type is always FILTER_OTHER.
 */
# undef DEFINE_FIELD
# define DEFINE_FIELD(type, item, name, is_signed) \
do { \
ret = trace_define_field ( event_call , # type , name , \
offsetof ( typeof ( field ) , item ) , \
sizeof ( field . item ) , is_signed , \
FILTER_OTHER ) ; \
if ( ret ) \
return ret ; \
} while ( 0 )
static int kprobe_event_define_fields ( struct ftrace_event_call * event_call )
{
int ret , i ;
struct kprobe_trace_entry field ;
struct trace_probe * tp = ( struct trace_probe * ) event_call - > data ;
ret = trace_define_common_fields ( event_call ) ;
if ( ! ret )
return ret ;
2009-10-08 02:28:07 +04:00
DEFINE_FIELD ( unsigned long , ip , FIELD_STRING_IP , 0 ) ;
DEFINE_FIELD ( int , nargs , FIELD_STRING_NARGS , 1 ) ;
2009-09-11 03:53:38 +04:00
/* Set argument names as fields */
for ( i = 0 ; i < tp - > nr_args ; i + + )
DEFINE_FIELD ( unsigned long , args [ i ] , tp - > args [ i ] . name , 0 ) ;
2009-08-14 00:35:11 +04:00
return 0 ;
}
static int kretprobe_event_define_fields ( struct ftrace_event_call * event_call )
{
int ret , i ;
struct kretprobe_trace_entry field ;
struct trace_probe * tp = ( struct trace_probe * ) event_call - > data ;
ret = trace_define_common_fields ( event_call ) ;
if ( ! ret )
return ret ;
2009-10-08 02:28:07 +04:00
DEFINE_FIELD ( unsigned long , func , FIELD_STRING_FUNC , 0 ) ;
DEFINE_FIELD ( unsigned long , ret_ip , FIELD_STRING_RETIP , 0 ) ;
DEFINE_FIELD ( int , nargs , FIELD_STRING_NARGS , 1 ) ;
2009-09-11 03:53:38 +04:00
/* Set argument names as fields */
for ( i = 0 ; i < tp - > nr_args ; i + + )
DEFINE_FIELD ( unsigned long , args [ i ] , tp - > args [ i ] . name , 0 ) ;
2009-08-14 00:35:11 +04:00
return 0 ;
}
static int __probe_event_show_format ( struct trace_seq * s ,
struct trace_probe * tp , const char * fmt ,
const char * arg )
{
2009-09-11 03:53:38 +04:00
int i ;
2009-08-14 00:35:11 +04:00
/* Show format */
if ( ! trace_seq_printf ( s , " \n print fmt: \" %s " , fmt ) )
return 0 ;
for ( i = 0 ; i < tp - > nr_args ; i + + )
2009-09-11 03:53:38 +04:00
if ( ! trace_seq_printf ( s , " %s=%%lx " , tp - > args [ i ] . name ) )
2009-08-14 00:35:11 +04:00
return 0 ;
if ( ! trace_seq_printf ( s , " \" , %s " , arg ) )
return 0 ;
for ( i = 0 ; i < tp - > nr_args ; i + + )
2009-09-11 03:53:38 +04:00
if ( ! trace_seq_printf ( s , " , REC->%s " , tp - > args [ i ] . name ) )
2009-08-14 00:35:11 +04:00
return 0 ;
return trace_seq_puts ( s , " \n " ) ;
}
/*
 * SHOW_FIELD(type, item, name) - print one "field:" description line for
 * member `item` of the local `field` record.
 *
 * The macro is not self-contained: its expansion uses the variables
 * `ret`, `s` and `field` of the calling function, and it executes
 * `return 0` in that function when trace_seq_printf() returns zero.
 * `type` is stringified into the output; the printed offset comes from
 * offsetof() on `field`, while the printed size is sizeof(type), not the
 * size of the member itself.
 */
# undef SHOW_FIELD
# define SHOW_FIELD(type, item, name) \
do { \
ret = trace_seq_printf ( s , " \t field: " # type " %s; \t " \
2009-08-21 23:43:43 +04:00
" offset:%u; \t size:%u; \n " , name , \
2009-08-14 00:35:11 +04:00
( unsigned int ) offsetof ( typeof ( field ) , item ) , \
( unsigned int ) sizeof ( type ) ) ; \
if ( ! ret ) \
return 0 ; \
} while ( 0 )
static int kprobe_event_show_format ( struct ftrace_event_call * call ,
struct trace_seq * s )
{
struct kprobe_trace_entry field __attribute__ ( ( unused ) ) ;
int ret , i ;
struct trace_probe * tp = ( struct trace_probe * ) call - > data ;
2009-10-08 02:28:07 +04:00
SHOW_FIELD ( unsigned long , ip , FIELD_STRING_IP ) ;
SHOW_FIELD ( int , nargs , FIELD_STRING_NARGS ) ;
2009-08-14 00:35:11 +04:00
/* Show fields */
2009-09-11 03:53:38 +04:00
for ( i = 0 ; i < tp - > nr_args ; i + + )
SHOW_FIELD ( unsigned long , args [ i ] , tp - > args [ i ] . name ) ;
2009-08-14 00:35:11 +04:00
trace_seq_puts ( s , " \n " ) ;
2009-10-08 02:28:07 +04:00
return __probe_event_show_format ( s , tp , " (%lx) " ,
" REC-> " FIELD_STRING_IP ) ;
2009-08-14 00:35:11 +04:00
}
static int kretprobe_event_show_format ( struct ftrace_event_call * call ,
struct trace_seq * s )
{
struct kretprobe_trace_entry field __attribute__ ( ( unused ) ) ;
int ret , i ;
struct trace_probe * tp = ( struct trace_probe * ) call - > data ;
2009-10-08 02:28:07 +04:00
SHOW_FIELD ( unsigned long , func , FIELD_STRING_FUNC ) ;
SHOW_FIELD ( unsigned long , ret_ip , FIELD_STRING_RETIP ) ;
SHOW_FIELD ( int , nargs , FIELD_STRING_NARGS ) ;
2009-08-14 00:35:11 +04:00
/* Show fields */
2009-09-11 03:53:38 +04:00
for ( i = 0 ; i < tp - > nr_args ; i + + )
SHOW_FIELD ( unsigned long , args [ i ] , tp - > args [ i ] . name ) ;
2009-08-14 00:35:11 +04:00
trace_seq_puts ( s , " \n " ) ;
2009-09-11 03:53:45 +04:00
return __probe_event_show_format ( s , tp , " (%lx <- %lx) " ,
2009-10-08 02:28:07 +04:00
" REC-> " FIELD_STRING_FUNC
" , REC-> " FIELD_STRING_RETIP ) ;
2009-08-14 00:35:11 +04:00
}
2009-09-11 03:53:30 +04:00
# ifdef CONFIG_EVENT_PROFILE
/* Kprobe profile handler */
static __kprobes int kprobe_profile_func ( struct kprobe * kp ,
struct pt_regs * regs )
{
struct trace_probe * tp = container_of ( kp , struct trace_probe , rp . kp ) ;
struct ftrace_event_call * call = & tp - > call ;
struct kprobe_trace_entry * entry ;
2009-09-25 22:20:12 +04:00
struct trace_entry * ent ;
int size , __size , i , pc , __cpu ;
2009-09-11 03:53:30 +04:00
unsigned long irq_flags ;
2009-11-22 07:26:55 +03:00
char * trace_buf ;
2009-09-25 22:20:12 +04:00
char * raw_data ;
2009-11-23 13:37:29 +03:00
int rctx ;
2009-09-11 03:53:30 +04:00
pc = preempt_count ( ) ;
2009-09-15 00:49:28 +04:00
__size = SIZEOF_KPROBE_TRACE_ENTRY ( tp - > nr_args ) ;
size = ALIGN ( __size + sizeof ( u32 ) , sizeof ( u64 ) ) ;
size - = sizeof ( u32 ) ;
2009-09-25 22:20:12 +04:00
if ( WARN_ONCE ( size > FTRACE_MAX_PROFILE_SIZE ,
" profile buffer not large enough " ) )
return 0 ;
2009-09-11 03:53:30 +04:00
2009-09-25 22:20:12 +04:00
/*
* Protect the non nmi buffer
* This also protects the rcu read side
*/
local_irq_save ( irq_flags ) ;
2009-11-22 07:26:55 +03:00
2009-11-23 13:37:29 +03:00
rctx = perf_swevent_get_recursion_context ( ) ;
if ( rctx < 0 )
2009-11-22 07:26:55 +03:00
goto end_recursion ;
2009-09-25 22:20:12 +04:00
__cpu = smp_processor_id ( ) ;
if ( in_nmi ( ) )
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
trace_buf = rcu_dereference ( perf_trace_buf_nmi ) ;
2009-09-25 22:20:12 +04:00
else
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
trace_buf = rcu_dereference ( perf_trace_buf ) ;
2009-09-25 22:20:12 +04:00
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
if ( ! trace_buf )
2009-09-25 22:20:12 +04:00
goto end ;
2009-11-22 07:26:55 +03:00
raw_data = per_cpu_ptr ( trace_buf , __cpu ) ;
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
2009-09-25 22:20:12 +04:00
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
* ( u64 * ) ( & raw_data [ size - sizeof ( u64 ) ] ) = 0ULL ;
entry = ( struct kprobe_trace_entry * ) raw_data ;
ent = & entry - > ent ;
tracing_generic_entry_update ( ent , irq_flags , pc ) ;
ent - > type = call - > id ;
entry - > nargs = tp - > nr_args ;
entry - > ip = ( unsigned long ) kp - > addr ;
for ( i = 0 ; i < tp - > nr_args ; i + + )
entry - > args [ i ] = call_fetch ( & tp - > args [ i ] . fetch , regs ) ;
perf_tp_event ( call - > id , entry - > ip , 1 , entry , size ) ;
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
2009-09-25 22:20:12 +04:00
end :
2009-11-23 13:37:29 +03:00
perf_swevent_put_recursion_context ( rctx ) ;
2009-11-22 07:26:55 +03:00
end_recursion :
2009-09-25 22:20:12 +04:00
local_irq_restore ( irq_flags ) ;
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
2009-09-11 03:53:30 +04:00
return 0 ;
}
/* Kretprobe profile handler */
static __kprobes int kretprobe_profile_func ( struct kretprobe_instance * ri ,
struct pt_regs * regs )
{
struct trace_probe * tp = container_of ( ri - > rp , struct trace_probe , rp ) ;
struct ftrace_event_call * call = & tp - > call ;
struct kretprobe_trace_entry * entry ;
2009-09-25 22:20:12 +04:00
struct trace_entry * ent ;
int size , __size , i , pc , __cpu ;
2009-09-11 03:53:30 +04:00
unsigned long irq_flags ;
2009-11-22 07:26:55 +03:00
char * trace_buf ;
2009-09-25 22:20:12 +04:00
char * raw_data ;
2009-11-23 13:37:29 +03:00
int rctx ;
2009-09-11 03:53:30 +04:00
pc = preempt_count ( ) ;
2009-09-15 00:49:28 +04:00
__size = SIZEOF_KRETPROBE_TRACE_ENTRY ( tp - > nr_args ) ;
size = ALIGN ( __size + sizeof ( u32 ) , sizeof ( u64 ) ) ;
size - = sizeof ( u32 ) ;
2009-09-25 22:20:12 +04:00
if ( WARN_ONCE ( size > FTRACE_MAX_PROFILE_SIZE ,
" profile buffer not large enough " ) )
return 0 ;
/*
* Protect the non nmi buffer
* This also protects the rcu read side
*/
local_irq_save ( irq_flags ) ;
2009-11-22 07:26:55 +03:00
2009-11-23 13:37:29 +03:00
rctx = perf_swevent_get_recursion_context ( ) ;
if ( rctx < 0 )
2009-11-22 07:26:55 +03:00
goto end_recursion ;
2009-09-25 22:20:12 +04:00
__cpu = smp_processor_id ( ) ;
if ( in_nmi ( ) )
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
trace_buf = rcu_dereference ( perf_trace_buf_nmi ) ;
2009-09-25 22:20:12 +04:00
else
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
trace_buf = rcu_dereference ( perf_trace_buf ) ;
2009-09-25 22:20:12 +04:00
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
if ( ! trace_buf )
2009-09-25 22:20:12 +04:00
goto end ;
2009-11-22 07:26:55 +03:00
raw_data = per_cpu_ptr ( trace_buf , __cpu ) ;
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from perf fast path, but that would make us
lose the rcu debugging and that doesn't prevent from other
possible kind of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
2009-09-25 22:20:12 +04:00
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
* ( u64 * ) ( & raw_data [ size - sizeof ( u64 ) ] ) = 0ULL ;
entry = ( struct kretprobe_trace_entry * ) raw_data ;
ent = & entry - > ent ;
2009-09-11 03:53:30 +04:00
2009-09-25 22:20:12 +04:00
tracing_generic_entry_update ( ent , irq_flags , pc ) ;
ent - > type = call - > id ;
entry - > nargs = tp - > nr_args ;
entry - > func = ( unsigned long ) tp - > rp . kp . addr ;
entry - > ret_ip = ( unsigned long ) ri - > ret_addr ;
for ( i = 0 ; i < tp - > nr_args ; i + + )
entry - > args [ i ] = call_fetch ( & tp - > args [ i ] . fetch , regs ) ;
perf_tp_event ( call - > id , entry - > ret_ip , 1 , entry , size ) ;
tracing, perf_events: Protect the buffer from recursion in perf
While tracing using events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace events.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from the perf fast path, but that would make us
lose the rcu debugging, and it would not prevent other possible
kinds of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
2009-09-25 22:20:12 +04:00
end :
2009-11-23 13:37:29 +03:00
perf_swevent_put_recursion_context ( rctx ) ;
2009-11-22 07:26:55 +03:00
end_recursion :
2009-09-25 22:20:12 +04:00
local_irq_restore ( irq_flags ) ;
tracing, perf_events: Protect the buffer from recursion in perf
While tracing events with perf, if one enables the
lockdep:lock_acquire event, it will infect every other perf
trace event.
Basically, you can enable whatever set of trace events through
perf but if this event is part of the set, the only result we
can get is a long list of lock_acquire events of rcu read lock,
and only that.
This is because of a recursion inside perf.
1) When a trace event is triggered, it will fill a per cpu
buffer and submit it to perf.
2) Perf will commit this event but will also protect some data
using rcu_read_lock
3) A recursion appears: rcu_read_lock triggers a lock_acquire
event that will fill the per cpu event and then submit the
buffer to perf.
4) Perf detects a recursion and ignores it
5) Perf continues its work on the previous event, but its buffer
has been overwritten by the lock_acquire event, it has then
been turned into a lock_acquire event of rcu read lock
Such scenario also happens with lock_release with
rcu_read_unlock().
We could turn the rcu_read_lock() into __rcu_read_lock() to drop
the lock debugging from the perf fast path, but that would make us
lose the rcu debugging, and it would not prevent other possible
kinds of recursion from perf in the future.
This patch adds a recursion protection based on a counter on the
perf trace per cpu buffers to solve the problem.
-v2: Fixed lost whitespace, added reviewed-by tag
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Reviewed-by: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Jason Baron <jbaron@redhat.com>
LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-06 06:13:05 +03:00
2009-09-11 03:53:30 +04:00
return 0 ;
}
static int probe_profile_enable ( struct ftrace_event_call * call )
{
struct trace_probe * tp = ( struct trace_probe * ) call - > data ;
2009-09-15 00:49:20 +04:00
tp - > flags | = TP_FLAG_PROFILE ;
2009-09-24 01:08:43 +04:00
2009-09-15 00:49:20 +04:00
if ( probe_is_return ( tp ) )
2009-09-11 03:53:30 +04:00
return enable_kretprobe ( & tp - > rp ) ;
2009-09-15 00:49:20 +04:00
else
2009-09-11 03:53:30 +04:00
return enable_kprobe ( & tp - > rp . kp ) ;
}
static void probe_profile_disable ( struct ftrace_event_call * call )
{
2009-09-15 00:49:20 +04:00
struct trace_probe * tp = ( struct trace_probe * ) call - > data ;
2009-09-24 01:08:43 +04:00
tp - > flags & = ~ TP_FLAG_PROFILE ;
2009-09-15 00:49:20 +04:00
2009-09-24 01:08:43 +04:00
if ( ! ( tp - > flags & TP_FLAG_TRACE ) ) {
2009-09-15 00:49:20 +04:00
if ( probe_is_return ( tp ) )
disable_kretprobe ( & tp - > rp ) ;
else
disable_kprobe ( & tp - > rp . kp ) ;
}
2009-09-11 03:53:30 +04:00
}
2009-09-15 00:49:20 +04:00
# endif /* CONFIG_EVENT_PROFILE */
static __kprobes
int kprobe_dispatcher ( struct kprobe * kp , struct pt_regs * regs )
{
struct trace_probe * tp = container_of ( kp , struct trace_probe , rp . kp ) ;
2009-09-11 03:53:30 +04:00
2009-09-15 00:49:20 +04:00
if ( tp - > flags & TP_FLAG_TRACE )
kprobe_trace_func ( kp , regs ) ;
# ifdef CONFIG_EVENT_PROFILE
if ( tp - > flags & TP_FLAG_PROFILE )
kprobe_profile_func ( kp , regs ) ;
2009-09-11 03:53:30 +04:00
# endif /* CONFIG_EVENT_PROFILE */
2009-09-15 00:49:20 +04:00
return 0 ; /* We don't tweek kernel, so just return 0 */
}
static __kprobes
int kretprobe_dispatcher ( struct kretprobe_instance * ri , struct pt_regs * regs )
{
struct trace_probe * tp = container_of ( ri - > rp , struct trace_probe , rp ) ;
if ( tp - > flags & TP_FLAG_TRACE )
kretprobe_trace_func ( ri , regs ) ;
# ifdef CONFIG_EVENT_PROFILE
if ( tp - > flags & TP_FLAG_PROFILE )
kretprobe_profile_func ( ri , regs ) ;
# endif /* CONFIG_EVENT_PROFILE */
return 0 ; /* We don't tweek kernel, so just return 0 */
}
2009-09-11 03:53:30 +04:00
2009-08-14 00:35:11 +04:00
static int register_probe_event ( struct trace_probe * tp )
{
struct ftrace_event_call * call = & tp - > call ;
int ret ;
/* Initialize ftrace_event_call */
if ( probe_is_return ( tp ) ) {
2009-08-14 00:35:34 +04:00
tp - > event . trace = print_kretprobe_event ;
2009-08-14 00:35:11 +04:00
call - > raw_init = probe_event_raw_init ;
call - > show_format = kretprobe_event_show_format ;
call - > define_fields = kretprobe_event_define_fields ;
} else {
2009-08-14 00:35:34 +04:00
tp - > event . trace = print_kprobe_event ;
2009-08-14 00:35:11 +04:00
call - > raw_init = probe_event_raw_init ;
call - > show_format = kprobe_event_show_format ;
call - > define_fields = kprobe_event_define_fields ;
}
2009-08-14 00:35:34 +04:00
call - > event = & tp - > event ;
call - > id = register_ftrace_event ( & tp - > event ) ;
if ( ! call - > id )
return - ENODEV ;
2009-09-15 00:49:37 +04:00
call - > enabled = 0 ;
2009-08-14 00:35:11 +04:00
call - > regfunc = probe_event_enable ;
call - > unregfunc = probe_event_disable ;
2009-09-11 03:53:30 +04:00
# ifdef CONFIG_EVENT_PROFILE
atomic_set ( & call - > profile_count , - 1 ) ;
call - > profile_enable = probe_profile_enable ;
call - > profile_disable = probe_profile_disable ;
# endif
2009-08-14 00:35:11 +04:00
call - > data = tp ;
ret = trace_add_event_call ( call ) ;
2009-08-14 00:35:34 +04:00
if ( ret ) {
2009-08-14 00:35:11 +04:00
pr_info ( " Failed to register kprobe event: %s \n " , call - > name ) ;
2009-08-14 00:35:34 +04:00
unregister_ftrace_event ( & tp - > event ) ;
}
2009-08-14 00:35:11 +04:00
return ret ;
}
static void unregister_probe_event ( struct trace_probe * tp )
{
2009-08-14 00:35:34 +04:00
/* tp->event is unregistered in trace_remove_event_call() */
2009-08-14 00:35:11 +04:00
trace_remove_event_call ( & tp - > call ) ;
}
/* Make a debugfs interface for controling probe points */
static __init int init_kprobe_trace ( void )
{
struct dentry * d_tracer ;
struct dentry * entry ;
d_tracer = tracing_init_dentry ( ) ;
if ( ! d_tracer )
return 0 ;
entry = debugfs_create_file ( " kprobe_events " , 0644 , d_tracer ,
NULL , & kprobe_events_ops ) ;
2009-08-14 00:35:42 +04:00
/* Event list interface */
2009-08-14 00:35:11 +04:00
if ( ! entry )
pr_warning ( " Could not create debugfs "
" 'kprobe_events' entry \n " ) ;
2009-08-14 00:35:42 +04:00
/* Profile interface */
entry = debugfs_create_file ( " kprobe_profile " , 0444 , d_tracer ,
NULL , & kprobe_profile_ops ) ;
if ( ! entry )
pr_warning ( " Could not create debugfs "
" 'kprobe_profile' entry \n " ) ;
2009-08-14 00:35:11 +04:00
return 0 ;
}
fs_initcall ( init_kprobe_trace ) ;
# ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * Probe target used by the startup self-test: returns the sum of its six
 * integer arguments.
 */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	int sum = a1;

	sum += a2;
	sum += a3;
	sum += a4;
	sum += a5;
	sum += a6;

	return sum;
}
static __init int kprobe_trace_self_tests_init ( void )
{
int ret ;
int ( * target ) ( int , int , int , int , int , int ) ;
target = kprobe_trace_selftest_target ;
pr_info ( " Testing kprobe tracing: " ) ;
ret = command_trace_probe ( " p:testprobe kprobe_trace_selftest_target "
2009-10-17 04:07:20 +04:00
" $arg1 $arg2 $arg3 $arg4 $stack $stack0 " ) ;
2009-08-14 00:35:11 +04:00
if ( WARN_ON_ONCE ( ret ) )
pr_warning ( " error enabling function entry \n " ) ;
ret = command_trace_probe ( " r:testprobe2 kprobe_trace_selftest_target "
2009-10-17 04:07:20 +04:00
" $retval " ) ;
2009-08-14 00:35:11 +04:00
if ( WARN_ON_ONCE ( ret ) )
pr_warning ( " error enabling function return \n " ) ;
ret = target ( 1 , 2 , 3 , 4 , 5 , 6 ) ;
cleanup_all_probes ( ) ;
pr_cont ( " OK \n " ) ;
return 0 ;
}
late_initcall ( kprobe_trace_self_tests_init ) ;
# endif