2009-10-26 19:23:18 -02:00
# include <linux/types.h>
2014-05-19 15:13:49 -04:00
# include <sys/mman.h>
2009-10-26 19:23:18 -02:00
# include "event.h"
# include "debug.h"
2014-03-17 16:59:21 -03:00
# include "hist.h"
2012-10-06 15:44:59 -03:00
# include "machine.h"
2009-12-15 20:04:41 -02:00
# include "sort.h"
2009-10-26 19:23:18 -02:00
# include "string.h"
2009-12-15 20:04:41 -02:00
# include "strlist.h"
2009-11-27 16:29:22 -02:00
# include "thread.h"
2011-02-11 11:45:54 -02:00
# include "thread_map.h"
2013-12-11 09:15:00 -03:00
# include "symbol/kallsyms.h"
2015-10-25 15:51:28 +01:00
# include "asm/bug.h"
# include "stat.h"
2009-10-26 19:23:18 -02:00
2011-01-29 14:01:45 -02:00
static const char * perf_event__names [ ] = {
2011-05-23 13:06:27 +02:00
[ 0 ] = " TOTAL " ,
[ PERF_RECORD_MMAP ] = " MMAP " ,
2013-08-21 12:10:25 +02:00
[ PERF_RECORD_MMAP2 ] = " MMAP2 " ,
2011-05-23 13:06:27 +02:00
[ PERF_RECORD_LOST ] = " LOST " ,
[ PERF_RECORD_COMM ] = " COMM " ,
[ PERF_RECORD_EXIT ] = " EXIT " ,
[ PERF_RECORD_THROTTLE ] = " THROTTLE " ,
[ PERF_RECORD_UNTHROTTLE ] = " UNTHROTTLE " ,
[ PERF_RECORD_FORK ] = " FORK " ,
[ PERF_RECORD_READ ] = " READ " ,
[ PERF_RECORD_SAMPLE ] = " SAMPLE " ,
2015-04-30 17:37:29 +03:00
[ PERF_RECORD_AUX ] = " AUX " ,
2015-04-30 17:37:30 +03:00
[ PERF_RECORD_ITRACE_START ] = " ITRACE_START " ,
2015-05-10 15:13:15 -04:00
[ PERF_RECORD_LOST_SAMPLES ] = " LOST_SAMPLES " ,
2015-07-21 12:44:03 +03:00
[ PERF_RECORD_SWITCH ] = " SWITCH " ,
[ PERF_RECORD_SWITCH_CPU_WIDE ] = " SWITCH_CPU_WIDE " ,
2011-05-23 13:06:27 +02:00
[ PERF_RECORD_HEADER_ATTR ] = " ATTR " ,
[ PERF_RECORD_HEADER_EVENT_TYPE ] = " EVENT_TYPE " ,
[ PERF_RECORD_HEADER_TRACING_DATA ] = " TRACING_DATA " ,
[ PERF_RECORD_HEADER_BUILD_ID ] = " BUILD_ID " ,
[ PERF_RECORD_FINISHED_ROUND ] = " FINISHED_ROUND " ,
2014-10-27 15:49:22 +02:00
[ PERF_RECORD_ID_INDEX ] = " ID_INDEX " ,
2015-04-09 18:53:43 +03:00
[ PERF_RECORD_AUXTRACE_INFO ] = " AUXTRACE_INFO " ,
[ PERF_RECORD_AUXTRACE ] = " AUXTRACE " ,
2015-04-09 18:53:47 +03:00
[ PERF_RECORD_AUXTRACE_ERROR ] = " AUXTRACE_ERROR " ,
2015-10-25 15:51:19 +01:00
[ PERF_RECORD_THREAD_MAP ] = " THREAD_MAP " ,
2015-10-25 15:51:23 +01:00
[ PERF_RECORD_CPU_MAP ] = " CPU_MAP " ,
2015-10-25 15:51:27 +01:00
[ PERF_RECORD_STAT_CONFIG ] = " STAT_CONFIG " ,
2015-10-25 15:51:30 +01:00
[ PERF_RECORD_STAT ] = " STAT " ,
2015-10-25 15:51:33 +01:00
[ PERF_RECORD_STAT_ROUND ] = " STAT_ROUND " ,
2015-10-25 15:51:36 +01:00
[ PERF_RECORD_EVENT_UPDATE ] = " EVENT_UPDATE " ,
2016-03-08 10:38:44 +02:00
[ PERF_RECORD_TIME_CONV ] = " TIME_CONV " ,
2010-05-14 10:36:42 -03:00
} ;
2011-01-29 14:01:45 -02:00
const char * perf_event__name ( unsigned int id )
2010-12-07 12:48:42 +00:00
{
2011-01-29 14:01:45 -02:00
if ( id > = ARRAY_SIZE ( perf_event__names ) )
2010-12-07 12:48:42 +00:00
return " INVALID " ;
2011-01-29 14:01:45 -02:00
if ( ! perf_event__names [ id ] )
2010-12-07 12:48:42 +00:00
return " UNKNOWN " ;
2011-01-29 14:01:45 -02:00
return perf_event__names [ id ] ;
2010-12-07 12:48:42 +00:00
}
perf tools: Add missing initialization of perf_sample.cpumode in synthesized samples
In 473398a21d28 ("perf tools: Add cpumode to struct perf_sample"), I
missed some places where perf_sample fields are directly initialized in
addition to what is done in perf_evsel__parse_sample(), namely when
synthesizing PERF_RECORD_{MMAP*,COMM,FORK,EXIT} for pre-existing threads
and also in intel_pt and intel_bts when synthesizing events from
processor trace, the jitdump code also was affected, fix it.
The problem was noticed with running:
# perf record -e intel_pt//u true
# perf script
Where the samples wouldn't get resolved because perf_sample.cpumode
would be left as zero, i.e. PERF_RECORD_MISC_CPUMODE_UNKNOWN, not
resolving as kernel, hypervisor or user cpu modes.
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 473398a21d28 ("perf tools: Add cpumode to struct perf_sample")
Link: http://lkml.kernel.org/n/tip-n5sdauxgk24d5nun8kuuu2mh@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-03-29 18:46:04 -03:00
/*
 * Hand a synthesized event to @process together with a placeholder sample.
 *
 * The placeholder has pid, tid, time, stream_id and cpu set to -1 and
 * period set to 1.  cpumode is taken from the cpumode bits of the event
 * header's misc field.
 *
 * Returns whatever @process returns.
 */
static int perf_tool__process_synth_event(struct perf_tool *tool,
					  union perf_event *event,
					  struct machine *machine,
					  perf_event__handler_t process)
{
	struct perf_sample synth_sample = {
		.pid	   = -1,
		.tid	   = -1,
		.time	   = -1,
		.stream_id = -1,
		.cpu	   = -1,
		.period	   = 1,
		.cpumode   = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
	};

	return process(tool, event, &synth_sample, machine);
}
2015-03-30 14:35:57 -06:00
/*
* Assumes that the first 4095 bytes of / proc / pid / stat contains
2015-03-30 14:35:58 -06:00
* the comm , tgid and ppid .
2015-03-30 14:35:57 -06:00
*/
2015-03-30 14:35:58 -06:00
static int perf_event__get_comm_ids ( pid_t pid , char * comm , size_t len ,
pid_t * tgid , pid_t * ppid )
2009-10-26 19:23:18 -02:00
{
char filename [ PATH_MAX ] ;
2015-03-30 14:35:57 -06:00
char bf [ 4096 ] ;
int fd ;
2015-10-06 11:00:17 +02:00
size_t size = 0 ;
ssize_t n ;
2015-03-30 14:35:58 -06:00
char * nl , * name , * tgids , * ppids ;
* tgid = - 1 ;
* ppid = - 1 ;
2009-10-26 19:23:18 -02:00
snprintf ( filename , sizeof ( filename ) , " /proc/%d/status " , pid ) ;
2015-03-30 14:35:57 -06:00
fd = open ( filename , O_RDONLY ) ;
if ( fd < 0 ) {
2009-10-26 19:23:18 -02:00
pr_debug ( " couldn't open %s \n " , filename ) ;
2015-03-30 14:35:58 -06:00
return - 1 ;
2009-10-26 19:23:18 -02:00
}
2015-03-30 14:35:57 -06:00
n = read ( fd , bf , sizeof ( bf ) - 1 ) ;
close ( fd ) ;
if ( n < = 0 ) {
2015-03-30 14:35:58 -06:00
pr_warning ( " Couldn't get COMM, tigd and ppid for pid %d \n " ,
2015-03-30 14:35:57 -06:00
pid ) ;
return - 1 ;
2009-10-26 19:23:18 -02:00
}
2015-03-30 14:35:57 -06:00
bf [ n ] = ' \0 ' ;
2009-10-26 19:23:18 -02:00
2015-03-30 14:35:57 -06:00
name = strstr ( bf , " Name: " ) ;
tgids = strstr ( bf , " Tgid: " ) ;
2015-03-30 14:35:58 -06:00
ppids = strstr ( bf , " PPid: " ) ;
2015-03-30 14:35:57 -06:00
if ( name ) {
name + = 5 ; /* strlen("Name:") */
while ( * name & & isspace ( * name ) )
+ + name ;
nl = strchr ( name , ' \n ' ) ;
if ( nl )
* nl = ' \0 ' ;
size = strlen ( name ) ;
if ( size > = len )
size = len - 1 ;
memcpy ( comm , name , size ) ;
comm [ size ] = ' \0 ' ;
} else {
pr_debug ( " Name: string not found for pid %d \n " , pid ) ;
}
if ( tgids ) {
tgids + = 5 ; /* strlen("Tgid:") */
2015-03-30 14:35:58 -06:00
* tgid = atoi ( tgids ) ;
2015-03-30 14:35:57 -06:00
} else {
pr_debug ( " Tgid: string not found for pid %d \n " , pid ) ;
}
2011-12-22 11:30:02 -07:00
2015-03-30 14:35:58 -06:00
if ( ppids ) {
ppids + = 5 ; /* strlen("PPid:") */
* ppid = atoi ( ppids ) ;
} else {
pr_debug ( " PPid: string not found for pid %d \n " , pid ) ;
}
return 0 ;
2011-12-22 11:30:02 -07:00
}
2015-03-30 14:35:58 -06:00
static int perf_event__prepare_comm ( union perf_event * event , pid_t pid ,
struct machine * machine ,
pid_t * tgid , pid_t * ppid )
2011-12-22 11:30:02 -07:00
{
size_t size ;
2015-03-30 14:35:58 -06:00
* ppid = - 1 ;
2011-12-22 11:30:02 -07:00
memset ( & event - > comm , 0 , sizeof ( event - > comm ) ) ;
2015-03-30 14:35:58 -06:00
if ( machine__is_host ( machine ) ) {
if ( perf_event__get_comm_ids ( pid , event - > comm . comm ,
sizeof ( event - > comm . comm ) ,
tgid , ppid ) ! = 0 ) {
return - 1 ;
}
} else {
* tgid = machine - > pid ;
}
2013-12-20 15:52:59 -05:00
2015-03-30 14:35:58 -06:00
if ( * tgid < 0 )
return - 1 ;
2011-12-22 11:30:02 -07:00
2015-03-30 14:35:58 -06:00
event - > comm . pid = * tgid ;
2010-12-02 10:25:28 -02:00
event - > comm . header . type = PERF_RECORD_COMM ;
2011-12-22 11:30:02 -07:00
size = strlen ( event - > comm . comm ) + 1 ;
2012-09-11 01:15:01 +03:00
size = PERF_ALIGN ( size , sizeof ( u64 ) ) ;
2011-11-28 07:56:39 -02:00
memset ( event - > comm . comm + size , 0 , machine - > id_hdr_size ) ;
2010-12-02 10:25:28 -02:00
event - > comm . header . size = ( sizeof ( event - > comm ) -
( sizeof ( event - > comm . comm ) - size ) +
2011-11-28 07:56:39 -02:00
machine - > id_hdr_size ) ;
2014-02-26 10:45:26 -05:00
event - > comm . tid = pid ;
2015-03-30 14:35:58 -06:00
return 0 ;
perf tools: Fix FORK after COMM when synthesizing records for pre-existing threads
In this commit:
commit 363b785f3805a2632eb09a8b430842461c21a640
Author: Don Zickus <dzickus@redhat.com>
Date: Fri Mar 14 10:43:44 2014 -0400
perf tools: Speed up thread map generation
We ended up emitting PERF_RECORD_FORK events after their corresponding
PERF_RECORD_COMM, so the code below will remove the "existing thread"
and then recreates it, unnecessarily:
[root@ssdandy ~]# perf probe -x ~/bin/perf -L machine__process_fork_event
<machine__process_fork_event@/home/acme/git/linux/tools/perf/util/machine.c:0>
0 int machine__process_fork_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample)
2 {
3 struct thread *thread = machine__find_thread(machine,
event->fork.pid,
event->fork.tid);
6 struct thread *parent = machine__findnew_thread(machine,
event->fork.ppid,
event->fork.ptid);
/* if a thread currently exists for the thread id remove it */
if (thread != NULL)
12 machine__remove_thread(machine, thread);
14 thread = machine__findnew_thread(machine, event->fork.pid,
event->fork.tid);
16 if (dump_trace)
17 perf_event__fprintf_task(event, stdout);
19 if (thread == NULL || parent == NULL ||
20 thread__fork(thread, parent, sample->time) < 0) {
21 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
22 return -1;
}
25 return 0;
26 }
[root@ssdandy ~]# perf probe -x ~/bin/perf fork_after_comm=machine__process_fork_event:12
Added new event:
probe_perf:fork_after_comm (on machine__process_fork_event:12 in /home/acme/bin/perf)
You can now use it in all perf tools, such as:
perf record -e probe_perf:fork_after_comm -aR sleep 1
[root@ssdandy ~]#
[root@ssdandy ~]# perf record -g -e probe_perf:* trace -o /tmp/bla
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.021 MB perf.data (30 samples) ]
Terminated
[root@ssdandy ~]#
[root@ssdandy ~]# perf report --no-children --show-total-period --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
# Samples: 30 of event 'probe_perf:fork_after_comm'
# Event count (approx.): 30
#
# Overhead Period Command Shared Object Symbol
# ........ ............ ....... ............. ...............................
#
100.00% 30 trace trace [.] machine__process_fork_event
|
---machine__process_fork_event
__event__synthesize_thread.part.2
perf_event__synthesize_threads
cmd_trace
main
__libc_start_main
[root@ssdandy ~]#
And Looking at 'perf report -D' output we see it:
0 0 0x8698 [0x30]: PERF_RECORD_COMM: auditd:703/707
0 0 0x86c8 [0x38]: PERF_RECORD_FORK(703:707):(703:703)
Fix it by more closely mimicking how the kernel generates those records
when a new fork happens, i.e. first a PERF_RECORD_FORK, then a
PERF_RECORD_COMM.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-h0emvymi2t3mw8dlqd6d6z73@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-02-27 19:52:10 -03:00
}
2015-09-22 09:24:55 +09:00
/*
 * Build a PERF_RECORD_COMM record for @pid in @event and pass it to
 * @process.
 *
 * Returns the thread group id found for @pid, or -1 when the record
 * could not be prepared or @process reported a failure.
 */
pid_t perf_event__synthesize_comm(struct perf_tool *tool,
				  union perf_event *event, pid_t pid,
				  perf_event__handler_t process,
				  struct machine *machine)
{
	pid_t tgid, ppid;

	/* Short-circuit: the event is only delivered once it is prepared. */
	if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0 ||
	    perf_tool__process_synth_event(tool, event, machine, process) != 0)
		return -1;

	return tgid;
}
2014-03-14 10:43:44 -04:00
/*
 * Build a PERF_RECORD_FORK record for thread @pid of process @tgid in
 * @event and pass it to @process.
 *
 * Returns 0 on success, -1 when @process reports a failure.
 */
static int perf_event__synthesize_fork(struct perf_tool *tool,
				       union perf_event *event,
				       pid_t pid, pid_t tgid, pid_t ppid,
				       perf_event__handler_t process,
				       struct machine *machine)
{
	/*
	 * The main thread's parent is the ppid from the status file.  Any
	 * other thread gets the main thread as parent, i.e. the main thread
	 * is assumed to spawn all threads in a process.
	 */
	pid_t parent = (tgid == pid) ? ppid : tgid;

	memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);

	event->fork.ppid = parent;
	event->fork.ptid = parent;
	event->fork.pid = tgid;
	event->fork.tid = pid;

	event->fork.header.type = PERF_RECORD_FORK;
	event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);

	return perf_tool__process_synth_event(tool, event, machine, process) != 0 ? -1 : 0;
}
2014-01-07 13:47:20 +01:00
/*
 * Synthesize PERF_RECORD_MMAP2 events from <root_dir>/proc/<pid>/maps.
 *
 * One event is passed to @process for each executable mapping.  When
 * @mmap_data is true, readable non-executable mappings are emitted too,
 * flagged with PERF_RECORD_MISC_MMAP_DATA.  Mappings without a path are
 * reported as "//anon".
 *
 * @event:            caller-provided buffer the records are built in.
 * @pid, @tgid:       stored as the tid and pid of every record.
 * @proc_map_timeout: limit on the time spent reading the file, scaled by
 *                    1000000 before being compared with rdclock() deltas
 *                    (presumably ms -> ns; confirm against rdclock()).
 *                    When exceeded, one last record flagged with
 *                    PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT is emitted
 *                    and parsing stops.
 *
 * Returns 0 on success (and immediately for the default guest machine),
 * -1 when the maps file cannot be opened or @process fails.
 */
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
				       union perf_event *event,
				       pid_t pid, pid_t tgid,
				       perf_event__handler_t process,
				       struct machine *machine,
				       bool mmap_data,
				       unsigned int proc_map_timeout)
{
	char filename[PATH_MAX];
	FILE *fp;
	unsigned long long t;
	bool truncation = false;
	unsigned long long timeout = proc_map_timeout * 1000000ULL;
	int rc = 0;

	if (machine__is_default_guest(machine))
		return 0;

	snprintf(filename, sizeof(filename), "%s/proc/%d/maps",
		 machine->root_dir, pid);

	fp = fopen(filename, "r");
	if (fp == NULL) {
		/*
		 * We raced with a task exiting - just return:
		 */
		pr_debug("couldn't open %s\n", filename);
		return -1;
	}

	event->header.type = PERF_RECORD_MMAP2;
	t = rdclock();

	while (1) {
		char bf[BUFSIZ];
		char prot[5];
		char execname[PATH_MAX];
		char anonstr[] = "//anon";
		unsigned int ino;
		size_t size;
		ssize_t n;

		if (fgets(bf, sizeof(bf), fp) == NULL)
			break;

		/*
		 * Start with an empty name: the timeout path below jumps
		 * straight to the code that reads execname, and sscanf()
		 * leaves it untouched for mappings without a path.
		 */
		execname[0] = '\0';

		if ((rdclock() - t) > timeout) {
			pr_warning("Reading %s time out. "
				   "You may want to increase "
				   "the time limit by --proc-map-timeout\n",
				   filename);
			truncation = true;
			/*
			 * NOTE(review): this skips parsing, so the record
			 * emitted below carries whatever address fields the
			 * event buffer already held - confirm consumers only
			 * rely on the timeout flag.
			 */
			goto out;
		}

		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
		/* %4s bounds the permission field to prot[5]. */
		n = sscanf(bf, "%" PRIx64 "-%" PRIx64 " %4s %" PRIx64 " %x:%x %u %[^\n]\n",
			   &event->mmap2.start, &event->mmap2.len, prot,
			   &event->mmap2.pgoff, &event->mmap2.maj,
			   &event->mmap2.min,
			   &ino, execname);

		/*
		 * Anon maps don't have the execname.
		 */
		if (n < 7)
			continue;

		event->mmap2.ino = (u64)ino;

		/*
		 * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
		 */
		if (machine__is_host(machine))
			event->header.misc = PERF_RECORD_MISC_USER;
		else
			event->header.misc = PERF_RECORD_MISC_GUEST_USER;

		/* map protection and flags bits */
		event->mmap2.prot = 0;
		event->mmap2.flags = 0;
		if (prot[0] == 'r')
			event->mmap2.prot |= PROT_READ;
		if (prot[1] == 'w')
			event->mmap2.prot |= PROT_WRITE;
		if (prot[2] == 'x')
			event->mmap2.prot |= PROT_EXEC;

		if (prot[3] == 's')
			event->mmap2.flags |= MAP_SHARED;
		else
			event->mmap2.flags |= MAP_PRIVATE;

		/* Non-executable maps are reported only as readable data maps. */
		if (prot[2] != 'x') {
			if (!mmap_data || prot[0] != 'r')
				continue;

			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
		}

out:
		if (truncation)
			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;

		if (!strcmp(execname, ""))
			strcpy(execname, anonstr);

		size = strlen(execname) + 1;
		memcpy(event->mmap2.filename, execname, size);
		size = PERF_ALIGN(size, sizeof(u64));

		/* sscanf() stored the end address in len; turn it into a length. */
		event->mmap2.len -= event->mmap2.start;

		/* Record size: struct minus unused filename bytes, plus id header. */
		event->mmap2.header.size = (sizeof(event->mmap2) -
					(sizeof(event->mmap2.filename) - size));
		memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
		event->mmap2.header.size += machine->id_hdr_size;
		event->mmap2.pid = tgid;
		event->mmap2.tid = pid;

		if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
			rc = -1;
			break;
		}

		if (truncation)
			break;
	}

	fclose(fp);
	return rc;
}
2011-11-28 08:30:20 -02:00
/*
 * perf_event__synthesize_modules - emit one PERF_RECORD_MMAP event for every
 * map in machine->kmaps.maps[MAP__FUNCTION] for which __map__is_kernel() is
 * false.
 *
 * One event buffer of sizeof(event->mmap) + machine->id_hdr_size bytes is
 * obtained from zalloc() and reused for all maps. Each filled-in event is
 * passed to perf_tool__process_synth_event() along with @tool, @machine and
 * @process.
 *
 * Returns 0 on success and -1 when the buffer cannot be allocated or when
 * perf_tool__process_synth_event() returns non-zero; the walk stops at the
 * first such failure. The buffer is freed before returning in both cases.
 */
int perf_event__synthesize_modules ( struct perf_tool * tool ,
2011-11-25 08:19:45 -02:00
perf_event__handler_t process ,
2011-01-29 14:01:45 -02:00
struct machine * machine )
perf tools: Encode kernel module mappings in perf.data
We were always looking at the running machine /proc/modules,
even when processing a perf.data file, which only makes sense
when we're doing 'perf record' and 'perf report' on the same
machine, and in close sucession, or if we don't use modules at
all, right Peter? ;-)
Now, at 'perf record' time we read /proc/modules, find the long
path for modules, and put them as PERF_MMAP events, just like we
did to encode the reloc reference symbol for vmlinux. Talking
about that now it is encoded in .pgoff, so that we can use
.{start,len} to store the address boundaries for the kernel so
that when we reconstruct the kmaps tree we can do lookups right
away, without having to fixup the end of the kernel maps like we
did in the past (and now only in perf record).
One more step in the 'perf archive' direction when we'll finally
be able to collect data in one machine and analyse in another.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-13 13:22:17 -02:00
{
2012-08-26 12:24:42 -06:00
int rc = 0 ;
2015-05-22 11:52:22 -03:00
struct map * pos ;
2010-04-27 21:17:50 -03:00
struct map_groups * kmaps = & machine - > kmaps ;
2015-05-22 12:58:53 -03:00
struct maps * maps = & kmaps - > maps [ MAP__FUNCTION ] ;
2011-01-29 14:01:45 -02:00
/* room for the mmap record plus machine->id_hdr_size trailing bytes */
union perf_event * event = zalloc ( ( sizeof ( event - > mmap ) +
2011-11-28 07:56:39 -02:00
machine - > id_hdr_size ) ) ;
2010-12-02 10:25:28 -02:00
if ( event = = NULL ) {
pr_debug ( " Not enough memory synthesizing mmap event "
" for kernel modules \n " ) ;
return - 1 ;
}
event - > header . type = PERF_RECORD_MMAP ;
perf tools: Encode kernel module mappings in perf.data
We were always looking at the running machine /proc/modules,
even when processing a perf.data file, which only makes sense
when we're doing 'perf record' and 'perf report' on the same
machine, and in close sucession, or if we don't use modules at
all, right Peter? ;-)
Now, at 'perf record' time we read /proc/modules, find the long
path for modules, and put them as PERF_MMAP events, just like we
did to encode the reloc reference symbol for vmlinux. Talking
about that now it is encoded in .pgoff, so that we can use
.{start,len} to store the address boundaries for the kernel so
that when we reconstruct the kmaps tree we can do lookups right
away, without having to fixup the end of the kernel maps like we
did in the past (and now only in perf record).
One more step in the 'perf archive' direction when we'll finally
be able to collect data in one machine and analyse in another.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-13 13:22:17 -02:00
2010-04-19 13:32:50 +08:00
/*
* kernel uses 0 for user space maps , see kernel / perf_event . c
* __perf_event_mmap
*/
2010-04-27 21:17:50 -03:00
/* host machine: tag as kernel; any other machine: tag as guest kernel */
if ( machine__is_host ( machine ) )
2010-12-02 10:25:28 -02:00
event - > header . misc = PERF_RECORD_MISC_KERNEL ;
2010-04-19 13:32:50 +08:00
else
2010-12-02 10:25:28 -02:00
event - > header . misc = PERF_RECORD_MISC_GUEST_KERNEL ;
2010-04-19 13:32:50 +08:00
2015-05-22 11:52:22 -03:00
for ( pos = maps__first ( maps ) ; pos ; pos = map__next ( pos ) ) {
perf tools: Encode kernel module mappings in perf.data
We were always looking at the running machine /proc/modules,
even when processing a perf.data file, which only makes sense
when we're doing 'perf record' and 'perf report' on the same
machine, and in close sucession, or if we don't use modules at
all, right Peter? ;-)
Now, at 'perf record' time we read /proc/modules, find the long
path for modules, and put them as PERF_MMAP events, just like we
did to encode the reloc reference symbol for vmlinux. Talking
about that now it is encoded in .pgoff, so that we can use
.{start,len} to store the address boundaries for the kernel so
that when we reconstruct the kmaps tree we can do lookups right
away, without having to fixup the end of the kernel maps like we
did in the past (and now only in perf record).
One more step in the 'perf archive' direction when we'll finally
be able to collect data in one machine and analyse in another.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-13 13:22:17 -02:00
size_t size ;
2015-09-23 15:45:20 -03:00
/* maps for which __map__is_kernel() is true get no event here */
if ( __map__is_kernel ( pos ) )
perf tools: Encode kernel module mappings in perf.data
We were always looking at the running machine /proc/modules,
even when processing a perf.data file, which only makes sense
when we're doing 'perf record' and 'perf report' on the same
machine, and in close sucession, or if we don't use modules at
all, right Peter? ;-)
Now, at 'perf record' time we read /proc/modules, find the long
path for modules, and put them as PERF_MMAP events, just like we
did to encode the reloc reference symbol for vmlinux. Talking
about that now it is encoded in .pgoff, so that we can use
.{start,len} to store the address boundaries for the kernel so
that when we reconstruct the kmaps tree we can do lookups right
away, without having to fixup the end of the kernel maps like we
did in the past (and now only in perf record).
One more step in the 'perf archive' direction when we'll finally
be able to collect data in one machine and analyse in another.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-13 13:22:17 -02:00
continue ;
2012-09-11 01:15:01 +03:00
/*
 * Bytes of filename[] used by this map: the long name plus one more byte,
 * passed through PERF_ALIGN with sizeof(u64) (presumably rounding up to a
 * multiple of it - TODO confirm against the PERF_ALIGN definition).
 */
size = PERF_ALIGN ( pos - > dso - > long_name_len + 1 , sizeof ( u64 ) ) ;
2010-12-02 10:25:28 -02:00
event - > mmap . header . type = PERF_RECORD_MMAP ;
/* record size = full mmap struct minus the unused part of filename[] */
event - > mmap . header . size = ( sizeof ( event - > mmap ) -
( sizeof ( event - > mmap . filename ) - size ) ) ;
2011-11-28 07:56:39 -02:00
/* zero the id_hdr_size bytes right after the name and count them too */
memset ( event - > mmap . filename + size , 0 , machine - > id_hdr_size ) ;
event - > mmap . header . size + = machine - > id_hdr_size ;
2010-12-02 10:25:28 -02:00
event - > mmap . start = pos - > start ;
event - > mmap . len = pos - > end - pos - > start ;
event - > mmap . pid = machine - > pid ;
/* copies long_name_len + 1 bytes of the dso's long name */
memcpy ( event - > mmap . filename , pos - > dso - > long_name ,
perf tools: Encode kernel module mappings in perf.data
We were always looking at the running machine /proc/modules,
even when processing a perf.data file, which only makes sense
when we're doing 'perf record' and 'perf report' on the same
machine, and in close sucession, or if we don't use modules at
all, right Peter? ;-)
Now, at 'perf record' time we read /proc/modules, find the long
path for modules, and put them as PERF_MMAP events, just like we
did to encode the reloc reference symbol for vmlinux. Talking
about that now it is encoded in .pgoff, so that we can use
.{start,len} to store the address boundaries for the kernel so
that when we reconstruct the kmaps tree we can do lookups right
away, without having to fixup the end of the kernel maps like we
did in the past (and now only in perf record).
One more step in the 'perf archive' direction when we'll finally
be able to collect data in one machine and analyse in another.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-13 13:22:17 -02:00
pos - > dso - > long_name_len + 1 ) ;
perf tools: Add missing initialization of perf_sample.cpumode in synthesized samples
In 473398a21d28 ("perf tools: Add cpumode to struct perf_sample"), I
missed some places where perf_sample fields are directly initialized in
addition to what is done in perf_evsel__parse_sample(), namely when
synthesizing PERF_RECORD_{MMAP*,COMM,FORK,EXIT} for pre-existing threads
and also in intel_pt and intel_bts when synthesizing events from
processor trace, the jitdump code also was affected, fix it.
The problem was noticed with running:
# perf record -e intel_pt//u true
# perf script
Where the samples wouldn't get resolved because perf_sample.cpumode
would be left as zero, i.e. PERF_RECORD_MISC_CPUMODE_UNKNOWN, not
resolving as kernel, hypervisor or user cpu modes.
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 473398a21d28 ("perf tools: Add cpumode to struct perf_sample")
Link: http://lkml.kernel.org/n/tip-n5sdauxgk24d5nun8kuuu2mh@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-03-29 18:46:04 -03:00
/* a non-zero result aborts the walk; the buffer is still freed below */
if ( perf_tool__process_synth_event ( tool , event , machine , process ) ! = 0 ) {
2012-08-26 12:24:42 -06:00
rc = - 1 ;
break ;
}
perf tools: Encode kernel module mappings in perf.data
We were always looking at the running machine /proc/modules,
even when processing a perf.data file, which only makes sense
when we're doing 'perf record' and 'perf report' on the same
machine, and in close sucession, or if we don't use modules at
all, right Peter? ;-)
Now, at 'perf record' time we read /proc/modules, find the long
path for modules, and put them as PERF_MMAP events, just like we
did to encode the reloc reference symbol for vmlinux. Talking
about that now it is encoded in .pgoff, so that we can use
.{start,len} to store the address boundaries for the kernel so
that when we reconstruct the kmaps tree we can do lookups right
away, without having to fixup the end of the kernel maps like we
did in the past (and now only in perf record).
One more step in the 'perf archive' direction when we'll finally
be able to collect data in one machine and analyse in another.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-13 13:22:17 -02:00
}
2010-12-02 10:25:28 -02:00
free ( event ) ;
2012-08-26 12:24:42 -06:00
return rc ;
perf tools: Encode kernel module mappings in perf.data
We were always looking at the running machine /proc/modules,
even when processing a perf.data file, which only makes sense
when we're doing 'perf record' and 'perf report' on the same
machine, and in close sucession, or if we don't use modules at
all, right Peter? ;-)
Now, at 'perf record' time we read /proc/modules, find the long
path for modules, and put them as PERF_MMAP events, just like we
did to encode the reloc reference symbol for vmlinux. Talking
about that now it is encoded in .pgoff, so that we can use
.{start,len} to store the address boundaries for the kernel so
that when we reconstruct the kmaps tree we can do lookups right
away, without having to fixup the end of the kernel maps like we
did in the past (and now only in perf record).
One more step in the 'perf archive' direction when we'll finally
be able to collect data in one machine and analyse in another.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-13 13:22:17 -02:00
}
2011-01-29 14:01:45 -02:00
/*
 * __event__synthesize_thread - synthesize comm, fork and mmap events for @pid.
 *
 * @comm_event, @mmap_event and @fork_event are buffers supplied by the caller
 * and handed down to the helpers called here.
 *
 * When @full is zero, perf_event__synthesize_comm() is called once for @pid;
 * if it returns -1 this function returns -1, otherwise it returns the result
 * of perf_event__synthesize_mmap_events() for @pid.
 *
 * When @full is non-zero, the directory <machine->root_dir>/proc/<pid>/task
 * is read. For every entry whose name strtol() consumes completely, a comm
 * event is prepared, a fork event is synthesized and then the prepared comm
 * event is processed. mmap events are synthesized only for the entry whose
 * id equals @pid.
 *
 * Returns 0 on success. In the @full path it also returns 0 for the default
 * guest machine and when the task directory cannot be opened. Returns -1 when
 * preparing/synthesizing/processing an event fails, or the non-zero result of
 * perf_event__synthesize_mmap_events().
 */
static int __event__synthesize_thread ( union perf_event * comm_event ,
union perf_event * mmap_event ,
2014-03-14 10:43:44 -04:00
union perf_event * fork_event ,
2011-12-22 11:30:01 -07:00
pid_t pid , int full ,
perf_event__handler_t process ,
2011-11-28 08:30:20 -02:00
struct perf_tool * tool ,
2015-06-17 09:51:11 -04:00
struct machine * machine ,
bool mmap_data ,
unsigned int proc_map_timeout )
2009-10-26 19:23:18 -02:00
{
2014-02-26 10:45:26 -05:00
char filename [ PATH_MAX ] ;
DIR * tasks ;
perf tools: Use readdir() instead of deprecated readdir_r()
The readdir() function is thread safe as long as just one thread uses a
DIR, which is the case when synthesizing events for pre-existing threads
by traversing /proc, so, to avoid breaking the build with glibc-2.23.90
(upcoming 2.24), use it instead of readdir_r().
See: http://man7.org/linux/man-pages/man3/readdir.3.html
"However, in modern implementations (including the glibc implementation),
concurrent calls to readdir() that specify different directory streams
are thread-safe. In cases where multiple threads must read from the
same directory stream, using readdir() with external synchronization is
still preferable to the use of the deprecated readdir_r(3) function."
Noticed while building on a Fedora Rawhide docker container.
CC /tmp/build/perf/util/event.o
util/event.c: In function '__event__synthesize_thread':
util/event.c:466:2: error: 'readdir_r' is deprecated [-Werror=deprecated-declarations]
while (!readdir_r(tasks, &dirent, &next) && next) {
^~~~~
In file included from /usr/include/features.h:368:0,
from /usr/include/stdint.h:25,
from /usr/lib/gcc/x86_64-redhat-linux/6.0.0/include/stdint.h:9,
from /git/linux/tools/include/linux/types.h:6,
from util/event.c:1:
/usr/include/dirent.h:189:12: note: declared here
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-i1vj7nyjp2p750rirxgrfd3c@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-08 11:32:15 -03:00
struct dirent * dirent ;
2015-03-30 14:35:58 -06:00
pid_t tgid , ppid ;
2015-04-08 11:57:03 -03:00
int rc = 0 ;
2014-02-26 10:45:26 -05:00
/* special case: only send one comm event using passed in pid */
if ( ! full ) {
tgid = perf_event__synthesize_comm ( tool , comm_event , pid ,
process , machine ) ;
if ( tgid = = - 1 )
return - 1 ;
return perf_event__synthesize_mmap_events ( tool , mmap_event , pid , tgid ,
2015-06-17 09:51:11 -04:00
process , machine , mmap_data ,
proc_map_timeout ) ;
2014-02-26 10:45:26 -05:00
}
/* nothing is synthesized for the default guest machine */
if ( machine__is_default_guest ( machine ) )
return 0 ;
snprintf ( filename , sizeof ( filename ) , " %s/proc/%d/task " ,
machine - > root_dir , pid ) ;
tasks = opendir ( filename ) ;
/* an unreadable task directory is logged but not reported as an error */
if ( tasks = = NULL ) {
pr_debug ( " couldn't open %s \n " , filename ) ;
return 0 ;
}
perf tools: Use readdir() instead of deprecated readdir_r()
The readdir() function is thread safe as long as just one thread uses a
DIR, which is the case when synthesizing events for pre-existing threads
by traversing /proc, so, to avoid breaking the build with glibc-2.23.90
(upcoming 2.24), use it instead of readdir_r().
See: http://man7.org/linux/man-pages/man3/readdir.3.html
"However, in modern implementations (including the glibc implementation),
concurrent calls to readdir() that specify different directory streams
are thread-safe. In cases where multiple threads must read from the
same directory stream, using readdir() with external synchronization is
still preferable to the use of the deprecated readdir_r(3) function."
Noticed while building on a Fedora Rawhide docker container.
CC /tmp/build/perf/util/event.o
util/event.c: In function '__event__synthesize_thread':
util/event.c:466:2: error: 'readdir_r' is deprecated [-Werror=deprecated-declarations]
while (!readdir_r(tasks, &dirent, &next) && next) {
^~~~~
In file included from /usr/include/features.h:368:0,
from /usr/include/stdint.h:25,
from /usr/lib/gcc/x86_64-redhat-linux/6.0.0/include/stdint.h:9,
from /git/linux/tools/include/linux/types.h:6,
from util/event.c:1:
/usr/include/dirent.h:189:12: note: declared here
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-i1vj7nyjp2p750rirxgrfd3c@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-08 11:32:15 -03:00
while ( ( dirent = readdir ( tasks ) ) ! = NULL ) {
2014-02-26 10:45:26 -05:00
char * end ;
pid_t _pid ;
perf tools: Use readdir() instead of deprecated readdir_r()
The readdir() function is thread safe as long as just one thread uses a
DIR, which is the case when synthesizing events for pre-existing threads
by traversing /proc, so, to avoid breaking the build with glibc-2.23.90
(upcoming 2.24), use it instead of readdir_r().
See: http://man7.org/linux/man-pages/man3/readdir.3.html
"However, in modern implementations (including the glibc implementation),
concurrent calls to readdir() that specify different directory streams
are thread-safe. In cases where multiple threads must read from the
same directory stream, using readdir() with external synchronization is
still preferable to the use of the deprecated readdir_r(3) function."
Noticed while building on a Fedora Rawhide docker container.
CC /tmp/build/perf/util/event.o
util/event.c: In function '__event__synthesize_thread':
util/event.c:466:2: error: 'readdir_r' is deprecated [-Werror=deprecated-declarations]
while (!readdir_r(tasks, &dirent, &next) && next) {
^~~~~
In file included from /usr/include/features.h:368:0,
from /usr/include/stdint.h:25,
from /usr/lib/gcc/x86_64-redhat-linux/6.0.0/include/stdint.h:9,
from /git/linux/tools/include/linux/types.h:6,
from util/event.c:1:
/usr/include/dirent.h:189:12: note: declared here
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-i1vj7nyjp2p750rirxgrfd3c@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-08 11:32:15 -03:00
/* parse the entry name as a base-10 id; skip it if strtol() stopped early */
_pid = strtol ( dirent - > d_name , & end , 10 ) ;
2014-02-26 10:45:26 -05:00
if ( * end )
continue ;
2015-04-08 11:57:03 -03:00
/* pessimistic default: any break below leaves the loop with rc == -1 */
rc = - 1 ;
2015-03-30 14:35:58 -06:00
if ( perf_event__prepare_comm ( comm_event , _pid , machine ,
& tgid , & ppid ) ! = 0 )
2015-04-08 11:57:03 -03:00
break ;
2014-02-26 10:45:26 -05:00
perf tools: Fix FORK after COMM when synthesizing records for pre-existing threads
In this commit:
commit 363b785f3805a2632eb09a8b430842461c21a640
Author: Don Zickus <dzickus@redhat.com>
Date: Fri Mar 14 10:43:44 2014 -0400
perf tools: Speed up thread map generation
We ended up emitting PERF_RECORD_FORK events after their corresponding
PERF_RECORD_COMM, so the code below will remove the "existing thread"
and then recreates it, unnecessarily:
[root@ssdandy ~]# perf probe -x ~/bin/perf -L machine__process_fork_event
<machine__process_fork_event@/home/acme/git/linux/tools/perf/util/machine.c:0>
0 int machine__process_fork_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample)
2 {
3 struct thread *thread = machine__find_thread(machine,
event->fork.pid,
event->fork.tid);
6 struct thread *parent = machine__findnew_thread(machine,
event->fork.ppid,
event->fork.ptid);
/* if a thread currently exists for the thread id remove it */
if (thread != NULL)
12 machine__remove_thread(machine, thread);
14 thread = machine__findnew_thread(machine, event->fork.pid,
event->fork.tid);
16 if (dump_trace)
17 perf_event__fprintf_task(event, stdout);
19 if (thread == NULL || parent == NULL ||
20 thread__fork(thread, parent, sample->time) < 0) {
21 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
22 return -1;
}
25 return 0;
26 }
[root@ssdandy ~]# perf probe -x ~/bin/perf fork_after_comm=machine__process_fork_event:12
Added new event:
probe_perf:fork_after_comm (on machine__process_fork_event:12 in /home/acme/bin/perf)
You can now use it in all perf tools, such as:
perf record -e probe_perf:fork_after_comm -aR sleep 1
[root@ssdandy ~]#
[root@ssdandy ~]# perf record -g -e probe_perf:* trace -o /tmp/bla
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.021 MB perf.data (30 samples) ]
Terminated
[root@ssdandy ~]#
[root@ssdandy ~]# perf report --no-children --show-total-period --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
# Samples: 30 of event 'probe_perf:fork_after_comm'
# Event count (approx.): 30
#
# Overhead Period Command Shared Object Symbol
# ........ ............ ....... ............. ...............................
#
100.00% 30 trace trace [.] machine__process_fork_event
|
---machine__process_fork_event
__event__synthesize_thread.part.2
perf_event__synthesize_threads
cmd_trace
main
__libc_start_main
[root@ssdandy ~]#
And Looking at 'perf report -D' output we see it:
0 0 0x8698 [0x30]: PERF_RECORD_COMM: auditd:703/707
0 0 0x86c8 [0x38]: PERF_RECORD_FORK(703:707):(703:703)
Fix it by more closely mimicking how the kernel generates those records
when a new fork happens, i.e. first a PERF_RECORD_FORK, then a
PERF_RECORD_COMM.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-h0emvymi2t3mw8dlqd6d6z73@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-02-27 19:52:10 -03:00
/* the fork event is emitted before the already prepared comm event */
if ( perf_event__synthesize_fork ( tool , fork_event , _pid , tgid ,
2015-03-30 14:35:58 -06:00
ppid , process , machine ) < 0 )
2015-04-08 11:57:03 -03:00
break ;
perf tools: Fix FORK after COMM when synthesizing records for pre-existing threads
In this commit:
commit 363b785f3805a2632eb09a8b430842461c21a640
Author: Don Zickus <dzickus@redhat.com>
Date: Fri Mar 14 10:43:44 2014 -0400
perf tools: Speed up thread map generation
We ended up emitting PERF_RECORD_FORK events after their corresponding
PERF_RECORD_COMM, so the code below will remove the "existing thread"
and then recreates it, unnecessarily:
[root@ssdandy ~]# perf probe -x ~/bin/perf -L machine__process_fork_event
<machine__process_fork_event@/home/acme/git/linux/tools/perf/util/machine.c:0>
0 int machine__process_fork_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample)
2 {
3 struct thread *thread = machine__find_thread(machine,
event->fork.pid,
event->fork.tid);
6 struct thread *parent = machine__findnew_thread(machine,
event->fork.ppid,
event->fork.ptid);
/* if a thread currently exists for the thread id remove it */
if (thread != NULL)
12 machine__remove_thread(machine, thread);
14 thread = machine__findnew_thread(machine, event->fork.pid,
event->fork.tid);
16 if (dump_trace)
17 perf_event__fprintf_task(event, stdout);
19 if (thread == NULL || parent == NULL ||
20 thread__fork(thread, parent, sample->time) < 0) {
21 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
22 return -1;
}
25 return 0;
26 }
[root@ssdandy ~]# perf probe -x ~/bin/perf fork_after_comm=machine__process_fork_event:12
Added new event:
probe_perf:fork_after_comm (on machine__process_fork_event:12 in /home/acme/bin/perf)
You can now use it in all perf tools, such as:
perf record -e probe_perf:fork_after_comm -aR sleep 1
[root@ssdandy ~]#
[root@ssdandy ~]# perf record -g -e probe_perf:* trace -o /tmp/bla
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.021 MB perf.data (30 samples) ]
Terminated
[root@ssdandy ~]#
[root@ssdandy ~]# perf report --no-children --show-total-period --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
# Samples: 30 of event 'probe_perf:fork_after_comm'
# Event count (approx.): 30
#
# Overhead Period Command Shared Object Symbol
# ........ ............ ....... ............. ...............................
#
100.00% 30 trace trace [.] machine__process_fork_event
|
---machine__process_fork_event
__event__synthesize_thread.part.2
perf_event__synthesize_threads
cmd_trace
main
__libc_start_main
[root@ssdandy ~]#
And Looking at 'perf report -D' output we see it:
0 0 0x8698 [0x30]: PERF_RECORD_COMM: auditd:703/707
0 0 0x86c8 [0x38]: PERF_RECORD_FORK(703:707):(703:703)
Fix it by more closely mimicking how the kernel generates those records
when a new fork happens, i.e. first a PERF_RECORD_FORK, then a
PERF_RECORD_COMM.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-h0emvymi2t3mw8dlqd6d6z73@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-02-27 19:52:10 -03:00
/*
* Send the prepared comm event
*/
perf tools: Add missing initialization of perf_sample.cpumode in synthesized samples
In 473398a21d28 ("perf tools: Add cpumode to struct perf_sample"), I
missed some places where perf_sample fields are directly initialized in
addition to what is done in perf_evsel__parse_sample(), namely when
synthesizing PERF_RECORD_{MMAP*,COMM,FORK,EXIT} for pre-existing threads
and also in intel_pt and intel_bts when synthesizing events from
processor trace, the jitdump code also was affected, fix it.
The problem was noticed with running:
# perf record -e intel_pt//u true
# perf script
Where the samples wouldn't get resolved because perf_sample.cpumode
would be left as zero, i.e. PERF_RECORD_MISC_CPUMODE_UNKNOWN, not
resolving as kernel, hypervisor or user cpu modes.
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 473398a21d28 ("perf tools: Add cpumode to struct perf_sample")
Link: http://lkml.kernel.org/n/tip-n5sdauxgk24d5nun8kuuu2mh@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-03-29 18:46:04 -03:00
if ( perf_tool__process_synth_event ( tool , comm_event , machine , process ) ! = 0 )
2015-04-08 11:57:03 -03:00
break ;
perf tools: Fix FORK after COMM when synthesizing records for pre-existing threads
In this commit:
commit 363b785f3805a2632eb09a8b430842461c21a640
Author: Don Zickus <dzickus@redhat.com>
Date: Fri Mar 14 10:43:44 2014 -0400
perf tools: Speed up thread map generation
We ended up emitting PERF_RECORD_FORK events after their corresponding
PERF_RECORD_COMM, so the code below will remove the "existing thread"
and then recreates it, unnecessarily:
[root@ssdandy ~]# perf probe -x ~/bin/perf -L machine__process_fork_event
<machine__process_fork_event@/home/acme/git/linux/tools/perf/util/machine.c:0>
0 int machine__process_fork_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample)
2 {
3 struct thread *thread = machine__find_thread(machine,
event->fork.pid,
event->fork.tid);
6 struct thread *parent = machine__findnew_thread(machine,
event->fork.ppid,
event->fork.ptid);
/* if a thread currently exists for the thread id remove it */
if (thread != NULL)
12 machine__remove_thread(machine, thread);
14 thread = machine__findnew_thread(machine, event->fork.pid,
event->fork.tid);
16 if (dump_trace)
17 perf_event__fprintf_task(event, stdout);
19 if (thread == NULL || parent == NULL ||
20 thread__fork(thread, parent, sample->time) < 0) {
21 dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
22 return -1;
}
25 return 0;
26 }
[root@ssdandy ~]# perf probe -x ~/bin/perf fork_after_comm=machine__process_fork_event:12
Added new event:
probe_perf:fork_after_comm (on machine__process_fork_event:12 in /home/acme/bin/perf)
You can now use it in all perf tools, such as:
perf record -e probe_perf:fork_after_comm -aR sleep 1
[root@ssdandy ~]#
[root@ssdandy ~]# perf record -g -e probe_perf:* trace -o /tmp/bla
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.021 MB perf.data (30 samples) ]
Terminated
[root@ssdandy ~]#
[root@ssdandy ~]# perf report --no-children --show-total-period --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
# Samples: 30 of event 'probe_perf:fork_after_comm'
# Event count (approx.): 30
#
# Overhead Period Command Shared Object Symbol
# ........ ............ ....... ............. ...............................
#
100.00% 30 trace trace [.] machine__process_fork_event
|
---machine__process_fork_event
__event__synthesize_thread.part.2
perf_event__synthesize_threads
cmd_trace
main
__libc_start_main
[root@ssdandy ~]#
And Looking at 'perf report -D' output we see it:
0 0 0x8698 [0x30]: PERF_RECORD_COMM: auditd:703/707
0 0 0x86c8 [0x38]: PERF_RECORD_FORK(703:707):(703:703)
Fix it by more closely mimicking how the kernel generates those records
when a new fork happens, i.e. first a PERF_RECORD_FORK, then a
PERF_RECORD_COMM.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-h0emvymi2t3mw8dlqd6d6z73@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-02-27 19:52:10 -03:00
2015-04-08 11:57:03 -03:00
/* comm and fork steps succeeded for this task */
rc = 0 ;
2014-03-14 10:43:44 -04:00
if ( _pid = = pid ) {
/* process the parent's maps too */
rc = perf_event__synthesize_mmap_events ( tool , mmap_event , pid , tgid ,
2015-06-17 09:51:11 -04:00
process , machine , mmap_data , proc_map_timeout ) ;
2015-04-08 11:57:03 -03:00
if ( rc )
break ;
2014-03-14 10:43:44 -04:00
}
2014-02-26 10:45:26 -05:00
}
/* reached on normal completion and on every break above */
closedir ( tasks ) ;
2015-04-08 11:57:03 -03:00
return rc ;
2009-10-26 19:23:18 -02:00
}
2011-11-28 08:30:20 -02:00
/*
 * Synthesize events for each thread listed in @threads.
 *
 * One comm, one mmap (sized for the mmap2 variant) and one fork event
 * buffer are allocated, each with machine->id_hdr_size extra bytes, and
 * reused for every __event__synthesize_thread() call.  After a thread has
 * been handled, comm_event->comm.pid is compared with the pid taken from
 * the map; when it differs and is not itself present in the map, events are
 * synthesized for that pid as well.
 *
 * Returns 0 on success, -1 if an allocation fails or any
 * __event__synthesize_thread() call returns non-zero.
 */
int perf_event__synthesize_thread_map ( struct perf_tool * tool ,
2011-11-25 08:19:45 -02:00
struct thread_map * threads ,
2011-02-11 11:45:54 -02:00
perf_event__handler_t process ,
2013-11-11 09:44:09 -03:00
struct machine * machine ,
2015-06-17 09:51:11 -04:00
bool mmap_data ,
unsigned int proc_map_timeout )
2010-12-02 10:25:28 -02:00
{
2014-03-14 10:43:44 -04:00
union perf_event * comm_event , * mmap_event , * fork_event ;
2011-12-22 11:30:01 -07:00
int err = - 1 , thread , j ;
2010-12-02 10:25:28 -02:00
2011-11-28 07:56:39 -02:00
comm_event = malloc ( sizeof ( comm_event - > comm ) + machine - > id_hdr_size ) ;
2010-12-02 10:25:28 -02:00
if ( comm_event = = NULL )
goto out ;
2016-01-12 10:12:04 +00:00
mmap_event = malloc ( sizeof ( mmap_event - > mmap2 ) + machine - > id_hdr_size ) ;
2010-12-02 10:25:28 -02:00
if ( mmap_event = = NULL )
goto out_free_comm ;
2014-03-14 10:43:44 -04:00
fork_event = malloc ( sizeof ( fork_event - > fork ) + machine - > id_hdr_size ) ;
if ( fork_event = = NULL )
goto out_free_mmap ;
2011-02-10 12:52:47 -02:00
/* all buffers are in place; from here on only a failing thread resets err to -1 */
err = 0 ;
for ( thread = 0 ; thread < threads - > nr ; + + thread ) {
if ( __event__synthesize_thread ( comm_event , mmap_event ,
2014-03-14 10:43:44 -04:00
fork_event ,
2015-06-23 00:36:02 +02:00
thread_map__pid ( threads , thread ) , 0 ,
2013-11-11 09:44:09 -03:00
process , tool , machine ,
2015-06-17 09:51:11 -04:00
mmap_data , proc_map_timeout ) ) {
2011-02-10 12:52:47 -02:00
err = - 1 ;
break ;
}
2011-12-22 11:30:01 -07:00
/*
* comm . pid is set to thread group id by
* perf_event__synthesize_comm
*/
2015-06-23 00:36:02 +02:00
if ( ( int ) comm_event - > comm . pid ! = thread_map__pid ( threads , thread ) ) {
2011-12-22 11:30:01 -07:00
bool need_leader = true ;
/* is thread group leader in thread_map? */
for ( j = 0 ; j < threads - > nr ; + + j ) {
2015-06-23 00:36:02 +02:00
if ( ( int ) comm_event - > comm . pid = = thread_map__pid ( threads , j ) ) {
2011-12-22 11:30:01 -07:00
need_leader = false ;
break ;
}
}
/* if not, generate events for it */
if ( need_leader & &
2013-11-11 09:44:09 -03:00
__event__synthesize_thread ( comm_event , mmap_event ,
2014-03-14 10:43:44 -04:00
fork_event ,
2013-11-11 09:44:09 -03:00
comm_event - > comm . pid , 0 ,
process , tool , machine ,
2015-06-17 09:51:11 -04:00
mmap_data , proc_map_timeout ) ) {
2011-12-22 11:30:01 -07:00
err = - 1 ;
break ;
}
}
2011-02-10 12:52:47 -02:00
}
2014-03-14 10:43:44 -04:00
/* success and loop-failure paths both fall through the labels below, releasing every buffer */
free ( fork_event ) ;
out_free_mmap :
2010-12-02 10:25:28 -02:00
free ( mmap_event ) ;
out_free_comm :
free ( comm_event ) ;
out :
return err ;
}
2011-11-28 08:30:20 -02:00
/*
 * Synthesize events for every numerically named entry found in the proc
 * directory below machine->root_dir.
 *
 * Returns 0 straight away for the default guest machine.  Otherwise one
 * comm, one mmap (sized for the mmap2 variant) and one fork event buffer
 * are allocated and reused for each __event__synthesize_thread() call.
 * The return values of those calls are ignored, so the result is 0 once
 * the directory has been walked, and -1 only when a buffer cannot be
 * allocated or the directory cannot be opened.
 */
int perf_event__synthesize_threads ( struct perf_tool * tool ,
2011-11-25 08:19:45 -02:00
perf_event__handler_t process ,
2015-06-17 09:51:11 -04:00
struct machine * machine ,
bool mmap_data ,
unsigned int proc_map_timeout )
2009-10-26 19:23:18 -02:00
{
DIR * proc ;
2013-12-20 15:52:57 -05:00
char proc_path [ PATH_MAX ] ;
perf tools: Use readdir() instead of deprecated readdir_r()
The readdir() function is thread safe as long as just one thread uses a
DIR, which is the case when synthesizing events for pre-existing threads
by traversing /proc, so, to avoid breaking the build with glibc-2.23.90
(upcoming 2.24), use it instead of readdir_r().
See: http://man7.org/linux/man-pages/man3/readdir.3.html
"However, in modern implementations (including the glibc implementation),
concurrent calls to readdir() that specify different directory streams
are thread-safe. In cases where multiple threads must read from the
same directory stream, using readdir() with external synchronization is
still preferable to the use of the deprecated readdir_r(3) function."
Noticed while building on a Fedora Rawhide docker container.
CC /tmp/build/perf/util/event.o
util/event.c: In function '__event__synthesize_thread':
util/event.c:466:2: error: 'readdir_r' is deprecated [-Werror=deprecated-declarations]
while (!readdir_r(tasks, &dirent, &next) && next) {
^~~~~
In file included from /usr/include/features.h:368:0,
from /usr/include/stdint.h:25,
from /usr/lib/gcc/x86_64-redhat-linux/6.0.0/include/stdint.h:9,
from /git/linux/tools/include/linux/types.h:6,
from util/event.c:1:
/usr/include/dirent.h:189:12: note: declared here
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-i1vj7nyjp2p750rirxgrfd3c@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-08 11:32:15 -03:00
struct dirent * dirent ;
2014-03-14 10:43:44 -04:00
union perf_event * comm_event , * mmap_event , * fork_event ;
2010-12-02 10:25:28 -02:00
int err = - 1 ;
2014-03-17 10:45:49 +09:00
if ( machine__is_default_guest ( machine ) )
return 0 ;
2011-11-28 07:56:39 -02:00
comm_event = malloc ( sizeof ( comm_event - > comm ) + machine - > id_hdr_size ) ;
2010-12-02 10:25:28 -02:00
if ( comm_event = = NULL )
goto out ;
2016-01-12 10:12:04 +00:00
mmap_event = malloc ( sizeof ( mmap_event - > mmap2 ) + machine - > id_hdr_size ) ;
2010-12-02 10:25:28 -02:00
if ( mmap_event = = NULL )
goto out_free_comm ;
2009-10-26 19:23:18 -02:00
2014-03-14 10:43:44 -04:00
fork_event = malloc ( sizeof ( fork_event - > fork ) + machine - > id_hdr_size ) ;
if ( fork_event = = NULL )
goto out_free_mmap ;
2013-12-20 15:52:57 -05:00
snprintf ( proc_path , sizeof ( proc_path ) , " %s/proc " , machine - > root_dir ) ;
proc = opendir ( proc_path ) ;
2010-12-02 10:25:28 -02:00
if ( proc = = NULL )
2014-03-14 10:43:44 -04:00
goto out_free_fork ;
2009-10-26 19:23:18 -02:00
perf tools: Use readdir() instead of deprecated readdir_r()
The readdir() function is thread safe as long as just one thread uses a
DIR, which is the case when synthesizing events for pre-existing threads
by traversing /proc, so, to avoid breaking the build with glibc-2.23.90
(upcoming 2.24), use it instead of readdir_r().
See: http://man7.org/linux/man-pages/man3/readdir.3.html
"However, in modern implementations (including the glibc implementation),
concurrent calls to readdir() that specify different directory streams
are thread-safe. In cases where multiple threads must read from the
same directory stream, using readdir() with external synchronization is
still preferable to the use of the deprecated readdir_r(3) function."
Noticed while building on a Fedora Rawhide docker container.
CC /tmp/build/perf/util/event.o
util/event.c: In function '__event__synthesize_thread':
util/event.c:466:2: error: 'readdir_r' is deprecated [-Werror=deprecated-declarations]
while (!readdir_r(tasks, &dirent, &next) && next) {
^~~~~
In file included from /usr/include/features.h:368:0,
from /usr/include/stdint.h:25,
from /usr/lib/gcc/x86_64-redhat-linux/6.0.0/include/stdint.h:9,
from /git/linux/tools/include/linux/types.h:6,
from util/event.c:1:
/usr/include/dirent.h:189:12: note: declared here
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-i1vj7nyjp2p750rirxgrfd3c@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-08 11:32:15 -03:00
while ( ( dirent = readdir ( proc ) ) ! = NULL ) {
2009-10-26 19:23:18 -02:00
char * end ;
perf tools: Use readdir() instead of deprecated readdir_r()
The readdir() function is thread safe as long as just one thread uses a
DIR, which is the case when synthesizing events for pre-existing threads
by traversing /proc, so, to avoid breaking the build with glibc-2.23.90
(upcoming 2.24), use it instead of readdir_r().
See: http://man7.org/linux/man-pages/man3/readdir.3.html
"However, in modern implementations (including the glibc implementation),
concurrent calls to readdir() that specify different directory streams
are thread-safe. In cases where multiple threads must read from the
same directory stream, using readdir() with external synchronization is
still preferable to the use of the deprecated readdir_r(3) function."
Noticed while building on a Fedora Rawhide docker container.
CC /tmp/build/perf/util/event.o
util/event.c: In function '__event__synthesize_thread':
util/event.c:466:2: error: 'readdir_r' is deprecated [-Werror=deprecated-declarations]
while (!readdir_r(tasks, &dirent, &next) && next) {
^~~~~
In file included from /usr/include/features.h:368:0,
from /usr/include/stdint.h:25,
from /usr/lib/gcc/x86_64-redhat-linux/6.0.0/include/stdint.h:9,
from /git/linux/tools/include/linux/types.h:6,
from util/event.c:1:
/usr/include/dirent.h:189:12: note: declared here
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-i1vj7nyjp2p750rirxgrfd3c@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-08 11:32:15 -03:00
pid_t pid = strtol ( dirent - > d_name , & end , 10 ) ;
2009-10-26 19:23:18 -02:00
if ( * end ) /* only interested in proper numerical dirents */
continue ;
2012-10-17 18:50:13 -03:00
/*
* We may race with exiting thread , so don ' t stop just because
* one thread couldn ' t be synthesized .
*/
2014-03-14 10:43:44 -04:00
__event__synthesize_thread ( comm_event , mmap_event , fork_event , pid ,
2015-06-17 09:51:11 -04:00
1 , process , tool , machine , mmap_data ,
proc_map_timeout ) ;
2009-10-26 19:23:18 -02:00
}
2010-12-02 10:25:28 -02:00
/* reaching this point means the walk finished; per-thread failures were deliberately ignored above */
err = 0 ;
2012-08-26 12:24:42 -06:00
closedir ( proc ) ;
2014-03-14 10:43:44 -04:00
out_free_fork :
free ( fork_event ) ;
2010-12-02 10:25:28 -02:00
out_free_mmap :
free ( mmap_event ) ;
out_free_comm :
free ( comm_event ) ;
out :
return err ;
2009-10-26 19:23:18 -02:00
}
2009-11-27 16:29:22 -02:00
2010-01-05 16:50:31 -02:00
/* Lookup state shared between kallsyms__get_function_start() and find_symbol_cb(). */
struct process_symbol_args {
/* symbol name to look for; compared with strcmp() in find_symbol_cb() */
const char * name ;
/* filled in by find_symbol_cb() with the start address of the matching symbol */
u64 start ;
} ;
2010-12-22 01:08:36 -02:00
/*
 * Callback handed to kallsyms__parse() by kallsyms__get_function_start().
 *
 * @arg is a struct process_symbol_args.  A symbol matches when its type is
 * MAP__FUNCTION (per symbol_type__is_a()) or 'A' and its name equals
 * args->name; the start address is then stored in args->start.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int find_symbol_cb ( void * arg , const char * name , char type ,
2012-08-10 15:22:48 -07:00
u64 start )
2010-01-05 16:50:31 -02:00
{
struct process_symbol_args * args = arg ;
2010-01-15 18:08:27 -02:00
/*
* Must be a function or at least an alias , as in PARISC64 , where " _text " is
* an ' A ' to the same address as " _stext " .
*/
if ( ! ( symbol_type__is_a ( type , MAP__FUNCTION ) | |
type = = ' A ' ) | | strcmp ( name , args - > name ) )
2010-01-05 16:50:31 -02:00
return 0 ;
args - > start = start ;
return 1 ;
}
2014-01-29 16:14:37 +02:00
u64 kallsyms__get_function_start ( const char * kallsyms_filename ,
const char * symbol_name )
{
struct process_symbol_args args = { . name = symbol_name , } ;
if ( kallsyms__parse ( kallsyms_filename , & args , find_symbol_cb ) < = 0 )
return 0 ;
return args . start ;
}
2011-11-28 08:30:20 -02:00
/*
 * Synthesize a PERF_RECORD_MMAP event describing the kernel map of
 * @machine and pass it to perf_tool__process_synth_event().
 *
 * The filename is the machine's mmap name followed by the name of the
 * kernel map's ref_reloc_sym; pgoff carries that symbol's address, while
 * start and len are taken from the map's start and end.  header.misc is
 * PERF_RECORD_MISC_KERNEL for the host machine and
 * PERF_RECORD_MISC_GUEST_KERNEL otherwise.
 *
 * Returns -1 when the machine has no kernel map or the event cannot be
 * allocated, otherwise the result of perf_tool__process_synth_event().
 */
int perf_event__synthesize_kernel_mmap ( struct perf_tool * tool ,
2011-11-25 08:19:45 -02:00
perf_event__handler_t process ,
2014-01-29 16:14:40 +02:00
struct machine * machine )
2010-01-05 16:50:31 -02:00
{
size_t size ;
2014-01-29 16:14:40 +02:00
const char * mmap_name ;
2010-04-19 13:32:50 +08:00
char name_buff [ PATH_MAX ] ;
2015-09-30 11:54:04 -03:00
struct map * map = machine__kernel_map ( machine ) ;
2014-01-29 16:14:40 +02:00
struct kmap * kmap ;
2010-12-02 10:25:28 -02:00
int err ;
2014-09-24 14:39:54 -07:00
union perf_event * event ;
2015-09-30 11:08:58 -03:00
if ( map = = NULL )
2014-09-24 14:39:54 -07:00
return - 1 ;
2010-01-05 16:50:31 -02:00
/*
* We should get this from / sys / kernel / sections / . text , but till that is
* available use this , and after it is use this as a fallback for older
* kernels .
*/
2014-09-24 14:39:54 -07:00
event = zalloc ( ( sizeof ( event - > mmap ) + machine - > id_hdr_size ) ) ;
2010-12-02 10:25:28 -02:00
if ( event = = NULL ) {
/* NOTE(review): the message mentions kernel modules although this function synthesizes the kernel mmap - presumably copied; confirm before rewording */
pr_debug ( " Not enough memory synthesizing mmap event "
" for kernel modules \n " ) ;
return - 1 ;
}
2010-01-05 16:50:31 -02:00
2010-04-27 21:19:05 -03:00
mmap_name = machine__mmap_name ( machine , name_buff , sizeof ( name_buff ) ) ;
2010-04-27 21:17:50 -03:00
if ( machine__is_host ( machine ) ) {
2010-04-19 13:32:50 +08:00
/*
* kernel uses PERF_RECORD_MISC_USER for user space maps ,
* see kernel / perf_event . c __perf_event_mmap
*/
2010-12-02 10:25:28 -02:00
event - > header . misc = PERF_RECORD_MISC_KERNEL ;
2010-04-19 13:32:50 +08:00
} else {
2010-12-02 10:25:28 -02:00
event - > header . misc = PERF_RECORD_MISC_GUEST_KERNEL ;
2013-01-25 11:20:47 +01:00
}
2010-01-05 16:50:31 -02:00
2014-01-29 16:14:40 +02:00
/* NOTE(review): kmap->ref_reloc_sym is dereferenced below without a NULL check - confirm it is always set for the kernel map */
kmap = map__kmap ( map ) ;
2010-12-02 10:25:28 -02:00
/* NOTE(review): snprintf() returns the untruncated length, so size could exceed sizeof(filename) if the name did not fit - confirm that cannot happen */
size = snprintf ( event - > mmap . filename , sizeof ( event - > mmap . filename ) ,
2014-01-29 16:14:40 +02:00
" %s%s " , mmap_name , kmap - > ref_reloc_sym - > name ) + 1 ;
2012-09-11 01:15:01 +03:00
size = PERF_ALIGN ( size , sizeof ( u64 ) ) ;
2010-12-02 10:25:28 -02:00
event - > mmap . header . type = PERF_RECORD_MMAP ;
/* record size = mmap struct minus the unused tail of filename, plus the id header */
event - > mmap . header . size = ( sizeof ( event - > mmap ) -
2011-11-28 07:56:39 -02:00
( sizeof ( event - > mmap . filename ) - size ) + machine - > id_hdr_size ) ;
2014-01-29 16:14:40 +02:00
event - > mmap . pgoff = kmap - > ref_reloc_sym - > addr ;
2010-12-02 10:25:28 -02:00
event - > mmap . start = map - > start ;
event - > mmap . len = map - > end - event - > mmap . start ;
event - > mmap . pid = machine - > pid ;
perf tools: Add missing initialization of perf_sample.cpumode in synthesized samples
In 473398a21d28 ("perf tools: Add cpumode to struct perf_sample"), I
missed some places where perf_sample fields are directly initialized in
addition to what is done in perf_evsel__parse_sample(), namely when
synthesizing PERF_RECORD_{MMAP*,COMM,FORK,EXIT} for pre-existing threads
and also in intel_pt and intel_bts when synthesizing events from
processor trace, the jitdump code also was affected, fix it.
The problem was noticed with running:
# perf record -e intel_pt//u true
# perf script
Where the samples wouldn't get resolved because perf_sample.cpumode
would be left as zero, i.e. PERF_RECORD_MISC_CPUMODE_UNKNOWN, not
resolving as kernel, hypervisor or user cpu modes.
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 473398a21d28 ("perf tools: Add cpumode to struct perf_sample")
Link: http://lkml.kernel.org/n/tip-n5sdauxgk24d5nun8kuuu2mh@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-03-29 18:46:04 -03:00
err = perf_tool__process_synth_event ( tool , event , machine , process ) ;
2010-12-02 10:25:28 -02:00
free ( event ) ;
return err ;
2010-01-05 16:50:31 -02:00
}
2015-10-25 15:51:20 +01:00
/*
 * Build a PERF_RECORD_THREAD_MAP event holding one entry (pid and comm)
 * per thread in @threads and deliver it through @process.
 *
 * A thread without a comm gets the fallback literal copied instead.  The
 * event is zero-allocated and released again after @process returns.
 *
 * Returns -ENOMEM if the event cannot be allocated, otherwise the value
 * returned by @process.
 */
int perf_event__synthesize_thread_map2(struct perf_tool *tool,
				       struct thread_map *threads,
				       perf_event__handler_t process,
				       struct machine *machine)
{
	union perf_event *ev;
	int idx, ret;
	int len = sizeof(ev->thread_map) +
		  threads->nr * sizeof(ev->thread_map.entries[0]);

	ev = zalloc(len);
	if (ev == NULL)
		return -ENOMEM;

	ev->header.type = PERF_RECORD_THREAD_MAP;
	ev->header.size = len;
	ev->thread_map.nr = threads->nr;

	for (idx = 0; idx < threads->nr; idx++) {
		struct thread_map_event_entry *slot = &ev->thread_map.entries[idx];
		char *name = thread_map__comm(threads, idx);

		slot->pid = thread_map__pid(threads, idx);
		/* bounded copy into the fixed-size comm field of the entry */
		strncpy((char *)&slot->comm, name ? name : (char *)" ",
			sizeof(slot->comm));
	}

	ret = process(tool, ev, NULL, machine);
	free(ev);

	return ret;
}
2015-10-25 15:51:24 +01:00
static void synthesize_cpus ( struct cpu_map_entries * cpus ,
struct cpu_map * map )
{
int i ;
cpus - > nr = map - > nr ;
for ( i = 0 ; i < map - > nr ; i + + )
cpus - > cpu [ i ] = map - > map [ i ] ;
}
static void synthesize_mask ( struct cpu_map_mask * mask ,
struct cpu_map * map , int max )
{
int i ;
mask - > nr = BITS_TO_LONGS ( max ) ;
mask - > long_size = sizeof ( long ) ;
for ( i = 0 ; i < map - > nr ; i + + )
set_bit ( map - > map [ i ] , mask - > mask ) ;
}
static size_t cpus_size ( struct cpu_map * map )
{
return sizeof ( struct cpu_map_entries ) + map - > nr * sizeof ( u16 ) ;
}
static size_t mask_size ( struct cpu_map * map , int * max )
{
int i ;
* max = 0 ;
for ( i = 0 ; i < map - > nr ; i + + ) {
/* bit possition of the cpu is + 1 */
int bit = map - > map [ i ] + 1 ;
if ( bit > * max )
* max = bit ;
}
return sizeof ( struct cpu_map_mask ) + BITS_TO_LONGS ( * max ) * sizeof ( long ) ;
}
/*
 * Pick the cheaper encoding for @map and allocate zeroed storage for it.
 *
 * Two encodings exist, both variable sized:
 *
 *   array = size of 'struct cpu_map_entries' +
 *           number of cpus * sizeof(u16)            (see cpus_size())
 *
 *   mask  = size of 'struct cpu_map_mask' +
 *           maximum cpu bit converted to a number of longs
 *                                                    (see mask_size())
 *
 * The array form is used when the map is empty/dummy or when it is
 * strictly smaller than the mask form.  The chosen size plus the size of
 * 'struct cpu_map_data' is added to *@size (the caller's starting value is
 * kept), *@type receives the chosen PERF_CPU_MAP__* value, and *@max is
 * filled in by mask_size().
 *
 * Returns the zalloc()ed buffer of *@size bytes, or whatever zalloc()
 * returns on failure.
 */
void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max)
{
	const bool dummy = cpu_map__empty(map);
	const size_t as_array = cpus_size(map);
	const size_t as_mask = mask_size(map, max);
	const bool use_array = dummy || as_array < as_mask;

	*type = use_array ? PERF_CPU_MAP__CPUS : PERF_CPU_MAP__MASK;
	*size += use_array ? as_array : as_mask;
	*size += sizeof(struct cpu_map_data);

	return zalloc(*size);
}
/*
 * Fill @data with the encoding selected by @type.
 *
 * data->type is always set to @type.  PERF_CPU_MAP__CPUS writes the array
 * form via synthesize_cpus(); PERF_CPU_MAP__MASK writes the bitmask form
 * via synthesize_mask(), using @max as the number of bits.  Any other
 * @type leaves the payload untouched.
 */
void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
			      u16 type, int max)
{
	data->type = type;

	switch (type) {
	case PERF_CPU_MAP__CPUS:
		synthesize_cpus((struct cpu_map_entries *)data->data, map);
		break;
	case PERF_CPU_MAP__MASK:
		synthesize_mask((struct cpu_map_mask *)data->data, map, max);
		/* explicit break: previously fell through into default */
		break;
	default:
		break;
	}
}
static struct cpu_map_event * cpu_map_event__new ( struct cpu_map * map )
{
size_t size = sizeof ( struct cpu_map_event ) ;
struct cpu_map_event * event ;
int max ;
u16 type ;
event = cpu_map_data__alloc ( map , & size , & type , & max ) ;
if ( ! event )
return NULL ;
event - > header . type = PERF_RECORD_CPU_MAP ;
event - > header . size = size ;
event - > data . type = type ;
cpu_map_data__synthesize ( & event - > data , map , type , max ) ;
return event ;
}
/*
 * Create a cpu map event for @map, hand it to @process and free it.
 *
 * Returns -ENOMEM if the event cannot be created, otherwise the value
 * returned by @process.
 */
int perf_event__synthesize_cpu_map(struct perf_tool *tool,
				   struct cpu_map *map,
				   perf_event__handler_t process,
				   struct machine *machine)
{
	struct cpu_map_event *ev = cpu_map_event__new(map);
	int ret;

	if (ev == NULL)
		return -ENOMEM;

	ret = process(tool, (union perf_event *)ev, NULL, machine);
	free(ev);

	return ret;
}
2015-10-25 15:51:28 +01:00
/*
 * Build a PERF_RECORD_STAT_CONFIG event from @config and deliver it
 * through @process.
 *
 * The event carries PERF_STAT_CONFIG_TERM__MAX tag/value pairs; the
 * aggr_mode, interval and scale fields of @config are stored in that
 * order.  A one-time warning is issued if the number of stored terms does
 * not equal PERF_STAT_CONFIG_TERM__MAX.
 *
 * Returns -ENOMEM if the event cannot be allocated, otherwise the value
 * returned by @process.
 */
int perf_event__synthesize_stat_config(struct perf_tool *tool,
				       struct perf_stat_config *config,
				       perf_event__handler_t process,
				       struct machine *machine)
{
	struct stat_config_event *ev;
	int len, filled = 0, ret;

	len = sizeof(*ev) + PERF_STAT_CONFIG_TERM__MAX * sizeof(ev->data[0]);

	ev = zalloc(len);
	if (ev == NULL)
		return -ENOMEM;

	ev->header.type = PERF_RECORD_STAT_CONFIG;
	ev->header.size = len;
	ev->nr = PERF_STAT_CONFIG_TERM__MAX;

/* store one tag/value pair in the next free slot */
#define PUT_TERM(__term, __val)						\
	do {								\
		ev->data[filled].tag = PERF_STAT_CONFIG_TERM__##__term;	\
		ev->data[filled].val = __val;				\
		filled++;						\
	} while (0)

	PUT_TERM(AGGR_MODE, config->aggr_mode);
	PUT_TERM(INTERVAL, config->interval);
	PUT_TERM(SCALE, config->scale);

#undef PUT_TERM

	WARN_ONCE(filled != PERF_STAT_CONFIG_TERM__MAX,
		  " stat config terms unbalanced \n ");

	ret = process(tool, (union perf_event *)ev, NULL, machine);
	free(ev);

	return ret;
}
2015-10-25 15:51:31 +01:00
/*
 * Deliver a PERF_RECORD_STAT event built on the stack through @process.
 *
 * The event carries @id, @cpu, @thread and the val/ena/run values taken
 * from @count; header.misc is 0.  Returns the value returned by @process.
 */
int perf_event__synthesize_stat(struct perf_tool *tool,
				u32 cpu, u32 thread, u64 id,
				struct perf_counts_values *count,
				perf_event__handler_t process,
				struct machine *machine)
{
	struct stat_event ev;

	ev.header.type = PERF_RECORD_STAT;
	ev.header.misc = 0;
	ev.header.size = sizeof(ev);

	ev.id     = id;
	ev.cpu    = cpu;
	ev.thread = thread;

	ev.val = count->val;
	ev.ena = count->ena;
	ev.run = count->run;

	return process(tool, (union perf_event *)&ev, NULL, machine);
}
2015-10-25 15:51:34 +01:00
/*
 * Deliver a PERF_RECORD_STAT_ROUND event built on the stack through
 * @process, carrying @evtime and @type; header.misc is 0.
 *
 * Returns the value returned by @process.
 */
int perf_event__synthesize_stat_round(struct perf_tool *tool,
				      u64 evtime, u64 type,
				      perf_event__handler_t process,
				      struct machine *machine)
{
	struct stat_round_event ev;

	ev.header.type = PERF_RECORD_STAT_ROUND;
	ev.header.misc = 0;
	ev.header.size = sizeof(ev);

	ev.type = type;
	ev.time = evtime;

	return process(tool, (union perf_event *)&ev, NULL, machine);
}
2015-10-25 15:51:29 +01:00
void perf_event__read_stat_config ( struct perf_stat_config * config ,
struct stat_config_event * event )
{
unsigned i ;
for ( i = 0 ; i < event - > nr ; i + + ) {
switch ( event - > data [ i ] . tag ) {
# define CASE(__term, __val) \
case PERF_STAT_CONFIG_TERM__ # # __term : \
config - > __val = event - > data [ i ] . val ; \
break ;
CASE ( AGGR_MODE , aggr_mode )
CASE ( SCALE , scale )
CASE ( INTERVAL , interval )
# undef CASE
default :
pr_warning ( " unknown stat config term % " PRIu64 " \n " ,
event - > data [ i ] . tag ) ;
}
}
}
2011-12-02 11:06:37 -02:00
/*
 * Print the comm, pid and tid of a comm @event to @fp, preceded by an
 * "exec" marker when PERF_RECORD_MISC_COMM_EXEC is set in header.misc.
 * Returns the value returned by fprintf().
 */
size_t perf_event__fprintf_comm ( union perf_event * event , FILE * fp )
{
2014-07-14 13:02:27 +03:00
const char * s ;
if ( event - > header . misc & PERF_RECORD_MISC_COMM_EXEC )
s = " exec " ;
else
s = " " ;
2015-02-24 17:20:31 -03:00
return fprintf ( fp , " %s: %s:%d/%d \n " , s , event - > comm . comm , event - > comm . pid , event - > comm . tid ) ;
2011-12-02 11:06:37 -02:00
}
2012-09-11 01:15:03 +03:00
/*
 * Tool callback for comm events: forwards @event and @sample to
 * machine__process_comm_event() and returns its result.  @tool is unused.
 */
int perf_event__process_comm ( struct perf_tool * tool __maybe_unused ,
2011-11-25 08:19:45 -02:00
union perf_event * event ,
2013-09-11 16:18:24 +02:00
struct perf_sample * sample ,
2011-11-28 07:56:39 -02:00
struct machine * machine )
2009-11-27 16:29:22 -02:00
{
2013-09-11 16:18:24 +02:00
return machine__process_comm_event ( machine , event , sample ) ;
2009-11-27 16:29:22 -02:00
}
2012-09-11 01:15:03 +03:00
/*
 * Tool callback for lost events: forwards @event and @sample to
 * machine__process_lost_event() and returns its result.  @tool is unused.
 */
int perf_event__process_lost ( struct perf_tool * tool __maybe_unused ,
2011-11-25 08:19:45 -02:00
union perf_event * event ,
2013-09-11 16:18:24 +02:00
struct perf_sample * sample ,
2012-10-06 16:26:02 -03:00
struct machine * machine )
2009-11-27 16:29:22 -02:00
{
2013-09-11 16:18:24 +02:00
return machine__process_lost_event ( machine , event , sample ) ;
2010-04-19 13:32:50 +08:00
}
perf tools: Encode kernel module mappings in perf.data
We were always looking at the running machine /proc/modules,
even when processing a perf.data file, which only makes sense
when we're doing 'perf record' and 'perf report' on the same
machine, and in close sucession, or if we don't use modules at
all, right Peter? ;-)
Now, at 'perf record' time we read /proc/modules, find the long
path for modules, and put them as PERF_MMAP events, just like we
did to encode the reloc reference symbol for vmlinux. Talking
about that now it is encoded in .pgoff, so that we can use
.{start,len} to store the address boundaries for the kernel so
that when we reconstruct the kmaps tree we can do lookups right
away, without having to fixup the end of the kernel maps like we
did in the past (and now only in perf record).
One more step in the 'perf archive' direction when we'll finally
be able to collect data in one machine and analyse in another.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1263396139-4798-1-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2010-01-13 13:22:17 -02:00
2015-04-30 17:37:29 +03:00
/*
 * Tool callback for aux events: forwards @event to
 * machine__process_aux_event() and returns its result.  @tool and @sample
 * are unused.
 */
int perf_event__process_aux ( struct perf_tool * tool __maybe_unused ,
union perf_event * event ,
struct perf_sample * sample __maybe_unused ,
struct machine * machine )
{
return machine__process_aux_event ( machine , event ) ;
}
2015-04-30 17:37:30 +03:00
/*
 * Tool callback for itrace start events: forwards @event to
 * machine__process_itrace_start_event() and returns its result.  @tool and
 * @sample are unused.
 */
int perf_event__process_itrace_start ( struct perf_tool * tool __maybe_unused ,
union perf_event * event ,
struct perf_sample * sample __maybe_unused ,
struct machine * machine )
{
return machine__process_itrace_start_event ( machine , event ) ;
}
2015-05-10 15:13:15 -04:00
/*
 * Tool callback for lost samples events: forwards @event and @sample to
 * machine__process_lost_samples_event() and returns its result.  @tool is
 * unused.
 */
int perf_event__process_lost_samples ( struct perf_tool * tool __maybe_unused ,
union perf_event * event ,
struct perf_sample * sample ,
struct machine * machine )
{
return machine__process_lost_samples_event ( machine , event , sample ) ;
}
2015-07-21 12:44:03 +03:00
/*
 * Tool callback for context switch events: forwards @event to
 * machine__process_switch_event() and returns its result.  @tool and
 * @sample are unused.
 */
int perf_event__process_switch ( struct perf_tool * tool __maybe_unused ,
union perf_event * event ,
struct perf_sample * sample __maybe_unused ,
struct machine * machine )
{
return machine__process_switch_event ( machine , event ) ;
}
2011-12-02 11:06:37 -02:00
/*
 * Print an mmap @event to @fp: pid/tid, start, len, pgoff, a data/exec
 * flag character chosen by PERF_RECORD_MISC_MMAP_DATA in header.misc, and
 * the filename.  Returns the value returned by fprintf().
 */
size_t perf_event__fprintf_mmap ( union perf_event * event , FILE * fp )
{
2013-11-11 09:44:09 -03:00
return fprintf ( fp , " %d/%d: [%# " PRIx64 " (%# " PRIx64 " ) @ %# " PRIx64 " ]: %c %s \n " ,
2011-12-02 11:06:37 -02:00
event - > mmap . pid , event - > mmap . tid , event - > mmap . start ,
2013-11-11 09:44:09 -03:00
event - > mmap . len , event - > mmap . pgoff ,
( event - > header . misc & PERF_RECORD_MISC_MMAP_DATA ) ? ' r ' : ' x ' ,
event - > mmap . filename ) ;
2011-12-02 11:06:37 -02:00
}
2013-08-21 12:10:25 +02:00
/*
 * Print an mmap2 @event to @fp: pid/tid, start, len, pgoff, device
 * maj:min, inode and inode generation, four flag characters derived from
 * PROT_READ / PROT_WRITE / PROT_EXEC in prot and MAP_SHARED in flags, and
 * the filename.  Returns the value returned by fprintf().
 */
size_t perf_event__fprintf_mmap2 ( union perf_event * event , FILE * fp )
{
return fprintf ( fp , " %d/%d: [%# " PRIx64 " (%# " PRIx64 " ) @ %# " PRIx64
2014-05-19 15:13:49 -04:00
" %02x:%02x % " PRIu64 " % " PRIu64 " ]: %c%c%c%c %s \n " ,
2013-08-21 12:10:25 +02:00
event - > mmap2 . pid , event - > mmap2 . tid , event - > mmap2 . start ,
event - > mmap2 . len , event - > mmap2 . pgoff , event - > mmap2 . maj ,
event - > mmap2 . min , event - > mmap2 . ino ,
event - > mmap2 . ino_generation ,
2014-05-19 15:13:49 -04:00
( event - > mmap2 . prot & PROT_READ ) ? ' r ' : ' - ' ,
( event - > mmap2 . prot & PROT_WRITE ) ? ' w ' : ' - ' ,
( event - > mmap2 . prot & PROT_EXEC ) ? ' x ' : ' - ' ,
( event - > mmap2 . flags & MAP_SHARED ) ? ' s ' : ' p ' ,
2013-08-21 12:10:25 +02:00
event - > mmap2 . filename ) ;
}
2015-10-25 15:51:22 +01:00
size_t perf_event__fprintf_thread_map ( union perf_event * event , FILE * fp )
{
struct thread_map * threads = thread_map__new_event ( & event - > thread_map ) ;
size_t ret ;
ret = fprintf ( fp , " nr: " ) ;
if ( threads )
ret + = thread_map__fprintf ( threads , fp ) ;
else
ret + = fprintf ( fp , " failed to get threads from event \n " ) ;
thread_map__put ( threads ) ;
return ret ;
}
2015-10-25 15:51:26 +01:00
size_t perf_event__fprintf_cpu_map ( union perf_event * event , FILE * fp )
{
struct cpu_map * cpus = cpu_map__new_data ( & event - > cpu_map . data ) ;
size_t ret ;
ret = fprintf ( fp , " nr: " ) ;
if ( cpus )
ret + = cpu_map__fprintf ( cpus , fp ) ;
else
ret + = fprintf ( fp , " failed to get cpumap from event \n " ) ;
cpu_map__put ( cpus ) ;
return ret ;
}
2012-10-06 16:26:02 -03:00
/*
 * Tool callback for mmap events: forwards @event and @sample to
 * machine__process_mmap_event() and returns its result.  @tool is unused.
 */
int perf_event__process_mmap ( struct perf_tool * tool __maybe_unused ,
2011-11-25 08:19:45 -02:00
union perf_event * event ,
2013-09-11 16:18:24 +02:00
struct perf_sample * sample ,
2011-11-28 07:56:39 -02:00
struct machine * machine )
2010-04-19 13:32:50 +08:00
{
2013-09-11 16:18:24 +02:00
return machine__process_mmap_event ( machine , event , sample ) ;
2009-11-27 16:29:22 -02:00
}
2013-08-21 12:10:25 +02:00
/*
 * Tool callback for mmap2 events: forwards @event and @sample to
 * machine__process_mmap2_event() and returns its result.  @tool is unused.
 */
int perf_event__process_mmap2 ( struct perf_tool * tool __maybe_unused ,
union perf_event * event ,
2013-09-11 16:18:24 +02:00
struct perf_sample * sample ,
2013-08-21 12:10:25 +02:00
struct machine * machine )
{
2013-09-11 16:18:24 +02:00
return machine__process_mmap2_event ( machine , event , sample ) ;
2013-08-21 12:10:25 +02:00
}
2011-12-02 11:06:37 -02:00
/*
 * Print the (pid:tid):(ppid:ptid) quadruple stored in the fork member of
 * @event to @fp.  perf_event__fprintf() uses this for both
 * PERF_RECORD_FORK and PERF_RECORD_EXIT.  Returns the value returned by
 * fprintf().
 */
size_t perf_event__fprintf_task ( union perf_event * event , FILE * fp )
{
return fprintf ( fp , " (%d:%d):(%d:%d) \n " ,
event - > fork . pid , event - > fork . tid ,
event - > fork . ppid , event - > fork . ptid ) ;
}
2012-10-06 15:44:59 -03:00
/*
 * Tool callback for fork events: forwards @event and @sample to
 * machine__process_fork_event() and returns its result.  @tool is unused.
 */
int perf_event__process_fork ( struct perf_tool * tool __maybe_unused ,
2011-11-25 08:19:45 -02:00
union perf_event * event ,
2013-09-11 16:18:24 +02:00
struct perf_sample * sample ,
2012-10-06 15:44:59 -03:00
struct machine * machine )
2009-11-27 16:29:22 -02:00
{
2013-09-11 16:18:24 +02:00
return machine__process_fork_event ( machine , event , sample ) ;
2009-11-27 16:29:22 -02:00
}
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
2012-10-06 15:44:59 -03:00
/*
 * Tool callback for exit events: forwards @event and @sample to
 * machine__process_exit_event() and returns its result.  @tool is unused.
 */
int perf_event__process_exit ( struct perf_tool * tool __maybe_unused ,
union perf_event * event ,
2013-09-11 16:18:24 +02:00
struct perf_sample * sample ,
2012-10-06 15:44:59 -03:00
struct machine * machine )
{
2013-09-11 16:18:24 +02:00
return machine__process_exit_event ( machine , event , sample ) ;
2012-10-06 15:44:59 -03:00
}
2015-04-30 17:37:29 +03:00
/*
 * Print an aux @event to @fp: aux_offset, aux_size and flags in hex,
 * followed by a "T" marker when PERF_AUX_FLAG_TRUNCATED is set and an "O"
 * marker when PERF_AUX_FLAG_OVERWRITE is set.  Returns the value returned
 * by fprintf().
 */
size_t perf_event__fprintf_aux ( union perf_event * event , FILE * fp )
{
return fprintf ( fp , " offset: %# " PRIx64 " size: %# " PRIx64 " flags: %# " PRIx64 " [%s%s] \n " ,
event - > aux . aux_offset , event - > aux . aux_size ,
event - > aux . flags ,
event - > aux . flags & PERF_AUX_FLAG_TRUNCATED ? " T " : " " ,
event - > aux . flags & PERF_AUX_FLAG_OVERWRITE ? " O " : " " ) ;
}
2015-04-30 17:37:30 +03:00
/*
 * Print the pid and tid of an itrace start @event to @fp.  Returns the
 * value returned by fprintf().
 */
size_t perf_event__fprintf_itrace_start ( union perf_event * event , FILE * fp )
{
return fprintf ( fp , " pid: %u tid: %u \n " ,
event - > itrace_start . pid , event - > itrace_start . tid ) ;
}
2015-07-21 12:44:03 +03:00
size_t perf_event__fprintf_switch ( union perf_event * event , FILE * fp )
{
bool out = event - > header . misc & PERF_RECORD_MISC_SWITCH_OUT ;
const char * in_out = out ? " OUT " : " IN " ;
if ( event - > header . type = = PERF_RECORD_SWITCH )
return fprintf ( fp , " %s \n " , in_out ) ;
return fprintf ( fp , " %s %s pid/tid: %5u/%-5u \n " ,
in_out , out ? " next " : " prev " ,
event - > context_switch . next_prev_pid ,
event - > context_switch . next_prev_tid ) ;
}
2011-12-02 11:06:37 -02:00
/*
 * Print one event to 'fp'.
 *
 * First prints a PERF_RECORD_<name> prefix, with the name taken from
 * perf_event__name(event->header.type), then dispatches on the event type to
 * the matching perf_event__fprintf_*() helper. Event types without a case
 * below only get the terminating newline string.
 *
 * Returns the accumulated sum of the values returned by the fprintf() call(s)
 * and by the helper that was invoked.
 */
size_t perf_event__fprintf ( union perf_event * event , FILE * fp )
{
size_t ret = fprintf ( fp , " PERF_RECORD_%s " ,
perf_event__name ( event - > header . type ) ) ;
switch ( event - > header . type ) {
case PERF_RECORD_COMM :
ret + = perf_event__fprintf_comm ( event , fp ) ;
break ;
/* FORK and EXIT are both printed by the task helper. */
case PERF_RECORD_FORK :
case PERF_RECORD_EXIT :
ret + = perf_event__fprintf_task ( event , fp ) ;
break ;
case PERF_RECORD_MMAP :
ret + = perf_event__fprintf_mmap ( event , fp ) ;
break ;
2013-08-21 12:10:25 +02:00
case PERF_RECORD_MMAP2 :
ret + = perf_event__fprintf_mmap2 ( event , fp ) ;
break ;
2015-04-30 17:37:29 +03:00
case PERF_RECORD_AUX :
ret + = perf_event__fprintf_aux ( event , fp ) ;
break ;
2015-04-30 17:37:30 +03:00
case PERF_RECORD_ITRACE_START :
ret + = perf_event__fprintf_itrace_start ( event , fp ) ;
break ;
2015-07-21 12:44:03 +03:00
/* Both switch record types are printed by the same helper. */
case PERF_RECORD_SWITCH :
case PERF_RECORD_SWITCH_CPU_WIDE :
ret + = perf_event__fprintf_switch ( event , fp ) ;
break ;
2011-12-02 11:06:37 -02:00
/* No type-specific payload printer: just finish the line. */
default :
ret + = fprintf ( fp , " \n " ) ;
}
return ret ;
}
2012-10-06 16:26:02 -03:00
/*
 * Forward an event and its sample to machine__process_event() for the given
 * machine and return that call's result unchanged.
 * The 'tool' argument is not used (it is marked __maybe_unused).
 */
int perf_event__process ( struct perf_tool * tool __maybe_unused ,
union perf_event * event ,
2013-09-11 16:18:24 +02:00
struct perf_sample * sample ,
2012-10-06 16:26:02 -03:00
struct machine * machine )
2010-08-02 18:08:51 +05:30
{
2013-09-11 16:18:24 +02:00
return machine__process_event ( machine , event , sample ) ;
2010-08-02 18:08:51 +05:30
}
2014-10-23 12:50:25 -03:00
/*
 * Fill 'al' with the map that contains 'addr' for 'thread'.
 *
 * Always stored in 'al': machine (taken from thread->mg->machine), thread,
 * addr, cpumode, and filtered reset to 0.
 *
 * 'cpumode' combined with the perf_host / perf_guest settings selects
 * al->level and the map_groups to search: the thread's own groups
 * (thread->mg) for user modes, or machine->kmaps for kernel modes. If the
 * combination is not handled, al->map is set to NULL, al->level to the 'H'
 * marker, and a HIST_FILTER__GUEST / HIST_FILTER__HOST bit is set in
 * al->filtered when the sample belongs to a side that is disabled.
 *
 * When a map is found, al->addr is replaced by the value returned from the
 * map's map_ip() callback; when none is found al->map stays NULL and
 * al->addr is left as passed in.
 */
void thread__find_addr_map ( struct thread * thread , u8 cpumode ,
2011-11-28 07:56:39 -02:00
enum map_type type , u64 addr ,
2013-08-08 14:32:27 +03:00
struct addr_location * al )
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
{
2014-03-21 17:57:01 -03:00
struct map_groups * mg = thread - > mg ;
2014-10-23 12:50:25 -03:00
struct machine * machine = mg - > machine ;
2013-08-07 14:38:46 +03:00
bool load_map = false ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
2013-12-19 17:20:06 -03:00
al - > machine = machine ;
2013-11-05 15:32:36 -03:00
al - > thread = thread ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
al - > addr = addr ;
2010-04-19 13:32:50 +08:00
al - > cpumode = cpumode ;
2014-03-17 16:59:21 -03:00
al - > filtered = 0 ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
2011-11-28 07:56:39 -02:00
/* Without a machine there is nothing to search: report "no map". */
if ( machine = = NULL ) {
al - > map = NULL ;
return ;
}
2010-04-19 13:32:50 +08:00
/*
 * Pick the level marker and the map_groups to search from cpumode.
 * Kernel modes switch to machine->kmaps and request a map__load() before
 * the found map is used; user modes keep the thread's own map_groups.
 */
if ( cpumode = = PERF_RECORD_MISC_KERNEL & & perf_host ) {
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
al - > level = ' k ' ;
2010-04-27 21:17:50 -03:00
mg = & machine - > kmaps ;
2013-08-07 14:38:46 +03:00
load_map = true ;
2010-04-19 13:32:50 +08:00
} else if ( cpumode = = PERF_RECORD_MISC_USER & & perf_host ) {
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
al - > level = ' . ' ;
2010-04-19 13:32:50 +08:00
} else if ( cpumode = = PERF_RECORD_MISC_GUEST_KERNEL & & perf_guest ) {
al - > level = ' g ' ;
2010-04-27 21:17:50 -03:00
mg = & machine - > kmaps ;
2013-08-07 14:38:46 +03:00
load_map = true ;
2013-12-20 15:52:56 -05:00
} else if ( cpumode = = PERF_RECORD_MISC_GUEST_USER & & perf_guest ) {
al - > level = ' u ' ;
2010-04-19 13:32:50 +08:00
} else {
2013-12-20 15:52:56 -05:00
/*
 * cpumode is either not one of the four handled above, or it belongs to
 * a side (host/guest) that is disabled. No lookup is done; the sample is
 * flagged as filtered when it is a guest or host sample of a disabled side.
 */
al - > level = ' H ' ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
al - > map = NULL ;
2010-04-19 13:32:50 +08:00
if ( ( cpumode = = PERF_RECORD_MISC_GUEST_USER | |
cpumode = = PERF_RECORD_MISC_GUEST_KERNEL ) & &
! perf_guest )
2014-03-17 16:59:21 -03:00
al - > filtered | = ( 1 < < HIST_FILTER__GUEST ) ;
2010-04-19 13:32:50 +08:00
if ( ( cpumode = = PERF_RECORD_MISC_USER | |
cpumode = = PERF_RECORD_MISC_KERNEL ) & &
! perf_host )
2014-03-17 16:59:21 -03:00
al - > filtered | = ( 1 < < HIST_FILTER__HOST ) ;
2010-04-19 13:32:50 +08:00
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
return ;
}
/*
 * The lookup below is retried at most once: the fallback sets mg to
 * machine->kmaps before jumping back here, and its own condition requires
 * mg != &machine->kmaps, so it cannot trigger a second time.
 */
try_again :
2009-12-11 14:50:36 -02:00
al - > map = map_groups__find ( mg , type , al - > addr ) ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
if ( al - > map = = NULL ) {
/*
* If this is outside of all known maps , and is a negative
* address , try to look it up in the kernel dso , as it might be
* a vsyscall or vdso ( which executes in user - mode ) .
*
* XXX This is nasty , we should have a symbol list in the
* " [vdso] " dso , but for now lets use the old trick of looking
* in the whole kernel symbol list .
*/
2014-08-15 22:08:39 +03:00
if ( cpumode = = PERF_RECORD_MISC_USER & & machine & &
mg ! = & machine - > kmaps & &
machine__kernel_ip ( machine , al - > addr ) ) {
2010-04-27 21:17:50 -03:00
mg = & machine - > kmaps ;
2014-07-14 13:02:31 +03:00
load_map = true ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
goto try_again ;
}
2013-08-07 14:38:46 +03:00
} else {
/*
* Kernel maps might be changed when loading symbols so loading
* must be done prior to using kernel maps .
*/
if ( load_map )
2013-08-08 14:32:27 +03:00
map__load ( al - > map , machine - > symbol_filter ) ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
/* Replace the looked-up address with the map's own translation of it. */
al - > addr = al - > map - > map_ip ( al - > map , al - > addr ) ;
2013-08-07 14:38:46 +03:00
}
2010-01-14 23:45:29 -02:00
}
2014-10-23 12:50:25 -03:00
/*
 * Resolve both the map and the symbol for 'addr'.
 *
 * Calls thread__find_addr_map() to fill 'al', then, if a map was found,
 * looks the (already translated) al->addr up with map__find_symbol() using
 * the symbol_filter of the thread's machine. al->sym is set to NULL when no
 * map was found.
 */
void thread__find_addr_location ( struct thread * thread ,
2011-11-28 07:56:39 -02:00
u8 cpumode , enum map_type type , u64 addr ,
2013-08-08 14:32:26 +03:00
struct addr_location * al )
2010-01-14 23:45:29 -02:00
{
2014-10-23 12:50:25 -03:00
thread__find_addr_map ( thread , cpumode , type , addr , al ) ;
2010-01-14 23:45:29 -02:00
if ( al - > map ! = NULL )
2013-08-08 14:32:26 +03:00
al - > sym = map__find_symbol ( al - > map , al - > addr ,
2014-10-23 12:50:25 -03:00
thread - > mg - > machine - > symbol_filter ) ;
2010-01-14 23:45:29 -02:00
else
al - > sym = NULL ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
}
perf machine: Protect the machine->threads with a rwlock
In addition to using refcounts for the struct thread lifetime
management, we need to protect access to machine->threads from
concurrent access.
That happens in 'perf top', where a thread processes events, inserting
and deleting entries from that rb_tree while another thread decays
hist_entries, that end up dropping references and ultimately deleting
threads from the rb_tree and releasing its resources when no further
hist_entry (or other data structures, like in 'perf sched') references
it.
So the rule is the same for refcounts + protected trees in the kernel,
get the tree lock, find object, bump the refcount, drop the tree lock,
return, use object, drop the refcount if no more use of it is needed,
keep it if storing it in some other data structure, drop when releasing
that data structure.
I.e. pair "t = machine__find(new)_thread()" with a "thread__put(t)", and
"perf_event__preprocess_sample(&al)" with "addr_location__put(&al)".
The addr_location__put() one is because as we return references to
several data structures, we may end up adding more reference counting
for the other data structures and then we'll drop it at
addr_location__put() time.
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-bs9rt4n0jw3hi9f3zxyy3xln@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-06 20:43:22 -03:00
/*
 * Callers need to drop the reference to al->thread, obtained in
 * machine__findnew_thread()
 */
2016-03-22 18:39:09 -03:00
int machine__resolve ( struct machine * machine , struct addr_location * al ,
struct perf_sample * sample )
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
{
2013-08-27 11:23:06 +03:00
struct thread * thread = machine__findnew_thread ( machine , sample - > pid ,
2014-05-12 09:56:42 +09:00
sample - > tid ) ;
2010-06-04 08:02:07 -03:00
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
if ( thread = = NULL )
return - 1 ;
2013-09-11 14:46:56 +02:00
dump_printf ( " ... thread: %s:%d \n " , thread__comm_str ( thread ) , thread - > tid ) ;
2010-05-09 19:57:08 -03:00
/*
2011-11-28 07:56:39 -02:00
* Have we already created the kernel maps for this machine ?
2010-05-09 19:57:08 -03:00
*
* This should have happened earlier , when we processed the kernel MMAP
* events , but for older perf . data files there was no such thing , so do
* it now .
*/
2016-03-22 18:23:43 -03:00
if ( sample - > cpumode = = PERF_RECORD_MISC_KERNEL & &
2015-09-30 11:54:04 -03:00
machine__kernel_map ( machine ) = = NULL )
2011-11-28 07:56:39 -02:00
machine__create_kernel_maps ( machine ) ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
2016-03-22 18:23:43 -03:00
thread__find_addr_map ( thread , sample - > cpumode , MAP__FUNCTION , sample - > ip , al ) ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
dump_printf ( " ...... dso: %s \n " ,
al - > map ? al - > map - > dso - > long_name :
al - > level = = ' H ' ? " [hypervisor] " : " <not found> " ) ;
2014-03-17 17:12:27 -03:00
if ( thread__is_filtered ( thread ) )
al - > filtered | = ( 1 < < HIST_FILTER__THREAD ) ;
2010-03-24 16:40:15 -03:00
al - > sym = NULL ;
2011-01-29 13:02:00 -02:00
al - > cpu = sample - > cpu ;
2015-09-04 10:45:42 -04:00
al - > socket = - 1 ;
if ( al - > cpu > = 0 ) {
struct perf_env * env = machine - > env ;
if ( env & & env - > cpu )
al - > socket = env - > cpu [ al - > cpu ] . socket_id ;
}
2010-03-24 16:40:15 -03:00
if ( al - > map ) {
2011-12-13 00:16:55 +09:00
struct dso * dso = al - > map - > dso ;
2010-03-24 16:40:15 -03:00
if ( symbol_conf . dso_list & &
2011-12-13 00:16:55 +09:00
( ! dso | | ! ( strlist__has_entry ( symbol_conf . dso_list ,
dso - > short_name ) | |
( dso - > short_name ! = dso - > long_name & &
strlist__has_entry ( symbol_conf . dso_list ,
2014-03-17 16:59:21 -03:00
dso - > long_name ) ) ) ) ) {
al - > filtered | = ( 1 < < HIST_FILTER__DSO ) ;
}
2010-03-24 16:40:15 -03:00
2013-08-08 14:32:25 +03:00
al - > sym = map__find_symbol ( al - > map , al - > addr ,
machine - > symbol_filter ) ;
2010-03-24 16:40:15 -03:00
}
2009-12-15 20:04:41 -02:00
2012-09-07 16:42:23 +08:00
if ( symbol_conf . sym_list & &
( ! al - > sym | | ! strlist__has_entry ( symbol_conf . sym_list ,
2014-03-17 16:59:21 -03:00
al - > sym - > name ) ) ) {
al - > filtered | = ( 1 < < HIST_FILTER__SYMBOL ) ;
}
2009-12-15 20:04:41 -02:00
return 0 ;
perf tools: Consolidate symbol resolving across all tools
Now we have a very high level routine for simple tools to
process IP sample events:
int event__preprocess_sample(const event_t *self,
struct addr_location *al,
symbol_filter_t filter)
It receives the event itself and will insert new threads in the
global threads list and resolve the map and symbol, filling all
this info into the new addr_location struct, so that tools like
annotate and report can further process the event by creating
hist_entries in their specific way (with or without callgraphs,
etc).
It in turn uses the new next layer function:
void thread__find_addr_location(struct thread *self, u8 cpumode,
enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
This one will, given a thread (userspace or the kernel kthread
one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE
too in the near future) at the given cpumode, taking vdsos into
account (userspace hit, but kernel symbol) and will fill all
these details in the addr_location given.
Tools that need a more compact API for plain function
resolution, like 'kmem', can use this other one:
struct symbol *thread__find_function(struct thread *self, u64 addr,
symbol_filter_t filter)
So, to resolve a kernel symbol, that is all the 'kmem' tool
needs, its just a matter of calling:
sym = thread__find_function(kthread, addr, NULL);
The 'filter' parameter is needed because we do lazy
parsing/loading of ELF symtabs or /proc/kallsyms.
With this we remove more code duplication all around, which is
always good, huh? :-)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: John Kacur <jkacur@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-11-27 16:29:23 -02:00
}
2014-07-22 16:17:39 +03:00
perf machine: Protect the machine->threads with a rwlock
In addition to using refcounts for the struct thread lifetime
management, we need to protect access to machine->threads from
concurrent access.
That happens in 'perf top', where a thread processes events, inserting
and deleting entries from that rb_tree while another thread decays
hist_entries, that end up dropping references and ultimately deleting
threads from the rb_tree and releasing its resources when no further
hist_entry (or other data structures, like in 'perf sched') references
it.
So the rule is the same for refcounts + protected trees in the kernel,
get the tree lock, find object, bump the refcount, drop the tree lock,
return, use object, drop the refcount if no more use of it is needed,
keep it if storing it in some other data structure, drop when releasing
that data structure.
I.e. pair "t = machine__find(new)_thread()" with a "thread__put(t)", and
"perf_event__preprocess_sample(&al)" with "addr_location__put(&al)".
The addr_location__put() one is because as we return references to
several data structures, we may end up adding more reference counting
for the other data structures and then we'll drop it at
addr_location__put() time.
Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-bs9rt4n0jw3hi9f3zxyy3xln@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-06 20:43:22 -03:00
/*
* The preprocess_sample method will return with reference counts for the
* in it , when done using ( and perhaps getting ref counts if needing to
* keep a pointer to one of those entries ) it must be paired with
* addr_location__put ( ) , so that the refcounts can be decremented .
*/
void addr_location__put ( struct addr_location * al )
{
thread__zput ( al - > thread ) ;
}
2014-07-22 16:17:39 +03:00
bool is_bts_event ( struct perf_event_attr * attr )
{
return attr - > type = = PERF_TYPE_HARDWARE & &
( attr - > config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS ) & &
attr - > sample_period = = 1 ;
}
bool sample_addr_correlates_sym ( struct perf_event_attr * attr )
{
if ( attr - > type = = PERF_TYPE_SOFTWARE & &
( attr - > config = = PERF_COUNT_SW_PAGE_FAULTS | |
attr - > config = = PERF_COUNT_SW_PAGE_FAULTS_MIN | |
attr - > config = = PERF_COUNT_SW_PAGE_FAULTS_MAJ ) )
return true ;
if ( is_bts_event ( attr ) )
return true ;
return false ;
}
2016-03-22 18:44:46 -03:00
void thread__resolve ( struct thread * thread , struct addr_location * al ,
struct perf_sample * sample )
2014-07-22 16:17:39 +03:00
{
2016-03-22 18:23:43 -03:00
thread__find_addr_map ( thread , sample - > cpumode , MAP__FUNCTION , sample - > addr , al ) ;
2014-07-22 16:17:39 +03:00
if ( ! al - > map )
2016-03-22 18:23:43 -03:00
thread__find_addr_map ( thread , sample - > cpumode , MAP__VARIABLE ,
2014-07-22 16:17:39 +03:00
sample - > addr , al ) ;
al - > cpu = sample - > cpu ;
al - > sym = NULL ;
if ( al - > map )
al - > sym = map__find_symbol ( al - > map , al - > addr , NULL ) ;
}