2011-01-30 10:46:46 -02:00
/*
* Copyright ( C ) 2011 , Red Hat Inc , Arnaldo Carvalho de Melo < acme @ redhat . com >
*
* Parts came from builtin - { top , stat , record } . c , see those files for further
* copyright notes .
*
* Released under the GPL v2 . ( and only v2 , not any later version )
*/
2011-09-06 09:12:26 -06:00
# include <byteswap.h>
2017-04-18 10:46:11 -03:00
# include <errno.h>
2017-04-17 15:23:08 -03:00
# include <inttypes.h>
2012-08-07 15:20:45 +02:00
# include <linux/bitops.h>
2017-06-20 12:05:38 -03:00
# include <api/fs/fs.h>
2015-09-02 09:56:43 +02:00
# include <api/fs/tracing_path.h>
2013-06-11 17:29:18 +02:00
# include <traceevent/event-parse.h>
# include <linux/hw_breakpoint.h>
# include <linux/perf_event.h>
2017-06-16 12:18:27 -03:00
# include <linux/compiler.h>
2015-09-07 10:38:06 +02:00
# include <linux/err.h>
2017-04-19 19:03:14 -03:00
# include <sys/ioctl.h>
2013-08-04 19:41:26 -07:00
# include <sys/resource.h>
2017-06-20 12:05:38 -03:00
# include <sys/types.h>
# include <dirent.h>
2013-06-11 17:29:18 +02:00
# include "asm/bug.h"
2014-10-09 16:12:24 -03:00
# include "callchain.h"
2014-10-17 12:17:40 -03:00
# include "cgroup.h"
2017-04-25 15:30:47 -03:00
# include "event.h"
2011-01-03 16:39:04 -02:00
# include "evsel.h"
2011-01-12 17:03:24 -02:00
# include "evlist.h"
2011-01-03 16:39:04 -02:00
# include "util.h"
2011-01-03 23:09:46 -02:00
# include "cpumap.h"
2011-01-18 15:15:24 -02:00
# include "thread_map.h"
2012-04-26 14:15:22 +09:00
# include "target.h"
2012-08-07 15:20:47 +02:00
# include "perf_regs.h"
2013-08-14 15:48:24 +03:00
# include "debug.h"
2013-12-03 14:09:24 +01:00
# include "trace-event.h"
2015-06-14 10:19:26 +02:00
# include "stat.h"
2017-11-29 19:43:46 +01:00
# include "memswap.h"
2016-10-12 14:02:06 -07:00
# include "util/parse-branch-options.h"
2011-01-03 16:39:04 -02:00
2017-04-17 16:10:49 -03:00
# include "sane_ctype.h"
2012-12-13 13:13:07 -03:00
/*
 * perf_missing_features - file-local set of boolean flags , one per named
 * feature . Static storage , so every flag starts out false .
 * NOTE(review): presumably a flag is set once the running kernel is found
 * not to support that perf_event_open ( ) feature , so later opens can fall
 * back - the code doing so is not in the visible part of this file ; confirm .
 */
static struct {
bool sample_id_all ;
bool exclude_guest ;
2013-08-21 12:10:25 +02:00
bool mmap2 ;
2014-06-30 22:28:47 +02:00
bool cloexec ;
2015-03-31 00:19:31 +02:00
bool clockid ;
bool clockid_wrong ;
2015-12-11 16:12:24 -08:00
bool lbr_flags ;
2016-05-20 16:38:23 +00:00
bool write_backward ;
perf stat: Use group read for event groups
Make perf stat use group read if there are groups defined. The group
read will get the values for all member of groups within a single
syscall instead of calling read syscall for every event.
We can see considerable less amount of kernel cycles spent on single
group read, than reading each event separately, like for following perf
stat command:
# perf stat -e {cycles,instructions} -I 10 -a sleep 1
Monitored with "perf stat -r 5 -e '{cycles:u,cycles:k}'"
Before:
24,325,676 cycles:u
297,040,775 cycles:k
1.038554134 seconds time elapsed
After:
25,034,418 cycles:u
158,256,395 cycles:k
1.036864497 seconds time elapsed
The perf_evsel__open fallback changes contributed by Andi Kleen.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20170726120206.9099-4-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-07-26 14:02:06 +02:00
bool group_read ;
2012-12-13 13:13:07 -03:00
} perf_missing_features ;
2015-03-31 00:19:31 +02:00
/*
 * File-scope clock id ; nothing in the visible part of this file reads or
 * writes it . NOTE(review): presumably used together with the clockid flags
 * of perf_missing_features - confirm against the rest of the file .
 */
static clockid_t clockid ;
2014-10-09 15:29:51 -03:00
/*
 * Default init hook installed in perf_evsel__object : ignores @evsel and
 * always returns 0 .
 */
static int perf_evsel__no_extra_init ( struct perf_evsel * evsel __maybe_unused )
{
return 0 ;
}
2017-07-03 16:50:18 +02:00
/*
 * Empty default for test_attr__ready ( ) . It is marked __weak , presumably so
 * that another object file can supply the real implementation - confirm .
 */
void __weak test_attr__ready ( void ) { }
2014-10-09 15:29:51 -03:00
/*
 * Default fini hook installed in perf_evsel__object : ignores @evsel and
 * does nothing .
 */
static void perf_evsel__no_extra_fini ( struct perf_evsel * evsel __maybe_unused )
{
}
/*
 * perf_evsel__object - allocation size and extra init / fini hooks for evsels .
 * The defaults are the plain struct size and the two no-op hooks above ;
 * perf_evsel__object_config ( ) can replace any of the three .
 */
static struct {
size_t size ; /* byte count handed to zalloc ( ) by perf_evsel__new_idx ( ) */
int ( * init ) ( struct perf_evsel * evsel ) ; /* invoked from perf_evsel__init ( ) */
void ( * fini ) ( struct perf_evsel * evsel ) ; /* NOTE(review): caller not visible here , presumably the teardown path */
} perf_evsel__object = {
. size = sizeof ( struct perf_evsel ) ,
. init = perf_evsel__no_extra_init ,
. fini = perf_evsel__no_extra_fini ,
} ;
/*
 * perf_evsel__object_config - override the evsel allocation size and hooks
 * @object_size: new allocation size in bytes, or 0 to keep the current one
 * @init: replacement init hook, or NULL to keep the current one
 * @fini: replacement fini hook, or NULL to keep the current one
 *
 * A non-zero @object_size smaller than the currently configured size is
 * rejected with -EINVAL and nothing is changed.  Returns 0 otherwise.
 */
int perf_evsel__object_config(size_t object_size,
			      int (*init)(struct perf_evsel *evsel),
			      void (*fini)(struct perf_evsel *evsel))
{
	if (object_size != 0) {
		/* The object may only grow, never shrink. */
		if (object_size < perf_evsel__object.size)
			return -EINVAL;

		perf_evsel__object.size = object_size;
	}

	if (init)
		perf_evsel__object.init = init;

	if (fini)
		perf_evsel__object.fini = fini;

	return 0;
}
2011-01-03 17:45:52 -02:00
/*
 * FD - lvalue for the int stored at cell (x, y) of the xyarray e->fd.
 *
 * @e is parenthesised so that any pointer expression (e.g. &obj or p + i)
 * can be passed, not just a plain identifier.
 * NOTE(review): x and y are presumably the cpu and thread indices - confirm
 * against the callers.
 */
#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
2013-08-27 11:23:09 +03:00
/*
 * __perf_evsel__sample_size - byte size implied by a sample_type mask
 * @sample_type: sample format bits
 *
 * Every bit of @sample_type that is also set in PERF_SAMPLE_MASK accounts
 * for one u64; the result is that count multiplied by sizeof(u64).
 */
int __perf_evsel__sample_size(u64 sample_type)
{
	u64 remaining = sample_type & PERF_SAMPLE_MASK;
	int nr_fields = 0;

	/* Each pass clears the lowest set bit, so this loops once per field. */
	while (remaining) {
		remaining &= remaining - 1;
		nr_fields++;
	}

	return nr_fields * sizeof(u64);
}
2013-08-27 11:23:09 +03:00
/**
 * __perf_evsel__calc_id_pos - locate the event id inside a sample event.
 * @sample_type: sample type
 *
 * Returns the index, counted in u64 slots from the start of the sample
 * array, at which the id is found:
 *   - 0 when PERF_SAMPLE_IDENTIFIER is set,
 *   - -1 when neither PERF_SAMPLE_IDENTIFIER nor PERF_SAMPLE_ID is set,
 *   - otherwise the number of IP/TID/TIME/ADDR fields selected, as those
 *     are the ones counted ahead of PERF_SAMPLE_ID.
 */
static int __perf_evsel__calc_id_pos(u64 sample_type)
{
	int pos;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		return 0;

	if ((sample_type & PERF_SAMPLE_ID) == 0)
		return -1;

	/* One slot for each selected field that precedes the id. */
	pos  = !!(sample_type & PERF_SAMPLE_IP);
	pos += !!(sample_type & PERF_SAMPLE_TID);
	pos += !!(sample_type & PERF_SAMPLE_TIME);
	pos += !!(sample_type & PERF_SAMPLE_ADDR);

	return pos;
}
/**
 * __perf_evsel__calc_is_pos - locate the event id inside a non-sample event.
 * @sample_type: sample type
 *
 * With sample_id_all an id sample is appended to non-sample events, so the
 * position is counted backwards from the end, in u64 slots:
 *   - 1 when PERF_SAMPLE_IDENTIFIER is set,
 *   - -1 when neither PERF_SAMPLE_IDENTIFIER nor PERF_SAMPLE_ID is set,
 *   - otherwise 1 plus one for each of CPU and STREAM_ID that is selected.
 */
static int __perf_evsel__calc_is_pos(u64 sample_type)
{
	int pos;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		return 1;

	if ((sample_type & PERF_SAMPLE_ID) == 0)
		return -1;

	/* Start at the last slot and step back over CPU / STREAM_ID. */
	pos  = 1;
	pos += !!(sample_type & PERF_SAMPLE_CPU);
	pos += !!(sample_type & PERF_SAMPLE_STREAM_ID);

	return pos;
}
void perf_evsel__calc_id_pos ( struct perf_evsel * evsel )
{
evsel - > id_pos = __perf_evsel__calc_id_pos ( evsel - > attr . sample_type ) ;
evsel - > is_pos = __perf_evsel__calc_is_pos ( evsel - > attr . sample_type ) ;
}
2012-12-10 14:53:43 -03:00
void __perf_evsel__set_sample_bit ( struct perf_evsel * evsel ,
enum perf_event_sample_format bit )
{
if ( ! ( evsel - > attr . sample_type & bit ) ) {
evsel - > attr . sample_type | = bit ;
evsel - > sample_size + = sizeof ( u64 ) ;
2013-08-27 11:23:09 +03:00
perf_evsel__calc_id_pos ( evsel ) ;
2012-12-10 14:53:43 -03:00
}
}
void __perf_evsel__reset_sample_bit ( struct perf_evsel * evsel ,
enum perf_event_sample_format bit )
{
if ( evsel - > attr . sample_type & bit ) {
evsel - > attr . sample_type & = ~ bit ;
evsel - > sample_size - = sizeof ( u64 ) ;
2013-08-27 11:23:09 +03:00
perf_evsel__calc_id_pos ( evsel ) ;
2012-12-10 14:53:43 -03:00
}
}
2013-08-27 11:23:09 +03:00
void perf_evsel__set_sample_id ( struct perf_evsel * evsel ,
bool can_sample_identifier )
2012-12-10 15:21:30 -03:00
{
2013-08-27 11:23:09 +03:00
if ( can_sample_identifier ) {
perf_evsel__reset_sample_bit ( evsel , ID ) ;
perf_evsel__set_sample_bit ( evsel , IDENTIFIER ) ;
} else {
perf_evsel__set_sample_bit ( evsel , ID ) ;
}
2012-12-10 15:21:30 -03:00
evsel - > attr . read_format | = PERF_FORMAT_ID ;
}
2016-07-07 11:51:47 -03:00
/**
* perf_evsel__is_function_event - Return whether given evsel is a function
* trace event
*
* @ evsel - evsel selector to be tested
*
* Return % true if event is function trace event
*/
bool perf_evsel__is_function_event ( struct perf_evsel * evsel )
{
# define FUNCTION_EVENT "ftrace:function"
return evsel - > name & &
! strncmp ( FUNCTION_EVENT , evsel - > name , sizeof ( FUNCTION_EVENT ) ) ;
# undef FUNCTION_EVENT
}
2011-01-18 21:41:45 -02:00
void perf_evsel__init ( struct perf_evsel * evsel ,
struct perf_event_attr * attr , int idx )
{
evsel - > idx = idx ;
2014-07-31 09:00:52 +03:00
evsel - > tracking = ! idx ;
2011-01-18 21:41:45 -02:00
evsel - > attr = * attr ;
2012-11-29 15:38:29 +09:00
evsel - > leader = evsel ;
2013-11-12 17:58:49 +01:00
evsel - > unit = " " ;
evsel - > scale = 1.0 ;
2015-08-27 08:07:40 -04:00
evsel - > evlist = NULL ;
perf bpf: Attach eBPF filter to perf event
This is the final patch which makes basic BPF filter work. After
applying this patch, users are allowed to use BPF filter like:
# perf record --event ./hello_world.o ls
A bpf_fd field is appended to 'struct evsel', and setup during the
callback function add_bpf_event() for each 'probe_trace_event'.
PERF_EVENT_IOC_SET_BPF ioctl is used to attach eBPF program to a newly
created perf event. The file descriptor of the eBPF program is passed to
perf record using previous patches, and stored into evsel->bpf_fd.
It is possible that different perf event are created for one kprobe
events for different CPUs. In this case, when trying to call the ioctl,
EEXIST will be return. This patch doesn't treat it as an error.
Committer note:
The bpf proggie used so far:
__attribute__((section("fork=_do_fork"), used))
int fork(void *ctx)
{
return 0;
}
char _license[] __attribute__((section("license"), used)) = "GPL";
int _version __attribute__((section("version"), used)) = 0x40300;
failed to produce any samples, even with forks happening and it being
running in system wide mode.
That is because now the filter is being associated, and the code above
always returns zero, meaning that all forks will be probed but filtered
away ;-/
Change it to 'return 1;' instead and after that:
# trace --no-syscalls --event /tmp/foo.o
0.000 perf_bpf_probe:fork:(ffffffff8109be30))
2.333 perf_bpf_probe:fork:(ffffffff8109be30))
3.725 perf_bpf_probe:fork:(ffffffff8109be30))
4.550 perf_bpf_probe:fork:(ffffffff8109be30))
^C#
And it works with all tools, including 'perf trace'.
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1444826502-49291-8-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 12:41:18 +00:00
evsel - > bpf_fd = - 1 ;
2011-01-18 21:41:45 -02:00
INIT_LIST_HEAD ( & evsel - > node ) ;
2015-07-29 05:42:10 -04:00
INIT_LIST_HEAD ( & evsel - > config_terms ) ;
2014-10-09 15:29:51 -03:00
perf_evsel__object . init ( evsel ) ;
2012-08-01 18:53:11 -03:00
evsel - > sample_size = __perf_evsel__sample_size ( attr - > sample_type ) ;
2013-08-27 11:23:09 +03:00
perf_evsel__calc_id_pos ( evsel ) ;
2015-07-10 07:36:09 +00:00
evsel - > cmdline_group_boundary = false ;
perf stat: Output JSON MetricExpr metric
Add generic infrastructure to perf stat to output ratios for
"MetricExpr" entries in the event lists. Many events are more useful as
ratios than in raw form, typically some count in relation to total
ticks.
Transfer the MetricExpr information from the alias to the evsel.
We mark the events that need to be collected for MetricExpr, and also
link the events using them with a pointer. The code is careful to always
prefer the right event in the same group to minimize multiplexing
errors. At the moment only a single relation is supported.
Then add a rblist to the stat shadow code that remembers stats based on
the cpu and context.
Then finally update and retrieve and print these values similarly to the
existing hardcoded perf metrics. We use the simple expression parser
added earlier to evaluate the expression.
Normally we just output the result without further commentary, but for
--metric-only this would lead to empty columns. So for this case use the
original event as description.
There is no attempt to automatically add the MetricExpr event, if it is
missing, however we suggest it to the user, because the user tool
doesn't have enough information to reliably construct a group that is
guaranteed to schedule. So we leave that to the user.
% perf stat -a -I 1000 -e '{unc_p_clockticks,unc_p_freq_max_os_cycles}'
1.000147889 800,085,181 unc_p_clockticks
1.000147889 93,126,241 unc_p_freq_max_os_cycles # 11.6
2.000448381 800,218,217 unc_p_clockticks
2.000448381 142,516,095 unc_p_freq_max_os_cycles # 17.8
3.000639852 800,243,057 unc_p_clockticks
3.000639852 162,292,689 unc_p_freq_max_os_cycles # 20.3
% perf stat -a -I 1000 -e '{unc_p_clockticks,unc_p_freq_max_os_cycles}' --metric-only
# time freq_max_os_cycles %
1.000127077 0.9
2.000301436 0.7
3.000456379 0.0
v2: Change from DivideBy to MetricExpr
v3: Use expr__ prefix. Support more than one other event.
v4: Update description
v5: Only print warning message once for multiple PMUs.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20170320201711.14142-11-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-03-20 13:17:08 -07:00
evsel - > metric_expr = NULL ;
2017-03-20 13:17:10 -07:00
evsel - > metric_name = NULL ;
perf stat: Output JSON MetricExpr metric
Add generic infrastructure to perf stat to output ratios for
"MetricExpr" entries in the event lists. Many events are more useful as
ratios than in raw form, typically some count in relation to total
ticks.
Transfer the MetricExpr information from the alias to the evsel.
We mark the events that need to be collected for MetricExpr, and also
link the events using them with a pointer. The code is careful to always
prefer the right event in the same group to minimize multiplexing
errors. At the moment only a single relation is supported.
Then add a rblist to the stat shadow code that remembers stats based on
the cpu and context.
Then finally update and retrieve and print these values similarly to the
existing hardcoded perf metrics. We use the simple expression parser
added earlier to evaluate the expression.
Normally we just output the result without further commentary, but for
--metric-only this would lead to empty columns. So for this case use the
original event as description.
There is no attempt to automatically add the MetricExpr event, if it is
missing, however we suggest it to the user, because the user tool
doesn't have enough information to reliably construct a group that is
guaranteed to schedule. So we leave that to the user.
% perf stat -a -I 1000 -e '{unc_p_clockticks,unc_p_freq_max_os_cycles}'
1.000147889 800,085,181 unc_p_clockticks
1.000147889 93,126,241 unc_p_freq_max_os_cycles # 11.6
2.000448381 800,218,217 unc_p_clockticks
2.000448381 142,516,095 unc_p_freq_max_os_cycles # 17.8
3.000639852 800,243,057 unc_p_clockticks
3.000639852 162,292,689 unc_p_freq_max_os_cycles # 20.3
% perf stat -a -I 1000 -e '{unc_p_clockticks,unc_p_freq_max_os_cycles}' --metric-only
# time freq_max_os_cycles %
1.000127077 0.9
2.000301436 0.7
3.000456379 0.0
v2: Change from DivideBy to MetricExpr
v3: Use expr__ prefix. Support more than one other event.
v4: Update description
v5: Only print warning message once for multiple PMUs.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20170320201711.14142-11-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-03-20 13:17:08 -07:00
evsel - > metric_events = NULL ;
evsel - > collect_stat = false ;
2011-01-18 21:41:45 -02:00
}
2013-11-07 16:41:19 -03:00
struct perf_evsel * perf_evsel__new_idx ( struct perf_event_attr * attr , int idx )
2011-01-03 16:39:04 -02:00
{
2014-10-09 15:29:51 -03:00
struct perf_evsel * evsel = zalloc ( perf_evsel__object . size ) ;
2011-01-03 16:39:04 -02:00
2011-01-18 21:41:45 -02:00
if ( evsel ! = NULL )
perf_evsel__init ( evsel , attr , idx ) ;
2011-01-03 16:39:04 -02:00
perf tools: Introduce bpf-output event
Commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper")
adds a helper to enable a BPF program to output data to a perf ring
buffer through a new type of perf event, PERF_COUNT_SW_BPF_OUTPUT. This
patch enables perf to create events of that type. Now a perf user can
use the following cmdline to receive output data from BPF programs:
# perf record -a -e bpf-output/no-inherit,name=evt/ \
-e ./test_bpf_output.c/map:channel.event=evt/ ls /
# perf script
perf 1560 [004] 347747.086295: evt: ffffffff811fd201 sys_write ...
perf 1560 [004] 347747.086300: evt: ffffffff811fd201 sys_write ...
perf 1560 [004] 347747.086315: evt: ffffffff811fd201 sys_write ...
...
Test result:
# cat test_bpf_output.c
/************************ BEGIN **************************/
#include <uapi/linux/bpf.h>
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
#define SEC(NAME) __attribute__((section(NAME), used))
static u64 (*ktime_get_ns)(void) =
(void *)BPF_FUNC_ktime_get_ns;
static int (*trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)BPF_FUNC_trace_printk;
static int (*get_smp_processor_id)(void) =
(void *)BPF_FUNC_get_smp_processor_id;
static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) =
(void *)BPF_FUNC_perf_event_output;
struct bpf_map_def SEC("maps") channel = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(u32),
.max_entries = __NR_CPUS__,
};
SEC("func_write=sys_write")
int func_write(void *ctx)
{
struct {
u64 ktime;
int cpuid;
} __attribute__((packed)) output_data;
char error_data[] = "Error: failed to output: %d\n";
output_data.cpuid = get_smp_processor_id();
output_data.ktime = ktime_get_ns();
int err = perf_event_output(ctx, &channel, get_smp_processor_id(),
&output_data, sizeof(output_data));
if (err)
trace_printk(error_data, sizeof(error_data), err);
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************ END ***************************/
# perf record -a -e bpf-output/no-inherit,name=evt/ \
-e ./test_bpf_output.c/map:channel.event=evt/ ls /
# perf script | grep ls
ls 2242 [003] 347851.557563: evt: ffffffff811fd201 sys_write ...
ls 2242 [003] 347851.557571: evt: ffffffff811fd201 sys_write ...
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Cody P Schafer <dev@codyps.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kirill Smelkov <kirr@nexedi.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456132275-98875-11-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 09:10:37 +00:00
if ( perf_evsel__is_bpf_output ( evsel ) ) {
2016-04-01 13:26:42 +00:00
evsel - > attr . sample_type | = ( PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD ) ,
perf tools: Introduce bpf-output event
Commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper")
adds a helper to enable a BPF program to output data to a perf ring
buffer through a new type of perf event, PERF_COUNT_SW_BPF_OUTPUT. This
patch enables perf to create events of that type. Now a perf user can
use the following cmdline to receive output data from BPF programs:
# perf record -a -e bpf-output/no-inherit,name=evt/ \
-e ./test_bpf_output.c/map:channel.event=evt/ ls /
# perf script
perf 1560 [004] 347747.086295: evt: ffffffff811fd201 sys_write ...
perf 1560 [004] 347747.086300: evt: ffffffff811fd201 sys_write ...
perf 1560 [004] 347747.086315: evt: ffffffff811fd201 sys_write ...
...
Test result:
# cat test_bpf_output.c
/************************ BEGIN **************************/
#include <uapi/linux/bpf.h>
struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};
#define SEC(NAME) __attribute__((section(NAME), used))
static u64 (*ktime_get_ns)(void) =
(void *)BPF_FUNC_ktime_get_ns;
static int (*trace_printk)(const char *fmt, int fmt_size, ...) =
(void *)BPF_FUNC_trace_printk;
static int (*get_smp_processor_id)(void) =
(void *)BPF_FUNC_get_smp_processor_id;
static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) =
(void *)BPF_FUNC_perf_event_output;
struct bpf_map_def SEC("maps") channel = {
.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(u32),
.max_entries = __NR_CPUS__,
};
SEC("func_write=sys_write")
int func_write(void *ctx)
{
struct {
u64 ktime;
int cpuid;
} __attribute__((packed)) output_data;
char error_data[] = "Error: failed to output: %d\n";
output_data.cpuid = get_smp_processor_id();
output_data.ktime = ktime_get_ns();
int err = perf_event_output(ctx, &channel, get_smp_processor_id(),
&output_data, sizeof(output_data));
if (err)
trace_printk(error_data, sizeof(error_data), err);
return 0;
}
char _license[] SEC("license") = "GPL";
int _version SEC("version") = LINUX_VERSION_CODE;
/************************ END ***************************/
# perf record -a -e bpf-output/no-inherit,name=evt/ \
-e ./test_bpf_output.c/map:channel.event=evt/ ls /
# perf script | grep ls
ls 2242 [003] 347851.557563: evt: ffffffff811fd201 sys_write ...
ls 2242 [003] 347851.557571: evt: ffffffff811fd201 sys_write ...
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Cody P Schafer <dev@codyps.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kirill Smelkov <kirr@nexedi.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1456132275-98875-11-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-02-22 09:10:37 +00:00
evsel - > attr . sample_period = 1 ;
}
2011-01-03 16:39:04 -02:00
return evsel ;
}
perf evsel: Fix attr.exclude_kernel setting for default cycles:p
Yet another fix for probing the max attr.precise_ip setting: it is not
enough setting attr.exclude_kernel for !root users, as they _can_
profile the kernel if the kernel.perf_event_paranoid sysctl is set to
-1, so check that as well.
Testing it:
As non root:
$ sysctl kernel.perf_event_paranoid
kernel.perf_event_paranoid = 2
$ perf record sleep 1
$ perf evlist -v
cycles:uppp: ..., exclude_kernel: 1, ... precise_ip: 3, ...
Now as non-root, but with kernel.perf_event_paranoid set to the
most permissive value, -1:
$ sysctl kernel.perf_event_paranoid
kernel.perf_event_paranoid = -1
$ perf record sleep 1
$ perf evlist -v
cycles:ppp: ..., exclude_kernel: 0, ... precise_ip: 3, ...
$
I.e. non-root, default kernel.perf_event_paranoid: :uppp modifier = not allowed to sample the kernel,
non-root, most permissible kernel.perf_event_paranoid: :ppp = allowed to sample the kernel.
In both cases, use the highest available precision: attr.precise_ip = 3.
Reported-and-Tested-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: d37a36979077 ("perf evsel: Fix attr.exclude_kernel setting for default cycles:p")
Link: http://lkml.kernel.org/n/tip-nj2qkf75xsd6pw6hhjzfqqdx@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-09-22 15:41:44 -03:00
/*
 * perf_event_can_profile_kernel - may this process sample the kernel?
 *
 * True for effective uid 0, or when perf_event_paranoid() reports -1;
 * the paranoid level is only queried for non-root users.
 */
static bool perf_event_can_profile_kernel(void)
{
	if (geteuid() == 0)
		return true;

	return perf_event_paranoid() == -1;
}
2017-07-03 13:05:43 -03:00
struct perf_evsel * perf_evsel__new_cycles ( bool precise )
2016-07-28 18:33:20 -03:00
{
struct perf_event_attr attr = {
. type = PERF_TYPE_HARDWARE ,
. config = PERF_COUNT_HW_CPU_CYCLES ,
perf evsel: Fix attr.exclude_kernel setting for default cycles:p
Yet another fix for probing the max attr.precise_ip setting: it is not
enough setting attr.exclude_kernel for !root users, as they _can_
profile the kernel if the kernel.perf_event_paranoid sysctl is set to
-1, so check that as well.
Testing it:
As non root:
$ sysctl kernel.perf_event_paranoid
kernel.perf_event_paranoid = 2
$ perf record sleep 1
$ perf evlist -v
cycles:uppp: ..., exclude_kernel: 1, ... precise_ip: 3, ...
Now as non-root, but with kernel.perf_event_paranoid set to the
most permissive value, -1:
$ sysctl kernel.perf_event_paranoid
kernel.perf_event_paranoid = -1
$ perf record sleep 1
$ perf evlist -v
cycles:ppp: ..., exclude_kernel: 0, ... precise_ip: 3, ...
$
I.e. non-root, default kernel.perf_event_paranoid: :uppp modifier = not allowed to sample the kernel,
non-root, most permissible kernel.perf_event_paranoid: :ppp = allowed to sample the kernel.
In both cases, use the highest available precision: attr.precise_ip = 3.
Reported-and-Tested-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: d37a36979077 ("perf evsel: Fix attr.exclude_kernel setting for default cycles:p")
Link: http://lkml.kernel.org/n/tip-nj2qkf75xsd6pw6hhjzfqqdx@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-09-22 15:41:44 -03:00
. exclude_kernel = ! perf_event_can_profile_kernel ( ) ,
2016-07-28 18:33:20 -03:00
} ;
struct perf_evsel * evsel ;
event_attr_init ( & attr ) ;
2017-07-03 13:05:43 -03:00
if ( ! precise )
goto new_event ;
perf evsel: Fix probing of precise_ip level for default cycles event
Since commit 18e7a45af91a ("perf/x86: Reject non sampling events with
precise_ip") returns -EINVAL for sys_perf_event_open() with an attribute
with (attr.precise_ip > 0 && attr.sample_period == 0), just like is done
in the routine used to probe the max precise level when no events were
passed to 'perf record' or 'perf top', i.e.:
perf_evsel__new_cycles()
perf_event_attr__set_max_precise_ip()
The x86 code, in x86_pmu_hw_config(), which is called all the way from
sys_perf_event_open() did, starting with the aforementioned commit:
/* There's no sense in having PEBS for non sampling events: */
if (!is_sampling_event(event))
return -EINVAL;
Which makes it fail for cycles:ppp, cycles:pp and cycles:p, always using
just the non precise cycles variant.
To make sure that this is the case, I tested it, before this patch,
with:
# perf probe -L x86_pmu_hw_config
<x86_pmu_hw_config@/home/acme/git/linux/arch/x86/events/core.c:0>
0 int x86_pmu_hw_config(struct perf_event *event)
1 {
2 if (event->attr.precise_ip) {
<SNIP>
17 if (event->attr.precise_ip > precise)
18 return -EOPNOTSUPP;
/* There's no sense in having PEBS for non sampling events: */
21 if (!is_sampling_event(event))
22 return -EINVAL;
}
<SNIP>
# perf probe x86_pmu_hw_config:22
Added new events:
probe:x86_pmu_hw_config (on x86_pmu_hw_config:22)
probe:x86_pmu_hw_config_1 (on x86_pmu_hw_config:22)
You can now use it in all perf tools, such as:
perf record -e probe:x86_pmu_hw_config_1 -aR sleep 1
# perf trace -e perf_event_open,probe:x86_pmu_hwconfig*/max-stack=16/ perf record usleep 1
0.000 ( 0.015 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ...
0.015 ( ): probe:x86_pmu_hw_config:(ffffffff9c0065e1))
x86_pmu_hw_config ([kernel.kallsyms])
hsw_hw_config ([kernel.kallsyms])
x86_pmu_event_init ([kernel.kallsyms])
perf_try_init_event ([kernel.kallsyms])
perf_event_alloc ([kernel.kallsyms])
SYSC_perf_event_open ([kernel.kallsyms])
sys_perf_event_open ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
return_from_SYSCALL_64 ([kernel.kallsyms])
syscall (/usr/lib64/libc-2.24.so)
perf_event_attr__set_max_precise_ip (/home/acme/bin/perf)
perf_evsel__new_cycles (/home/acme/bin/perf)
perf_evlist__add_default (/home/acme/bin/perf)
cmd_record (/home/acme/bin/perf)
run_builtin (/home/acme/bin/perf)
handle_internal_command (/home/acme/bin/perf)
0.000 ( 0.021 ms): perf/4150 ... [continued]: perf_event_open()) = -1 EINVAL Invalid argument
0.023 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ...
0.025 ( ): probe:x86_pmu_hw_config:(ffffffff9c0065e1))
x86_pmu_hw_config ([kernel.kallsyms])
hsw_hw_config ([kernel.kallsyms])
x86_pmu_event_init ([kernel.kallsyms])
perf_try_init_event ([kernel.kallsyms])
perf_event_alloc ([kernel.kallsyms])
SYSC_perf_event_open ([kernel.kallsyms])
sys_perf_event_open ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
return_from_SYSCALL_64 ([kernel.kallsyms])
syscall (/usr/lib64/libc-2.24.so)
perf_event_attr__set_max_precise_ip (/home/acme/bin/perf)
perf_evsel__new_cycles (/home/acme/bin/perf)
perf_evlist__add_default (/home/acme/bin/perf)
cmd_record (/home/acme/bin/perf)
run_builtin (/home/acme/bin/perf)
handle_internal_command (/home/acme/bin/perf)
0.023 ( 0.004 ms): perf/4150 ... [continued]: perf_event_open()) = -1 EINVAL Invalid argument
0.028 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ...
0.030 ( ): probe:x86_pmu_hw_config:(ffffffff9c0065e1))
x86_pmu_hw_config ([kernel.kallsyms])
hsw_hw_config ([kernel.kallsyms])
x86_pmu_event_init ([kernel.kallsyms])
perf_try_init_event ([kernel.kallsyms])
perf_event_alloc ([kernel.kallsyms])
SYSC_perf_event_open ([kernel.kallsyms])
sys_perf_event_open ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
return_from_SYSCALL_64 ([kernel.kallsyms])
syscall (/usr/lib64/libc-2.24.so)
perf_event_attr__set_max_precise_ip (/home/acme/bin/perf)
perf_evsel__new_cycles (/home/acme/bin/perf)
perf_evlist__add_default (/home/acme/bin/perf)
cmd_record (/home/acme/bin/perf)
run_builtin (/home/acme/bin/perf)
handle_internal_command (/home/acme/bin/perf)
0.028 ( 0.004 ms): perf/4150 ... [continued]: perf_event_open()) = -1 EINVAL Invalid argument
41.018 ( 0.012 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8b5dd0, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
41.065 ( 0.011 ms): perf/4150 perf_event_open(attr_uptr: 0x3c7db78, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
41.080 ( 0.006 ms): perf/4150 perf_event_open(attr_uptr: 0x3c7db78, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
41.103 ( 0.010 ms): perf/4150 perf_event_open(attr_uptr: 0x3c4e748, pid: 4151 (perf), group_fd: -1, flags: FD_CLOEXEC) = 4
41.115 ( 0.006 ms): perf/4150 perf_event_open(attr_uptr: 0x3c4e748, pid: 4151 (perf), cpu: 1, group_fd: -1, flags: FD_CLOEXEC) = 5
41.122 ( 0.004 ms): perf/4150 perf_event_open(attr_uptr: 0x3c4e748, pid: 4151 (perf), cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 6
41.128 ( 0.008 ms): perf/4150 perf_event_open(attr_uptr: 0x3c4e748, pid: 4151 (perf), cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 8
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.017 MB perf.data (2 samples) ]
#
I.e. that return -EINVAL in x86_pmu_hw_config() is hit three times.
So fix it by just setting attr.sample_period
Now, after this patch:
# perf trace --max-stack=2 -e perf_event_open,probe:x86_pmu_hw_config* perf record usleep 1
[ perf record: Woken up 1 times to write data ]
0.000 ( 0.017 ms): perf/8469 perf_event_open(attr_uptr: 0x7ffe36c27d10, pid: -1, cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_event_open_cloexec_flag (/home/acme/bin/perf)
0.050 ( 0.031 ms): perf/8469 perf_event_open(attr_uptr: 0x24ebb78, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_evlist__config (/home/acme/bin/perf)
0.092 ( 0.040 ms): perf/8469 perf_event_open(attr_uptr: 0x24ebb78, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_evlist__config (/home/acme/bin/perf)
0.143 ( 0.007 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, cpu: -1, group_fd: -1 ) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_event_attr__set_max_precise_ip (/home/acme/bin/perf)
0.161 ( 0.007 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, pid: 8470 (perf), group_fd: -1, flags: FD_CLOEXEC) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_evsel__open (/home/acme/bin/perf)
0.171 ( 0.005 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, pid: 8470 (perf), cpu: 1, group_fd: -1, flags: FD_CLOEXEC) = 5
syscall (/usr/lib64/libc-2.24.so)
perf_evsel__open (/home/acme/bin/perf)
0.180 ( 0.007 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, pid: 8470 (perf), cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 6
syscall (/usr/lib64/libc-2.24.so)
perf_evsel__open (/home/acme/bin/perf)
0.190 ( 0.005 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, pid: 8470 (perf), cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 8
syscall (/usr/lib64/libc-2.24.so)
perf_evsel__open (/home/acme/bin/perf)
[ perf record: Captured and wrote 0.017 MB perf.data (7 samples) ]
#
The probe one called from perf_event_attr__set_max_precise_ip() works
the first time, with attr.precise_ip = 3, with the next ones being the
per cpu ones for the cycles:ppp event.
And here is the text from a report and alternative proposed patch by
Thomas-Mich Richter:
---
On s390 the counter and sampling facility do not support a precise IP
skid level and sometimes returns EOPNOTSUPP when structure member
precise_ip in struct perf_event_attr is not set to zero.
On s390 command 'perf record -- true' fails with error EOPNOTSUPP. This
happens only when no events are specified on command line.
The functions called are
...
--> perf_evlist__add_default
--> perf_evsel__new_cycles
--> perf_event_attr__set_max_precise_ip
The last function determines the value of structure member precise_ip by
invoking the perf_event_open() system call and checking the return code.
The first successful open is the value for precise_ip.
However the value is determined without setting member sample_period and
indicates no sampling.
On s390 the counter facility and sampling facility are different. The
above procedure determines a precise_ip value of 3 using the counter
facility. Later it uses the sampling facility with a value of 3 and
fails with EOPNOTSUPP.
---
v2: Older compilers (e.g. gcc 4.4.7) don't support referencing members
of unnamed union members in the container struct initialization, so
move from:
struct perf_event_attr attr = {
...
.sample_period = 1,
};
to right after it as:
struct perf_event_attr attr = {
...
};
attr.sample_period = 1;
v3: We need to reset .sample_period to 0 to let the users of
perf_evsel__new_cycles() to properly setup attr.sample_period or
attr.sample_freq. Reported by Ingo Molnar.
Reported-and-Acked-by: Thomas-Mich Richter <tmricht@linux.vnet.ibm.com>
Acked-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: 18e7a45af91a ("perf/x86: Reject non sampling events with precise_ip")
Link: http://lkml.kernel.org/n/tip-yv6nnkl7tzqocrm0hl3x7vf1@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-06-09 16:54:28 -03:00
/*
* Unnamed union member , not supported as struct member named
* initializer in older compilers such as gcc 4.4 .7
*
* Just for probing the precise_ip :
*/
attr . sample_period = 1 ;
2016-07-28 18:33:20 -03:00
perf_event_attr__set_max_precise_ip ( & attr ) ;
perf evsel: Fix probing of precise_ip level for default cycles event
Since commit 18e7a45af91a ("perf/x86: Reject non sampling events with
precise_ip") returns -EINVAL for sys_perf_event_open() with an attribute
with (attr.precise_ip > 0 && attr.sample_period == 0), just like is done
in the routine used to probe the max precise level when no events were
passed to 'perf record' or 'perf top', i.e.:
perf_evsel__new_cycles()
perf_event_attr__set_max_precise_ip()
The x86 code, in x86_pmu_hw_config(), which is called all the way from
sys_perf_event_open() did, starting with the aforementioned commit:
/* There's no sense in having PEBS for non sampling events: */
if (!is_sampling_event(event))
return -EINVAL;
Which makes it fail for cycles:ppp, cycles:pp and cycles:p, always using
just the non precise cycles variant.
To make sure that this is the case, I tested it, before this patch,
with:
# perf probe -L x86_pmu_hw_config
<x86_pmu_hw_config@/home/acme/git/linux/arch/x86/events/core.c:0>
0 int x86_pmu_hw_config(struct perf_event *event)
1 {
2 if (event->attr.precise_ip) {
<SNIP>
17 if (event->attr.precise_ip > precise)
18 return -EOPNOTSUPP;
/* There's no sense in having PEBS for non sampling events: */
21 if (!is_sampling_event(event))
22 return -EINVAL;
}
<SNIP>
# perf probe x86_pmu_hw_config:22
Added new events:
probe:x86_pmu_hw_config (on x86_pmu_hw_config:22)
probe:x86_pmu_hw_config_1 (on x86_pmu_hw_config:22)
You can now use it in all perf tools, such as:
perf record -e probe:x86_pmu_hw_config_1 -aR sleep 1
# perf trace -e perf_event_open,probe:x86_pmu_hwconfig*/max-stack=16/ perf record usleep 1
0.000 ( 0.015 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ...
0.015 ( ): probe:x86_pmu_hw_config:(ffffffff9c0065e1))
x86_pmu_hw_config ([kernel.kallsyms])
hsw_hw_config ([kernel.kallsyms])
x86_pmu_event_init ([kernel.kallsyms])
perf_try_init_event ([kernel.kallsyms])
perf_event_alloc ([kernel.kallsyms])
SYSC_perf_event_open ([kernel.kallsyms])
sys_perf_event_open ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
return_from_SYSCALL_64 ([kernel.kallsyms])
syscall (/usr/lib64/libc-2.24.so)
perf_event_attr__set_max_precise_ip (/home/acme/bin/perf)
perf_evsel__new_cycles (/home/acme/bin/perf)
perf_evlist__add_default (/home/acme/bin/perf)
cmd_record (/home/acme/bin/perf)
run_builtin (/home/acme/bin/perf)
handle_internal_command (/home/acme/bin/perf)
0.000 ( 0.021 ms): perf/4150 ... [continued]: perf_event_open()) = -1 EINVAL Invalid argument
0.023 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ...
0.025 ( ): probe:x86_pmu_hw_config:(ffffffff9c0065e1))
x86_pmu_hw_config ([kernel.kallsyms])
hsw_hw_config ([kernel.kallsyms])
x86_pmu_event_init ([kernel.kallsyms])
perf_try_init_event ([kernel.kallsyms])
perf_event_alloc ([kernel.kallsyms])
SYSC_perf_event_open ([kernel.kallsyms])
sys_perf_event_open ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
return_from_SYSCALL_64 ([kernel.kallsyms])
syscall (/usr/lib64/libc-2.24.so)
perf_event_attr__set_max_precise_ip (/home/acme/bin/perf)
perf_evsel__new_cycles (/home/acme/bin/perf)
perf_evlist__add_default (/home/acme/bin/perf)
cmd_record (/home/acme/bin/perf)
run_builtin (/home/acme/bin/perf)
handle_internal_command (/home/acme/bin/perf)
0.023 ( 0.004 ms): perf/4150 ... [continued]: perf_event_open()) = -1 EINVAL Invalid argument
0.028 ( 0.002 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8ba110, cpu: -1, group_fd: -1 ) ...
0.030 ( ): probe:x86_pmu_hw_config:(ffffffff9c0065e1))
x86_pmu_hw_config ([kernel.kallsyms])
hsw_hw_config ([kernel.kallsyms])
x86_pmu_event_init ([kernel.kallsyms])
perf_try_init_event ([kernel.kallsyms])
perf_event_alloc ([kernel.kallsyms])
SYSC_perf_event_open ([kernel.kallsyms])
sys_perf_event_open ([kernel.kallsyms])
do_syscall_64 ([kernel.kallsyms])
return_from_SYSCALL_64 ([kernel.kallsyms])
syscall (/usr/lib64/libc-2.24.so)
perf_event_attr__set_max_precise_ip (/home/acme/bin/perf)
perf_evsel__new_cycles (/home/acme/bin/perf)
perf_evlist__add_default (/home/acme/bin/perf)
cmd_record (/home/acme/bin/perf)
run_builtin (/home/acme/bin/perf)
handle_internal_command (/home/acme/bin/perf)
0.028 ( 0.004 ms): perf/4150 ... [continued]: perf_event_open()) = -1 EINVAL Invalid argument
41.018 ( 0.012 ms): perf/4150 perf_event_open(attr_uptr: 0x7ffebc8b5dd0, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
41.065 ( 0.011 ms): perf/4150 perf_event_open(attr_uptr: 0x3c7db78, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
41.080 ( 0.006 ms): perf/4150 perf_event_open(attr_uptr: 0x3c7db78, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
41.103 ( 0.010 ms): perf/4150 perf_event_open(attr_uptr: 0x3c4e748, pid: 4151 (perf), group_fd: -1, flags: FD_CLOEXEC) = 4
41.115 ( 0.006 ms): perf/4150 perf_event_open(attr_uptr: 0x3c4e748, pid: 4151 (perf), cpu: 1, group_fd: -1, flags: FD_CLOEXEC) = 5
41.122 ( 0.004 ms): perf/4150 perf_event_open(attr_uptr: 0x3c4e748, pid: 4151 (perf), cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 6
41.128 ( 0.008 ms): perf/4150 perf_event_open(attr_uptr: 0x3c4e748, pid: 4151 (perf), cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 8
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.017 MB perf.data (2 samples) ]
#
I.e. that return -EINVAL in x86_pmu_hw_config() is hit three times.
So fix it by just setting attr.sample_period
Now, after this patch:
# perf trace --max-stack=2 -e perf_event_open,probe:x86_pmu_hw_config* perf record usleep 1
[ perf record: Woken up 1 times to write data ]
0.000 ( 0.017 ms): perf/8469 perf_event_open(attr_uptr: 0x7ffe36c27d10, pid: -1, cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_event_open_cloexec_flag (/home/acme/bin/perf)
0.050 ( 0.031 ms): perf/8469 perf_event_open(attr_uptr: 0x24ebb78, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_evlist__config (/home/acme/bin/perf)
0.092 ( 0.040 ms): perf/8469 perf_event_open(attr_uptr: 0x24ebb78, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_evlist__config (/home/acme/bin/perf)
0.143 ( 0.007 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, cpu: -1, group_fd: -1 ) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_event_attr__set_max_precise_ip (/home/acme/bin/perf)
0.161 ( 0.007 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, pid: 8470 (perf), group_fd: -1, flags: FD_CLOEXEC) = 4
syscall (/usr/lib64/libc-2.24.so)
perf_evsel__open (/home/acme/bin/perf)
0.171 ( 0.005 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, pid: 8470 (perf), cpu: 1, group_fd: -1, flags: FD_CLOEXEC) = 5
syscall (/usr/lib64/libc-2.24.so)
perf_evsel__open (/home/acme/bin/perf)
0.180 ( 0.007 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, pid: 8470 (perf), cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 6
syscall (/usr/lib64/libc-2.24.so)
perf_evsel__open (/home/acme/bin/perf)
0.190 ( 0.005 ms): perf/8469 perf_event_open(attr_uptr: 0x24bc748, pid: 8470 (perf), cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 8
syscall (/usr/lib64/libc-2.24.so)
perf_evsel__open (/home/acme/bin/perf)
[ perf record: Captured and wrote 0.017 MB perf.data (7 samples) ]
#
The probe one called from perf_event_attr__set_max_precise_ip() works
the first time, with attr.precise_ip = 3, with the next ones being the
per cpu ones for the cycles:ppp event.
And here is the text from a report and alternative proposed patch by
Thomas-Mich Richter:
---
On s390 the counter and sampling facility do not support a precise IP
skid level and sometimes returns EOPNOTSUPP when structure member
precise_ip in struct perf_event_attr is not set to zero.
On s390 command 'perf record -- true' fails with error EOPNOTSUPP. This
happens only when no events are specified on command line.
The functions called are
...
--> perf_evlist__add_default
--> perf_evsel__new_cycles
--> perf_event_attr__set_max_precise_ip
The last function determines the value of structure member precise_ip by
invoking the perf_event_open() system call and checking the return code.
The first successful open is the value for precise_ip.
However the value is determined without setting member sample_period and
indicates no sampling.
On s390 the counter facility and sampling facility are different. The
above procedure determines a precise_ip value of 3 using the counter
facility. Later it uses the sampling facility with a value of 3 and
fails with EOPNOTSUPP.
---
v2: Older compilers (e.g. gcc 4.4.7) don't support referencing members
of unnamed union members in the container struct initialization, so
move from:
struct perf_event_attr attr = {
...
.sample_period = 1,
};
to right after it as:
struct perf_event_attr attr = {
...
};
attr.sample_period = 1;
v3: We need to reset .sample_period to 0 to let the users of
perf_evsel__new_cycles() to properly setup attr.sample_period or
attr.sample_freq. Reported by Ingo Molnar.
Reported-and-Acked-by: Thomas-Mich Richter <tmricht@linux.vnet.ibm.com>
Acked-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: 18e7a45af91a ("perf/x86: Reject non sampling events with precise_ip")
Link: http://lkml.kernel.org/n/tip-yv6nnkl7tzqocrm0hl3x7vf1@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-06-09 16:54:28 -03:00
/*
* Now let the usual logic to set up the perf_event_attr defaults
* to kick in when we return and before perf_evsel__open ( ) is called .
*/
attr . sample_period = 0 ;
2017-07-03 13:05:43 -03:00
new_event :
2016-07-28 18:33:20 -03:00
evsel = perf_evsel__new ( & attr ) ;
if ( evsel = = NULL )
goto out ;
/* use asprintf() because free(evsel) assumes name is allocated */
2017-07-10 16:19:25 -03:00
if ( asprintf ( & evsel - > name , " cycles%s%s%.*s " ,
( attr . precise_ip | | attr . exclude_kernel ) ? " : " : " " ,
attr . exclude_kernel ? " u " : " " ,
attr . precise_ip ? attr . precise_ip + 1 : 0 , " ppp " ) < 0 )
2016-07-28 18:33:20 -03:00
goto error_free ;
out :
return evsel ;
error_free :
perf_evsel__delete ( evsel ) ;
evsel = NULL ;
goto out ;
}
2015-09-07 10:38:06 +02:00
/*
* Returns pointer with encoded error via < linux / err . h > interface .
*/
2013-11-07 16:41:19 -03:00
/*
 * Allocate and initialize an evsel for the tracepoint "sys:name".
 *
 * The evsel name is built with asprintf() from sys and name, the tracepoint
 * format is looked up with trace_event__tp_format() and its id becomes
 * attr.config. The attr is set up as PERF_TYPE_TRACEPOINT sampling
 * RAW | TIME | CPU | PERIOD with sample_period = 1, then handed to
 * perf_evsel__init() together with idx.
 *
 * Returns the new evsel, or ERR_PTR(err) on failure: -ENOMEM for the
 * allocation/asprintf failures, or the error encoded in the pointer
 * returned by trace_event__tp_format().
 */
struct perf_evsel * perf_evsel__newtp_idx ( const char * sys , const char * name , int idx )
2012-09-18 11:21:50 -03:00
{
2014-10-09 15:29:51 -03:00
struct perf_evsel * evsel = zalloc ( perf_evsel__object . size ) ;
2015-09-07 10:38:06 +02:00
/* Default error; only overridden when the tracepoint format lookup fails. */
int err = - ENOMEM ;
2012-09-18 11:21:50 -03:00
2015-09-07 10:38:06 +02:00
if ( evsel = = NULL ) {
goto out_err ;
} else {
2012-09-18 11:21:50 -03:00
struct perf_event_attr attr = {
2012-09-26 12:28:26 -03:00
. type = PERF_TYPE_TRACEPOINT ,
. sample_type = ( PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD ) ,
2012-09-18 11:21:50 -03:00
} ;
2012-09-26 17:11:38 -03:00
/*
 * NOTE(review): asprintf(3) documents the contents of the output pointer
 * as undefined on failure; confirm that zfree(&evsel->name) on the
 * out_free path is safe in that case.
 */
if ( asprintf ( & evsel - > name , " %s:%s " , sys , name ) < 0 )
goto out_free ;
2013-12-03 14:09:24 +01:00
evsel - > tp_format = trace_event__tp_format ( sys , name ) ;
2015-09-07 10:38:06 +02:00
if ( IS_ERR ( evsel - > tp_format ) ) {
err = PTR_ERR ( evsel - > tp_format ) ;
2012-09-18 11:21:50 -03:00
goto out_free ;
2015-09-07 10:38:06 +02:00
}
2012-09-18 11:21:50 -03:00
2012-09-26 12:28:26 -03:00
event_attr_init ( & attr ) ;
2012-09-18 11:21:50 -03:00
attr . config = evsel - > tp_format - > id ;
2012-09-26 12:28:26 -03:00
attr . sample_period = 1 ;
2012-09-18 11:21:50 -03:00
perf_evsel__init ( evsel , & attr , idx ) ;
}
return evsel ;
/* Error unwinding: release the name (if any) and the evsel itself. */
out_free :
2013-12-27 16:55:14 -03:00
zfree ( & evsel - > name ) ;
2012-09-18 11:21:50 -03:00
free ( evsel ) ;
2015-09-07 10:38:06 +02:00
out_err :
return ERR_PTR ( err ) ;
2012-09-18 11:21:50 -03:00
}
2012-09-06 13:11:18 -03:00
/*
 * Printable names of the generic hardware events. The table has
 * PERF_COUNT_HW_MAX slots and is indexed by attr.config in
 * __perf_evsel__hw_name().
 */
const char * perf_evsel__hw_names [ PERF_COUNT_HW_MAX ] = {
2012-05-25 16:38:11 -03:00
" cycles " ,
" instructions " ,
" cache-references " ,
" cache-misses " ,
" branches " ,
" branch-misses " ,
" bus-cycles " ,
" stalled-cycles-frontend " ,
" stalled-cycles-backend " ,
" ref-cycles " ,
} ;
2012-06-13 15:52:42 -03:00
/*
 * Map a hardware event config value to its printable name.
 *
 * Returns the perf_evsel__hw_names[] entry when config is below
 * PERF_COUNT_HW_MAX and the slot is non-NULL, otherwise the
 * "unknown-hardware" fallback string.
 */
static const char * __perf_evsel__hw_name ( u64 config )
2012-05-25 16:38:11 -03:00
{
if ( config < PERF_COUNT_HW_MAX & & perf_evsel__hw_names [ config ] )
return perf_evsel__hw_names [ config ] ;
return " unknown-hardware " ;
}
2012-06-11 13:33:09 -03:00
/*
 * Append the event modifier suffix (":" followed by modifier letters) for
 * evsel->attr to bf, writing at most size bytes through scnprintf().
 *
 * Returns r, the running count that includes one slot reserved for the
 * colon plus whatever the scnprintf() calls report; 0 when no modifier
 * applies.
 *
 * Bookkeeping: the first modifier that gets printed reserves one byte for
 * the colon by pre-incrementing r and remembering that 1-based position in
 * 'colon'; the ':' itself is only stored at bf[colon - 1] at the very end.
 *
 * MOD_PRINT(context, mod) prints 'mod' when attr->exclude_<context> is NOT
 * set, i.e. the letters name the contexts that are still counted.
 */
static int perf_evsel__add_modifiers ( struct perf_evsel * evsel , char * bf , size_t size )
2012-05-25 16:38:11 -03:00
{
2012-06-11 13:33:09 -03:00
int colon = 0 , r = 0 ;
2012-05-25 16:38:11 -03:00
struct perf_event_attr * attr = & evsel - > attr ;
bool exclude_guest_default = false ;
# define MOD_PRINT(context, mod) do { \
if ( ! attr - > exclude_ # # context ) { \
2012-06-11 13:33:09 -03:00
if ( ! colon ) colon = + + r ; \
2012-05-25 16:38:11 -03:00
r + = scnprintf ( bf + r , size - r , " %c " , mod ) ; \
} } while ( 0 )
/* k/u/h are only spelled out when at least one of them is excluded. */
if ( attr - > exclude_kernel | | attr - > exclude_user | | attr - > exclude_hv ) {
MOD_PRINT ( kernel , ' k ' ) ;
MOD_PRINT ( user , ' u ' ) ;
MOD_PRINT ( hv , ' h ' ) ;
exclude_guest_default = true ;
}
/* One 'p' per precise_ip level, taken as a prefix of "ppp". */
if ( attr - > precise_ip ) {
if ( ! colon )
2012-06-11 13:33:09 -03:00
colon = + + r ;
2012-05-25 16:38:11 -03:00
r + = scnprintf ( bf + r , size - r , " %.*s " , attr - > precise_ip , " ppp " ) ;
exclude_guest_default = true ;
}
/*
 * H/G are considered when exclude_host is set or when exclude_guest
 * equals exclude_guest_default (which is true once any of the modifiers
 * above was in effect).
 */
if ( attr - > exclude_host | | attr - > exclude_guest = = exclude_guest_default ) {
MOD_PRINT ( host , ' H ' ) ;
MOD_PRINT ( guest , ' G ' ) ;
}
# undef MOD_PRINT
/* NOTE(review): this store is not checked against size -- confirm callers always pass room for it. */
if ( colon )
2012-06-11 13:33:09 -03:00
bf [ colon - 1 ] = ' : ' ;
2012-05-25 16:38:11 -03:00
return r ;
}
2012-06-11 13:33:09 -03:00
static int perf_evsel__hw_name ( struct perf_evsel * evsel , char * bf , size_t size )
{
int r = scnprintf ( bf , size , " %s " , __perf_evsel__hw_name ( evsel - > attr . config ) ) ;
return r + perf_evsel__add_modifiers ( evsel , bf + r , size - r ) ;
}
2012-09-06 13:11:18 -03:00
/*
 * Printable names of the software events. The table has
 * PERF_COUNT_SW_MAX slots and is indexed by attr.config in
 * __perf_evsel__sw_name().
 */
const char * perf_evsel__sw_names [ PERF_COUNT_SW_MAX ] = {
2012-06-11 14:36:20 -03:00
" cpu-clock " ,
" task-clock " ,
" page-faults " ,
" context-switches " ,
2012-09-06 13:11:18 -03:00
" cpu-migrations " ,
2012-06-11 14:36:20 -03:00
" minor-faults " ,
" major-faults " ,
" alignment-faults " ,
" emulation-faults " ,
2013-08-31 21:50:52 +03:00
" dummy " ,
2012-06-11 14:36:20 -03:00
} ;
2012-06-13 15:52:42 -03:00
/*
 * Map a software event config value to its printable name.
 *
 * Returns the perf_evsel__sw_names[] entry when config is below
 * PERF_COUNT_SW_MAX and the slot is non-NULL, otherwise the
 * "unknown-software" fallback string.
 */
static const char * __perf_evsel__sw_name ( u64 config )
2012-06-11 14:36:20 -03:00
{
if ( config < PERF_COUNT_SW_MAX & & perf_evsel__sw_names [ config ] )
return perf_evsel__sw_names [ config ] ;
return " unknown-software " ;
}
static int perf_evsel__sw_name ( struct perf_evsel * evsel , char * bf , size_t size )
{
int r = scnprintf ( bf , size , " %s " , __perf_evsel__sw_name ( evsel - > attr . config ) ) ;
return r + perf_evsel__add_modifiers ( evsel , bf + r , size - r ) ;
}
2012-06-28 23:18:49 +02:00
/*
 * Format a breakpoint event as "mem:0x<addr>:" followed by one letter per
 * access bit set in type: 'r' (HW_BREAKPOINT_R), 'w' (HW_BREAKPOINT_W) and
 * 'x' (HW_BREAKPOINT_X), always in that order.
 *
 * Returns the accumulated scnprintf() results.
 */
static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
	static const struct {
		u64  bit;
		char letter;
	} access[] = {
		{ HW_BREAKPOINT_R, 'r' },
		{ HW_BREAKPOINT_W, 'w' },
		{ HW_BREAKPOINT_X, 'x' },
	};
	size_t i;
	int len = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);

	for (i = 0; i < sizeof(access) / sizeof(access[0]); i++) {
		if (type & access[i].bit)
			len += scnprintf(bf + len, size - len, "%c", access[i].letter);
	}

	return len;
}
static int perf_evsel__bp_name ( struct perf_evsel * evsel , char * bf , size_t size )
{
struct perf_event_attr * attr = & evsel - > attr ;
int r = __perf_evsel__bp_name ( bf , size , attr - > bp_addr , attr - > bp_type ) ;
return r + perf_evsel__add_modifiers ( evsel , bf + r , size - r ) ;
}
2012-06-11 14:08:07 -03:00
/*
 * Aliases for each hardware cache type, one row per type with up to
 * PERF_EVSEL__MAX_ALIASES spellings. Column 0 is the one used by
 * __perf_evsel__hw_cache_type_op_res_name() when printing.
 */
const char * perf_evsel__hw_cache [ PERF_COUNT_HW_CACHE_MAX ]
[ PERF_EVSEL__MAX_ALIASES ] = {
{ " L1-dcache " , " l1-d " , " l1d " , " L1-data " , } ,
{ " L1-icache " , " l1-i " , " l1i " , " L1-instruction " , } ,
{ " LLC " , " L2 " , } ,
{ " dTLB " , " d-tlb " , " Data-TLB " , } ,
{ " iTLB " , " i-tlb " , " Instruction-TLB " , } ,
{ " branch " , " branches " , " bpu " , " btb " , " bpc " , } ,
{ " node " , } ,
} ;
/*
 * Aliases for each hardware cache operation. When printing,
 * __perf_evsel__hw_cache_type_op_res_name() uses column 0 together with a
 * result name and column 1 when the result index is 0.
 */
const char * perf_evsel__hw_cache_op [ PERF_COUNT_HW_CACHE_OP_MAX ]
[ PERF_EVSEL__MAX_ALIASES ] = {
{ " load " , " loads " , " read " , } ,
{ " store " , " stores " , " write " , } ,
{ " prefetch " , " prefetches " , " speculative-read " , " speculative-load " , } ,
} ;
/*
 * Aliases for each hardware cache result. Column 0 is the one used by
 * __perf_evsel__hw_cache_type_op_res_name() for a non-zero result index.
 */
const char * perf_evsel__hw_cache_result [ PERF_COUNT_HW_CACHE_RESULT_MAX ]
[ PERF_EVSEL__MAX_ALIASES ] = {
{ " refs " , " Reference " , " ops " , " access " , } ,
{ " misses " , " miss " , } ,
} ;
# define C(x) PERF_COUNT_HW_CACHE_##x
# define CACHE_READ (1 << C(OP_READ))
# define CACHE_WRITE (1 << C(OP_WRITE))
# define CACHE_PREFETCH (1 << C(OP_PREFETCH))
# define COP(x) (1 << x)
/*
* cache operation stat
* L1I : Read and prefetch only
* ITLB and BPU : Read - only
*/
/*
 * Per cache type bitmask of the operations accepted for it, built from the
 * CACHE_READ / CACHE_WRITE / CACHE_PREFETCH bits. Tested with COP(op) in
 * perf_evsel__is_cache_op_valid().
 */
static unsigned long perf_evsel__hw_cache_stat [ C ( MAX ) ] = {
[ C ( L1D ) ] = ( CACHE_READ | CACHE_WRITE | CACHE_PREFETCH ) ,
[ C ( L1I ) ] = ( CACHE_READ | CACHE_PREFETCH ) ,
[ C ( LL ) ] = ( CACHE_READ | CACHE_WRITE | CACHE_PREFETCH ) ,
[ C ( DTLB ) ] = ( CACHE_READ | CACHE_WRITE | CACHE_PREFETCH ) ,
[ C ( ITLB ) ] = ( CACHE_READ ) ,
[ C ( BPU ) ] = ( CACHE_READ ) ,
[ C ( NODE ) ] = ( CACHE_READ | CACHE_WRITE | CACHE_PREFETCH ) ,
} ;
/*
 * Tell whether cache operation 'op' is accepted for cache type 'type'
 * according to the perf_evsel__hw_cache_stat[] bitmask table.
 *
 * Both arguments arrive as raw u8 values, so they are range-checked first:
 * an out-of-range 'type' would index past the table and an out-of-range
 * 'op' would shift by more than the width of int in COP(). Such inputs are
 * simply reported as invalid.
 *
 * Returns true when the combination is valid, false otherwise.
 */
bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
{
	if (type >= PERF_COUNT_HW_CACHE_MAX || op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return false;	/* not a known cache type / operation */

	return (perf_evsel__hw_cache_stat[type] & COP(op)) != 0;
}
/*
 * Format the name of a hardware cache event into bf.
 *
 * With a non-zero result index the output is "<cache>-<op>-<result>", built
 * from the first alias of each table. With result == 0 the result part is
 * left out and the second alias of the operation is used: "<cache>-<op>".
 *
 * No range checking is done here; type, op and result index the alias
 * tables directly.
 *
 * Returns what scnprintf() reports.
 */
int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
					    char *bf, size_t size)
{
	const char *cache_name = perf_evsel__hw_cache[type][0];

	if (!result)
		return scnprintf(bf, size, "%s-%s", cache_name,
				 perf_evsel__hw_cache_op[op][1]);

	return scnprintf(bf, size, "%s-%s-%s", cache_name,
			 perf_evsel__hw_cache_op[op][0],
			 perf_evsel__hw_cache_result[result][0]);
}
2012-06-13 15:52:42 -03:00
/*
 * Decode a PERF_TYPE_HW_CACHE config value and format its name into bf.
 *
 * config is split into three bytes: bits 0-7 cache type, bits 8-15
 * operation, bits 16-23 result. Each field is range-checked in turn and the
 * type/op pair is validated with perf_evsel__is_cache_op_valid(); 'err' is
 * updated before every check so that it names the first check that fails.
 *
 * Returns what scnprintf() reports for either the event name or the error
 * string.
 */
static int __perf_evsel__hw_cache_name ( u64 config , char * bf , size_t size )
2012-06-11 14:08:07 -03:00
{
u8 op , result , type = ( config > > 0 ) & 0xff ;
const char * err = " unknown-ext-hardware-cache-type " ;
2016-08-18 16:30:28 -03:00
if ( type > = PERF_COUNT_HW_CACHE_MAX )
2012-06-11 14:08:07 -03:00
goto out_err ;
op = ( config > > 8 ) & 0xff ;
err = " unknown-ext-hardware-cache-op " ;
2016-08-18 16:30:28 -03:00
if ( op > = PERF_COUNT_HW_CACHE_OP_MAX )
2012-06-11 14:08:07 -03:00
goto out_err ;
result = ( config > > 16 ) & 0xff ;
err = " unknown-ext-hardware-cache-result " ;
2016-08-18 16:30:28 -03:00
if ( result > = PERF_COUNT_HW_CACHE_RESULT_MAX )
2012-06-11 14:08:07 -03:00
goto out_err ;
err = " invalid-cache " ;
if ( ! perf_evsel__is_cache_op_valid ( type , op ) )
goto out_err ;
return __perf_evsel__hw_cache_type_op_res_name ( type , op , result , bf , size ) ;
/* Emit the message describing whichever check failed. */
out_err :
return scnprintf ( bf , size , " %s " , err ) ;
}
static int perf_evsel__hw_cache_name ( struct perf_evsel * evsel , char * bf , size_t size )
{
int ret = __perf_evsel__hw_cache_name ( evsel - > attr . config , bf , size ) ;
return ret + perf_evsel__add_modifiers ( evsel , bf + ret , size - ret ) ;
}
2012-06-13 11:53:37 -03:00
static int perf_evsel__raw_name ( struct perf_evsel * evsel , char * bf , size_t size )
{
int ret = scnprintf ( bf , size , " raw 0x% " PRIx64 , evsel - > attr . config ) ;
return ret + perf_evsel__add_modifiers ( evsel , bf + ret , size - ret ) ;
}
2012-06-12 12:34:58 -03:00
/*
 * Return the printable name of an evsel, generating it on first use.
 *
 * If evsel->name is already set it is returned as is. Otherwise a name is
 * formatted into a local buffer according to evsel->attr.type, duplicated
 * with strdup() and stored in evsel->name so later calls take the fast
 * path. If strdup() fails, the literal "unknown" is returned and
 * evsel->name stays NULL.
 */
const char * perf_evsel__name ( struct perf_evsel * evsel )
2012-06-12 10:29:12 -03:00
{
2012-06-12 12:34:58 -03:00
char bf [ 128 ] ;
2012-06-12 10:29:12 -03:00
2012-06-12 12:34:58 -03:00
if ( evsel - > name )
return evsel - > name ;
2012-05-25 16:38:11 -03:00
switch ( evsel - > attr . type ) {
case PERF_TYPE_RAW :
2012-06-13 11:53:37 -03:00
perf_evsel__raw_name ( evsel , bf , sizeof ( bf ) ) ;
2012-05-25 16:38:11 -03:00
break ;
case PERF_TYPE_HARDWARE :
2012-06-12 12:34:58 -03:00
perf_evsel__hw_name ( evsel , bf , sizeof ( bf ) ) ;
2012-05-25 16:38:11 -03:00
break ;
2012-06-11 14:08:07 -03:00
case PERF_TYPE_HW_CACHE :
2012-06-12 12:34:58 -03:00
perf_evsel__hw_cache_name ( evsel , bf , sizeof ( bf ) ) ;
2012-06-11 14:08:07 -03:00
break ;
2012-06-11 14:36:20 -03:00
case PERF_TYPE_SOFTWARE :
2012-06-12 12:34:58 -03:00
perf_evsel__sw_name ( evsel , bf , sizeof ( bf ) ) ;
2012-06-11 14:36:20 -03:00
break ;
2012-06-12 10:29:12 -03:00
case PERF_TYPE_TRACEPOINT :
2012-06-12 12:34:58 -03:00
/* No name can be derived from the attr alone for a tracepoint. */
scnprintf ( bf , sizeof ( bf ) , " %s " , " unknown tracepoint " ) ;
2012-06-12 10:29:12 -03:00
break ;
2012-06-28 23:18:49 +02:00
case PERF_TYPE_BREAKPOINT :
perf_evsel__bp_name ( evsel , bf , sizeof ( bf ) ) ;
break ;
2012-05-25 16:38:11 -03:00
default :
2012-08-16 21:10:18 +02:00
scnprintf ( bf , sizeof ( bf ) , " unknown attr type: %d " ,
evsel - > attr . type ) ;
2012-06-12 10:29:12 -03:00
break ;
2012-05-25 16:38:11 -03:00
}
2012-06-12 12:34:58 -03:00
/* Cache the generated name; fall back to a literal if the copy fails. */
evsel - > name = strdup ( bf ) ;
return evsel - > name ? : " unknown " ;
2012-05-25 16:38:11 -03:00
}
2013-01-22 18:09:44 +09:00
const char * perf_evsel__group_name ( struct perf_evsel * evsel )
{
return evsel - > group_name ? : " anon group " ;
}
/*
 * Format a description of the group led by evsel into buf: the group name,
 * then a brace-enclosed, comma-separated list made of the name of evsel
 * itself followed by the name of every member visited by
 * for_each_group_member().
 *
 * Returns the accumulated scnprintf() results.
 */
int perf_evsel__group_desc ( struct perf_evsel * evsel , char * buf , size_t size )
{
int ret ;
struct perf_evsel * pos ;
const char * group_name = perf_evsel__group_name ( evsel ) ;
ret = scnprintf ( buf , size , " %s " , group_name ) ;
/* The leader opens the list ... */
ret + = scnprintf ( buf + ret , size - ret , " { %s " ,
perf_evsel__name ( evsel ) ) ;
/* ... and each remaining member is appended after a comma. */
for_each_group_member ( pos , evsel )
ret + = scnprintf ( buf + ret , size - ret , " , %s " ,
perf_evsel__name ( pos ) ) ;
ret + = scnprintf ( buf + ret , size - ret , " } " ) ;
return ret ;
}
2018-01-12 16:21:04 -03:00
/*
 * Configure evsel->attr for callchain sampling according to param.
 *
 * Always sets the CALLCHAIN sample bit and copies param->max_stack into
 * attr->sample_max_stack. Depending on param->record_mode it additionally:
 *  - CALLCHAIN_LBR: requests a user-space call-stack branch sample, unless
 *    opts->branch_stack is already in use or the event excludes user space,
 *    in which case only a warning is printed;
 *  - CALLCHAIN_DWARF: requests user registers and a user stack dump of
 *    param->dump_size bytes and excludes the user part of the callchain,
 *    unless the event is a function trace event.
 * For function trace events the user part of the callchain is always
 * excluded.
 */
static void __perf_evsel__config_callchain ( struct perf_evsel * evsel ,
struct record_opts * opts ,
struct callchain_param * param )
2014-03-02 16:56:40 +01:00
{
bool function = perf_evsel__is_function_event ( evsel ) ;
struct perf_event_attr * attr = & evsel - > attr ;
perf_evsel__set_sample_bit ( evsel , CALLCHAIN ) ;
perf tools: Per event max-stack settings
The tooling counterpart, now it is possible to do:
# perf record -e sched:sched_switch/max-stack=10/ -e cycles/call-graph=dwarf,max-stack=4/ -e cpu-cycles/call-graph=dwarf,max-stack=1024/ usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.052 MB perf.data (5 samples) ]
# perf evlist -v
sched:sched_switch: type: 2, size: 112, config: 0x110, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, sample_max_stack: 10
cycles/call-graph=dwarf,max-stack=4/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 4
cpu-cycles/call-graph=dwarf,max-stack=1024/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 1024
# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
Using just /max-stack=N/ means /call-graph=fp,max-stack=N/, that should
be further configurable by means of some .perfconfig knob.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-28 19:03:42 -03:00
attr - > sample_max_stack = param - > max_stack ;
2015-08-04 04:30:20 -04:00
/* LBR mode: only usable without a branch stack request and with user space counted. */
if ( param - > record_mode = = CALLCHAIN_LBR ) {
2015-01-05 13:23:04 -05:00
if ( ! opts - > branch_stack ) {
if ( attr - > exclude_user ) {
pr_warning ( " LBR callstack option is only available "
" to get user callchain information. "
" Falling back to framepointers. \n " ) ;
} else {
perf_evsel__set_sample_bit ( evsel , BRANCH_STACK ) ;
attr - > branch_sample_type = PERF_SAMPLE_BRANCH_USER |
2015-12-11 16:12:24 -08:00
PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_NO_CYCLES |
PERF_SAMPLE_BRANCH_NO_FLAGS ;
2015-01-05 13:23:04 -05:00
}
} else
pr_warning ( " Cannot use LBR callstack with branch stack. "
" Falling back to framepointers. \n " ) ;
}
2015-08-04 04:30:20 -04:00
/* DWARF mode: sample user registers and a user stack dump, except for function trace events. */
if ( param - > record_mode = = CALLCHAIN_DWARF ) {
2014-03-02 16:56:40 +01:00
if ( ! function ) {
perf_evsel__set_sample_bit ( evsel , REGS_USER ) ;
perf_evsel__set_sample_bit ( evsel , STACK_USER ) ;
2017-09-05 10:00:28 -07:00
attr - > sample_regs_user | = PERF_REGS_MASK ;
2015-08-04 04:30:20 -04:00
attr - > sample_stack_user = param - > dump_size ;
2014-03-02 16:56:40 +01:00
attr - > exclude_callchain_user = 1 ;
} else {
pr_info ( " Cannot use DWARF unwind for function trace event, "
" falling back to framepointers. \n " ) ;
}
}
if ( function ) {
pr_info ( " Disabling user space callchains for function trace event. \n " ) ;
attr - > exclude_callchain_user = 1 ;
}
}
2018-01-12 16:21:04 -03:00
void perf_evsel__config_callchain ( struct perf_evsel * evsel ,
struct record_opts * opts ,
struct callchain_param * param )
{
if ( param - > enabled )
return __perf_evsel__config_callchain ( evsel , opts , param ) ;
}
perf callchain: Per-event type selection support
This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data.
Here is an example.
perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1
perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-11 06:30:47 -04:00
static void
perf_evsel__reset_callgraph ( struct perf_evsel * evsel ,
struct callchain_param * param )
{
struct perf_event_attr * attr = & evsel - > attr ;
perf_evsel__reset_sample_bit ( evsel , CALLCHAIN ) ;
if ( param - > record_mode = = CALLCHAIN_LBR ) {
perf_evsel__reset_sample_bit ( evsel , BRANCH_STACK ) ;
attr - > branch_sample_type & = ~ ( PERF_SAMPLE_BRANCH_USER |
PERF_SAMPLE_BRANCH_CALL_STACK ) ;
}
if ( param - > record_mode = = CALLCHAIN_DWARF ) {
perf_evsel__reset_sample_bit ( evsel , REGS_USER ) ;
perf_evsel__reset_sample_bit ( evsel , STACK_USER ) ;
}
}
static void apply_config_terms ( struct perf_evsel * evsel ,
2018-01-16 11:16:25 -03:00
struct record_opts * opts , bool track )
2015-07-29 05:42:10 -04:00
{
struct perf_evsel_config_term * term ;
2015-08-04 04:30:19 -04:00
struct list_head * config_terms = & evsel - > config_terms ;
struct perf_event_attr * attr = & evsel - > attr ;
perf callchain: Fix attr.sample_max_stack setting
When setting the "dwarf" unwinder for a specific event and not
specifying the max-stack, the attr.sample_max_stack ended up using an
uninitialized callchain_param.max_stack, fix it by using designated
initializers for that callchain_param variable, zeroing all non
explicitly initialized struct members.
Here is what happened:
# perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
callchain: type DWARF
callchain: stack dump size 8192
perf_event_attr:
type 2
size 112
config 0x730
{ sample_period, sample_freq } 1
sample_type IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC
exclude_callchain_user 1
{ wakeup_events, wakeup_watermark } 1
sample_regs_user 0xff0fff
sample_stack_user 8192
sample_max_stack 50656
sys_perf_event_open failed, error -75
Value too large for defined data type
# perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
callchain: type DWARF
callchain: stack dump size 8192
perf_event_attr:
type 2
size 112
config 0x730
sample_type IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC
exclude_callchain_user 1
sample_regs_user 0xff0fff
sample_stack_user 8192
sample_max_stack 30448
sys_perf_event_open failed, error -75
Value too large for defined data type
#
Now the attr.sample_max_stack is set to zero and the above works as
expected:
# perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
PING ::1(::1) 56 data bytes
64 bytes from ::1: icmp_seq=1 ttl=64 time=0.072 ms
--- ::1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.072/0.072/0.072/0.000 ms
0.000 probe_libc:inet_pton:(7feb7a998350))
__inet_pton (inlined)
gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
__GI_getaddrinfo (inlined)
[0xffffaa39b6108f3f] (/usr/bin/ping)
#
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hendrick Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-is9tramondqa9jlxxsgcm9iz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2018-01-15 11:07:58 -03:00
/* callgraph default */
struct callchain_param param = {
. record_mode = callchain_param . record_mode ,
} ;
perf callchain: Per-event type selection support
This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data.
Here is an example.
perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1
perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-11 06:30:47 -04:00
u32 dump_size = 0 ;
perf tools: Per event max-stack settings
The tooling counterpart, now it is possible to do:
# perf record -e sched:sched_switch/max-stack=10/ -e cycles/call-graph=dwarf,max-stack=4/ -e cpu-cycles/call-graph=dwarf,max-stack=1024/ usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.052 MB perf.data (5 samples) ]
# perf evlist -v
sched:sched_switch: type: 2, size: 112, config: 0x110, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, sample_max_stack: 10
cycles/call-graph=dwarf,max-stack=4/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 4
cpu-cycles/call-graph=dwarf,max-stack=1024/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 1024
# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
Using just /max-stack=N/ means /call-graph=fp,max-stack=N/, that should
be further configurable by means of some .perfconfig knob.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-28 19:03:42 -03:00
int max_stack = 0 ;
const char * callgraph_buf = NULL ;
perf callchain: Per-event type selection support
This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data.
Here is an example.
perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1
perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-11 06:30:47 -04:00
2015-07-29 05:42:10 -04:00
list_for_each_entry ( term , config_terms , list ) {
switch ( term - > type ) {
2015-07-29 05:42:11 -04:00
case PERF_EVSEL__CONFIG_TERM_PERIOD :
2017-10-20 13:27:55 -07:00
if ( ! ( term - > weak & & opts - > user_interval ! = ULLONG_MAX ) ) {
attr - > sample_period = term - > val . period ;
attr - > freq = 0 ;
}
2015-08-04 04:30:19 -04:00
break ;
2015-08-09 15:45:23 +09:00
case PERF_EVSEL__CONFIG_TERM_FREQ :
2017-10-20 13:27:55 -07:00
if ( ! ( term - > weak & & opts - > user_freq ! = UINT_MAX ) ) {
attr - > sample_freq = term - > val . freq ;
attr - > freq = 1 ;
}
2015-08-09 15:45:23 +09:00
break ;
2015-08-04 04:30:19 -04:00
case PERF_EVSEL__CONFIG_TERM_TIME :
if ( term - > val . time )
perf_evsel__set_sample_bit ( evsel , TIME ) ;
else
perf_evsel__reset_sample_bit ( evsel , TIME ) ;
break ;
perf callchain: Per-event type selection support
This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data.
Here is an example.
perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1
perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-11 06:30:47 -04:00
case PERF_EVSEL__CONFIG_TERM_CALLGRAPH :
callgraph_buf = term - > val . callgraph ;
break ;
2016-10-12 14:02:06 -07:00
case PERF_EVSEL__CONFIG_TERM_BRANCH :
if ( term - > val . branch & & strcmp ( term - > val . branch , " no " ) ) {
perf_evsel__set_sample_bit ( evsel , BRANCH_STACK ) ;
parse_branch_str ( term - > val . branch ,
& attr - > branch_sample_type ) ;
} else
perf_evsel__reset_sample_bit ( evsel , BRANCH_STACK ) ;
break ;
perf callchain: Per-event type selection support
This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data.
Here is an example.
perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1
perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-11 06:30:47 -04:00
case PERF_EVSEL__CONFIG_TERM_STACK_USER :
dump_size = term - > val . stack_user ;
break ;
perf tools: Per event max-stack settings
The tooling counterpart, now it is possible to do:
# perf record -e sched:sched_switch/max-stack=10/ -e cycles/call-graph=dwarf,max-stack=4/ -e cpu-cycles/call-graph=dwarf,max-stack=1024/ usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.052 MB perf.data (5 samples) ]
# perf evlist -v
sched:sched_switch: type: 2, size: 112, config: 0x110, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, sample_max_stack: 10
cycles/call-graph=dwarf,max-stack=4/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 4
cpu-cycles/call-graph=dwarf,max-stack=1024/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 1024
# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
Using just /max-stack=N/ means /call-graph=fp,max-stack=N/, that should
be further configurable by means of some .perfconfig knob.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-28 19:03:42 -03:00
case PERF_EVSEL__CONFIG_TERM_MAX_STACK :
max_stack = term - > val . max_stack ;
break ;
perf tools: Enable pre-event inherit setting by config terms
This patch allows perf record to set an event's attr.inherit bit via
config terms like:
# perf record -e cycles/no-inherit/ ...
# perf record -e cycles/inherit/ ...
So user can control inherit bit for each event separately.
In the following example, a.out fork()s in main and then does some complex
CPU intensive computations in both of its children.
Basic result with and without inherit:
# perf record -e cycles -e instructions ./a.out
[ perf record: Woken up 9 times to write data ]
[ perf record: Captured and wrote 2.205 MB perf.data (47920 samples) ]
# perf report --stdio
# ...
# Samples: 23K of event 'cycles'
# Event count (approx.): 23641752891
...
# Samples: 24K of event 'instructions'
# Event count (approx.): 30428312415
# perf record -i -e cycles -e instructions ./a.out
[ perf record: Woken up 5 times to write data ]
[ perf record: Captured and wrote 1.111 MB perf.data (24019 samples) ]
...
# Samples: 12K of event 'cycles'
# Event count (approx.): 11699501775
...
# Samples: 12K of event 'instructions'
# Event count (approx.): 15058023559
Cancel inherit for one event when it is globally enabled:
# perf record -e cycles/no-inherit/ -e instructions ./a.out
[ perf record: Woken up 7 times to write data ]
[ perf record: Captured and wrote 1.660 MB perf.data (36004 samples) ]
...
# Samples: 12K of event 'cycles/no-inherit/'
# Event count (approx.): 11895759282
...
# Samples: 24K of event 'instructions'
# Event count (approx.): 30668000441
Enable inherit for one event when it is globally disabled:
# perf record -i -e cycles/inherit/ -e instructions ./a.out
[ perf record: Woken up 7 times to write data ]
[ perf record: Captured and wrote 1.654 MB perf.data (35868 samples) ]
...
# Samples: 23K of event 'cycles/inherit/'
# Event count (approx.): 23285400229
...
# Samples: 11K of event 'instructions'
# Event count (approx.): 14969050259
Committer note:
One can check if the bit was set, in addition to seeing the result in
the perf.data file size as above by doing one of:
# perf record -e cycles -e instructions -a usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.911 MB perf.data (63 samples) ]
# perf evlist -v
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1
instructions: size: 112, config: 0x1, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1, exclude_guest: 1
#
So, the inherit bit was set in both, now, if we disable it globally using
--no-inherit:
# perf record --no-inherit -e cycles -e instructions -a usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.910 MB perf.data (56 samples) ]
# perf evlist -v
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1
instructions: size: 112, config: 0x1, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, freq: 1, sample_id_all: 1, exclude_guest: 1
No inherit bit set, then disabling it and setting just on the cycles event:
# perf record --no-inherit -e cycles/inherit/ -e instructions -a usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.909 MB perf.data (48 samples) ]
# perf evlist -v
cycles/inherit/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1
instructions: size: 112, config: 0x1, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|CPU|PERIOD, read_format: ID, disabled: 1, freq: 1, sample_id_all: 1, exclude_guest: 1
#
We can see it as well by using a more verbose level of debug messages in
the tool that sets up the perf_event_attr, 'perf record' in this case:
[root@zoo ~]# perf record -vv --no-inherit -e cycles/inherit/ -e instructions -a usleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|ID|CPU|PERIOD
read_format ID
disabled 1
inherit 1
mmap 1
comm 1
freq 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
------------------------------------------------------------
perf_event_attr:
size 112
config 0x1
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|ID|CPU|PERIOD
read_format ID
disabled 1
freq 1
sample_id_all 1
exclude_guest 1
------------------------------------------------------------
sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
<SNIP>
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1446029705-199659-2-git-send-email-wangnan0@huawei.com
[ s/u64/bool/ for the perf_evsel_config_term inherit field - jolsa]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-28 10:55:02 +00:00
case PERF_EVSEL__CONFIG_TERM_INHERIT :
/*
* attr - > inherit should has already been set by
* perf_evsel__config . If user explicitly set
* inherit using config terms , override global
* opt - > no_inherit setting .
*/
attr - > inherit = term - > val . inherit ? 1 : 0 ;
break ;
perf tools: Enable overwrite settings
This patch allows the following config terms and option:
Globally setting events to overwrite;
# perf record --overwrite ...
Set specific events to be overwrite or no-overwrite.
# perf record --event cycles/overwrite/ ...
# perf record --event cycles/no-overwrite/ ...
Add missing config terms and update the config term array size because
the longest string length has changed.
For overwritable events, it automatically selects attr.write_backward
since perf requires it to be backward for reading.
Test result:
# perf record --overwrite -e syscalls:*enter_nanosleep* usleep 1
[ perf record: Woken up 2 times to write data ]
[ perf record: Captured and wrote 0.011 MB perf.data (1 samples) ]
# perf evlist -v
syscalls:sys_enter_nanosleep: type: 2, size: 112, config: 0x134, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, write_backward: 1
# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nilay Vaish <nilayvaish@gmail.com>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1468485287-33422-14-git-send-email-wangnan0@huawei.com
Signed-off-by: He Kuang <hekuang@huawei.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-07-14 08:34:45 +00:00
case PERF_EVSEL__CONFIG_TERM_OVERWRITE :
attr - > write_backward = term - > val . overwrite ? 1 : 0 ;
break ;
2017-10-20 13:27:54 -07:00
case PERF_EVSEL__CONFIG_TERM_DRV_CFG :
2018-01-10 13:46:51 -07:00
break ;
2015-07-29 05:42:10 -04:00
default :
break ;
}
}
perf callchain: Per-event type selection support
This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data.
Here is an example.
perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1
perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-11 06:30:47 -04:00
/* User explicitly set per-event callgraph, clear the old setting and reset. */
perf tools: Per event max-stack settings
The tooling counterpart, now it is possible to do:
# perf record -e sched:sched_switch/max-stack=10/ -e cycles/call-graph=dwarf,max-stack=4/ -e cpu-cycles/call-graph=dwarf,max-stack=1024/ usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.052 MB perf.data (5 samples) ]
# perf evlist -v
sched:sched_switch: type: 2, size: 112, config: 0x110, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, sample_max_stack: 10
cycles/call-graph=dwarf,max-stack=4/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 4
cpu-cycles/call-graph=dwarf,max-stack=1024/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 1024
# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
Using just /max-stack=N/ means /call-graph=fp,max-stack=N/, that should
be further configurable by means of some .perfconfig knob.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-28 19:03:42 -03:00
if ( ( callgraph_buf ! = NULL ) | | ( dump_size > 0 ) | | max_stack ) {
2018-01-16 11:16:25 -03:00
bool sample_address = false ;
perf tools: Per event max-stack settings
The tooling counterpart, now it is possible to do:
# perf record -e sched:sched_switch/max-stack=10/ -e cycles/call-graph=dwarf,max-stack=4/ -e cpu-cycles/call-graph=dwarf,max-stack=1024/ usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.052 MB perf.data (5 samples) ]
# perf evlist -v
sched:sched_switch: type: 2, size: 112, config: 0x110, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, sample_max_stack: 10
cycles/call-graph=dwarf,max-stack=4/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 4
cpu-cycles/call-graph=dwarf,max-stack=1024/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 1024
# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
Using just /max-stack=N/ means /call-graph=fp,max-stack=N/, that should
be further configurable by means of some .perfconfig knob.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-28 19:03:42 -03:00
if ( max_stack ) {
param . max_stack = max_stack ;
if ( callgraph_buf = = NULL )
callgraph_buf = " fp " ;
}
perf callchain: Per-event type selection support
This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data.
Here is an example.
perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1
perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-11 06:30:47 -04:00
/* parse callgraph parameters */
if ( callgraph_buf ! = NULL ) {
2015-08-11 06:30:48 -04:00
if ( ! strcmp ( callgraph_buf , " no " ) ) {
param . enabled = false ;
param . record_mode = CALLCHAIN_NONE ;
} else {
param . enabled = true ;
if ( parse_callchain_record ( callgraph_buf , & param ) ) {
pr_err ( " per-event callgraph setting for %s failed. "
" Apply callgraph global setting for it \n " ,
evsel - > name ) ;
return ;
}
2018-01-16 11:16:25 -03:00
if ( param . record_mode = = CALLCHAIN_DWARF )
sample_address = true ;
perf callchain: Per-event type selection support
This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data.
Here is an example.
perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1
perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-11 06:30:47 -04:00
}
}
if ( dump_size > 0 ) {
dump_size = round_up ( dump_size , sizeof ( u64 ) ) ;
param . dump_size = dump_size ;
}
/* If global callgraph set, clear it */
if ( callchain_param . enabled )
perf_evsel__reset_callgraph ( evsel , & callchain_param ) ;
/* set perf-event callgraph */
2018-01-16 11:16:25 -03:00
if ( param . enabled ) {
if ( sample_address ) {
perf_evsel__set_sample_bit ( evsel , ADDR ) ;
perf_evsel__set_sample_bit ( evsel , DATA_SRC ) ;
evsel - > attr . mmap_data = track ;
}
2016-04-11 18:39:37 -03:00
perf_evsel__config_callchain ( evsel , opts , & param ) ;
2018-01-16 11:16:25 -03:00
}
perf callchain: Per-event type selection support
This patchkit adds the ability to set callgraph mode (fp, dwarf, lbr) per
event. This in turn can reduce sampling overhead and the size of the
perf.data.
Here is an example.
perf record -e 'cpu/cpu-cycles,period=1000,call-graph=fp,time=1/,cpu/instructions,call-graph=lbr/' sleep 1
perf evlist -v
cpu/cpu-cycles,period=1000,call-graph=fp,time=1/: type: 4, size: 112,
config: 0x3c, { sample_period, sample_freq }: 1000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|IDENTIFIER, read_format: ID, disabled: 1,
inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all:
1, exclude_guest: 1, mmap2: 1, comm_exec: 1
cpu/instructions,call-graph=lbr/: type: 4, size: 112, config: 0xc0, {
sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|CALLCHAIN|PERIOD|BRANCH_STACK|IDENTIFIER, read_format: ID,
disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1,
exclude_guest: 1
Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/1439289050-40510-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-11 06:30:47 -04:00
}
2015-07-29 05:42:10 -04:00
}
2012-11-12 18:34:01 +01:00
/*
* The enable_on_exec / disabled value strategy :
*
* 1 ) For any type of traced program :
* - all independent events and group leaders are disabled
* - all group members are enabled
*
* Group members are ruled by group leaders . They need to
* be enabled , because the group scheduling relies on that .
*
* 2 ) For traced programs executed by perf :
* - all independent events and group leaders have
* enable_on_exec set
* - we don ' t specifically enable or disable any event during
* the record command
*
* Independent events and group leaders are initially disabled
* and get enabled by exec . Group members are ruled by group
* leaders as stated in 1 ) .
*
* 3 ) For traced programs attached by perf ( pid / tid ) :
* - we specifically enable or disable all events during
* the record command
*
* When attaching events to already running traced programs we
* enable / disable events specifically , as there ' s no
* initial traced exec call .
*/
2016-04-11 18:15:29 -03:00
/*
 * Fill in evsel->attr (plus a few evsel fields) from the record options
 * @opts and, when given and enabled, the @callchain parameters.
 *
 * The statements are order dependent: later blocks override what earlier
 * ones set, and the per-event config terms are applied last through
 * apply_config_terms().
 */
void perf_evsel__config ( struct perf_evsel * evsel , struct record_opts * opts ,
struct callchain_param * callchain )
2011-11-08 14:41:57 -02:00
{
2012-10-10 17:39:03 +02:00
struct perf_evsel * leader = evsel - > leader ;
2011-11-08 14:41:57 -02:00
struct perf_event_attr * attr = & evsel - > attr ;
2014-07-31 09:00:52 +03:00
int track = evsel - > tracking ;
2013-11-15 15:52:29 +02:00
bool per_cpu = opts - > target . default_per_cpu & & ! opts - > target . per_thread ;
2011-11-08 14:41:57 -02:00
2012-12-13 13:13:07 -03:00
attr - > sample_id_all = perf_missing_features . sample_id_all ? 0 : 1 ;
2011-11-08 14:41:57 -02:00
attr - > inherit = ! opts - > no_inherit ;
perf tools: Enable overwrite settings
This patch allows following config terms and option:
Globally setting events to overwrite;
# perf record --overwrite ...
Set specific events to be overwrite or no-overwrite.
# perf record --event cycles/overwrite/ ...
# perf record --event cycles/no-overwrite/ ...
Add missing config terms and update the config term array size because
the longest string length has changed.
For overwritable events, it automatically selects attr.write_backward
since perf requires it to be backward for reading.
Test result:
# perf record --overwrite -e syscalls:*enter_nanosleep* usleep 1
[ perf record: Woken up 2 times to write data ]
[ perf record: Captured and wrote 0.011 MB perf.data (1 samples) ]
# perf evlist -v
syscalls:sys_enter_nanosleep: type: 2, size: 112, config: 0x134, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, write_backward: 1
# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nilay Vaish <nilayvaish@gmail.com>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1468485287-33422-14-git-send-email-wangnan0@huawei.com
Signed-off-by: He Kuang <hekuang@huawei.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-07-14 08:34:45 +00:00
attr - > write_backward = opts - > overwrite ? 1 : 0 ;
2011-11-08 14:41:57 -02:00
2012-12-10 14:53:43 -03:00
/* IP and TID are requested unconditionally for every evsel. */
perf_evsel__set_sample_bit ( evsel , IP ) ;
perf_evsel__set_sample_bit ( evsel , TID ) ;
2011-11-08 14:41:57 -02:00
2012-10-10 17:39:03 +02:00
if ( evsel - > sample_read ) {
perf_evsel__set_sample_bit ( evsel , READ ) ;
/*
* We need ID even in case of single event , because
* PERF_SAMPLE_READ process ID specific data .
*/
2013-08-27 11:23:09 +03:00
perf_evsel__set_sample_id ( evsel , false ) ;
2012-10-10 17:39:03 +02:00
/*
* Apply group format only if we belong to group
* with more than one members .
*/
if ( leader - > nr_members > 1 ) {
attr - > read_format | = PERF_FORMAT_GROUP ;
attr - > inherit = 0 ;
}
}
2011-11-08 14:41:57 -02:00
/*
2014-06-09 14:43:37 +09:00
* We default some events to have a default interval . But keep
2011-11-08 14:41:57 -02:00
* it a weak assumption overridable by the user .
*/
2014-06-09 14:43:37 +09:00
if ( ! attr - > sample_period | | ( opts - > user_freq ! = UINT_MAX | |
2011-11-08 14:41:57 -02:00
opts - > user_interval ! = ULLONG_MAX ) ) {
if ( opts - > freq ) {
2012-12-10 14:53:43 -03:00
perf_evsel__set_sample_bit ( evsel , PERIOD ) ;
2011-11-08 14:41:57 -02:00
attr - > freq = 1 ;
attr - > sample_freq = opts - > freq ;
} else {
attr - > sample_period = opts - > default_interval ;
}
}
2012-10-10 17:39:03 +02:00
/*
* Disable sampling for all group members other
* than leader in case leader ' leads ' the sampling .
*/
if ( ( leader ! = evsel ) & & leader - > sample_read ) {
attr - > sample_freq = 0 ;
attr - > sample_period = 0 ;
}
2011-11-08 14:41:57 -02:00
if ( opts - > no_samples )
attr - > sample_freq = 0 ;
2017-08-24 18:27:31 +02:00
if ( opts - > inherit_stat ) {
evsel - > attr . read_format | =
PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING |
PERF_FORMAT_ID ;
2011-11-08 14:41:57 -02:00
attr - > inherit_stat = 1 ;
2017-08-24 18:27:31 +02:00
}
2011-11-08 14:41:57 -02:00
if ( opts - > sample_address ) {
2012-12-10 14:53:43 -03:00
perf_evsel__set_sample_bit ( evsel , ADDR ) ;
2011-11-08 14:41:57 -02:00
attr - > mmap_data = track ;
}
2014-11-13 18:21:03 +01:00
/*
* We don ' t allow user space callchains for function trace
* event , due to issues with page faults while tracing page
* fault handler and its overall trickiness nature .
*/
if ( perf_evsel__is_function_event ( evsel ) )
evsel - > attr . exclude_callchain_user = 1 ;
2016-04-11 18:15:29 -03:00
if ( callchain & & callchain - > enabled & & ! evsel - > no_aux_samples )
2016-04-11 18:39:37 -03:00
perf_evsel__config_callchain ( evsel , opts , callchain ) ;
2012-08-07 15:20:47 +02:00
2014-09-24 13:48:39 +02:00
if ( opts - > sample_intr_regs ) {
perf record: Add ability to name registers to record
This patch modifies the -I/--int-regs option to enablepassing the name
of the registers to sample on interrupt. Registers can be specified by
their symbolic names. For instance on x86, --intr-regs=ax,si.
The motivation is to reduce the size of the perf.data file and the
overhead of sampling by only collecting the registers useful to a
specific analysis. For instance, for value profiling, sampling only the
registers used to passed arguements to functions.
With no parameter, the --intr-regs still records all possible registers
based on the architecture.
To name registers, it is necessary to use the long form of the option,
i.e., --intr-regs:
$ perf record --intr-regs=si,di,r8,r9 .....
To record any possible registers:
$ perf record -I .....
$ perf report --intr-regs ...
To display the register, one can use perf report -D
To list the available registers:
$ perf record --intr-regs=\?
available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15
Signed-off-by: Stephane Eranian <eranian@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1441039273-16260-4-git-send-email-eranian@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-08-31 18:41:12 +02:00
attr - > sample_regs_intr = opts - > sample_intr_regs ;
2014-09-24 13:48:39 +02:00
perf_evsel__set_sample_bit ( evsel , REGS_INTR ) ;
}
2017-09-05 10:00:28 -07:00
if ( opts - > sample_user_regs ) {
attr - > sample_regs_user | = opts - > sample_user_regs ;
perf_evsel__set_sample_bit ( evsel , REGS_USER ) ;
}
2016-08-01 20:02:35 +02:00
if ( target__has_cpu ( & opts - > target ) | | opts - > sample_cpu )
2012-12-10 14:53:43 -03:00
perf_evsel__set_sample_bit ( evsel , CPU ) ;
2011-11-08 14:41:57 -02:00
2011-12-20 17:32:45 +03:00
if ( opts - > period )
2012-12-10 14:53:43 -03:00
perf_evsel__set_sample_bit ( evsel , PERIOD ) ;
2011-12-20 17:32:45 +03:00
2014-07-31 14:45:04 +08:00
/*
2016-02-24 10:02:25 -08:00
* When the user explicitly disabled time don ' t force it here .
2014-07-31 14:45:04 +08:00
*/
if ( opts - > sample_time & &
( ! perf_missing_features . sample_id_all & &
2015-07-06 14:51:01 +03:00
( ! opts - > no_inherit | | target__has_cpu ( & opts - > target ) | | per_cpu | |
opts - > sample_time_set ) ) )
2012-12-10 14:53:43 -03:00
perf_evsel__set_sample_bit ( evsel , TIME ) ;
2011-11-08 14:41:57 -02:00
2014-07-14 13:02:56 +03:00
if ( opts - > raw_samples & & ! evsel - > no_aux_samples ) {
2012-12-10 14:53:43 -03:00
perf_evsel__set_sample_bit ( evsel , TIME ) ;
perf_evsel__set_sample_bit ( evsel , RAW ) ;
perf_evsel__set_sample_bit ( evsel , CPU ) ;
2011-11-08 14:41:57 -02:00
}
2013-01-24 16:10:37 +01:00
if ( opts - > sample_address )
2013-11-01 15:51:35 +02:00
perf_evsel__set_sample_bit ( evsel , DATA_SRC ) ;
2013-01-24 16:10:37 +01:00
2017-08-29 13:11:08 -04:00
if ( opts - > sample_phys_addr )
perf_evsel__set_sample_bit ( evsel , PHYS_ADDR ) ;
2014-01-14 17:52:14 -03:00
if ( opts - > no_buffering ) {
2011-11-08 14:41:57 -02:00
attr - > watermark = 0 ;
attr - > wakeup_events = 1 ;
}
2014-07-14 13:02:56 +03:00
if ( opts - > branch_stack & & ! evsel - > no_aux_samples ) {
2012-12-10 14:53:43 -03:00
perf_evsel__set_sample_bit ( evsel , BRANCH_STACK ) ;
2012-02-09 23:21:02 +01:00
attr - > branch_sample_type = opts - > branch_stack ;
}
2011-11-08 14:41:57 -02:00
2013-01-24 16:10:29 +01:00
if ( opts - > sample_weight )
2013-11-01 15:51:35 +02:00
perf_evsel__set_sample_bit ( evsel , WEIGHT ) ;
2013-01-24 16:10:29 +01:00
2015-01-29 17:06:46 +09:00
/* Side-band records (task/mmap/mmap2/comm) follow evsel->tracking. */
attr - > task = track ;
2013-08-21 12:10:25 +02:00
attr - > mmap = track ;
2014-05-30 10:49:42 -04:00
attr - > mmap2 = track & & ! perf_missing_features . mmap2 ;
2013-08-21 12:10:25 +02:00
attr - > comm = track ;
2011-11-08 14:41:57 -02:00
perf tools: Add PERF_RECORD_NAMESPACES to include namespaces related info
Introduce a new option to record PERF_RECORD_NAMESPACES events emitted
by the kernel when fork, clone, setns or unshare are invoked. And update
perf-record documentation with the new option to record namespace
events.
Committer notes:
Combined it with a later patch to allow printing it via 'perf report -D'
and be able to test the feature introduced in this patch. Had to move
here also perf_ns__name(), that was introduced in another later patch.
Also used PRIu64 and PRIx64 to fix the build in some enfironments wrt:
util/event.c:1129:39: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'long long unsigned int' [-Werror=format=]
ret += fprintf(fp, "%u/%s: %lu/0x%lx%s", idx
^
Testing it:
# perf record --namespaces -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.083 MB perf.data (423 samples) ]
#
# perf report -D
<SNIP>
3 2028902078892 0x115140 [0xa0]: PERF_RECORD_NAMESPACES 14783/14783 - nr_namespaces: 7
[0/net: 3/0xf0000081, 1/uts: 3/0xeffffffe, 2/ipc: 3/0xefffffff, 3/pid: 3/0xeffffffc,
4/user: 3/0xeffffffd, 5/mnt: 3/0xf0000000, 6/cgroup: 3/0xeffffffb]
0x1151e0 [0x30]: event: 9
.
. ... raw event: size 48 bytes
. 0000: 09 00 00 00 02 00 30 00 c4 71 82 68 0c 7f 00 00 ......0..q.h....
. 0010: a9 39 00 00 a9 39 00 00 94 28 fe 63 d8 01 00 00 .9...9...(.c....
. 0020: 03 00 00 00 00 00 00 00 ce c4 02 00 00 00 00 00 ................
<SNIP>
NAMESPACES events: 1
<SNIP>
#
Signed-off-by: Hari Bathini <hbathini@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sargun Dhillon <sargun@sargun.me>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/148891930386.25309.18412039920746995488.stgit@hbathini.in.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-03-08 02:11:43 +05:30
if ( opts - > record_namespaces )
attr - > namespaces = track ;
2015-07-21 12:44:04 +03:00
if ( opts - > record_switch_events )
attr - > context_switch = track ;
2013-09-20 07:40:43 -07:00
if ( opts - > sample_transaction )
2013-11-01 15:51:35 +02:00
perf_evsel__set_sample_bit ( evsel , TRANSACTION ) ;
2013-09-20 07:40:43 -07:00
2015-02-24 15:13:40 -08:00
if ( opts - > running_time ) {
evsel - > attr . read_format | =
PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING ;
}
2012-11-12 18:34:01 +01:00
/*
* XXX see the function comment above
*
* Disabling only independent events or group leaders ,
* keeping group members enabled .
*/
2012-11-29 15:38:30 +09:00
if ( perf_evsel__is_group_leader ( evsel ) )
2012-11-12 18:34:01 +01:00
attr - > disabled = 1 ;
/*
* Setting enable_on_exec for independent events and
* group leaders for traced executed by perf .
*/
2014-01-11 13:38:27 -08:00
if ( target__none ( & opts - > target ) & & perf_evsel__is_group_leader ( evsel ) & &
! opts - > initial_delay )
2011-11-08 14:41:57 -02:00
attr - > enable_on_exec = 1 ;
2014-07-14 13:02:57 +03:00
/* An 'immediate' evsel overrides both settings made just above. */
if ( evsel - > immediate ) {
attr - > disabled = 0 ;
attr - > enable_on_exec = 0 ;
}
2015-03-31 00:19:31 +02:00
/*
 * 'clockid' is not a local of this function: opts->clockid is stored in
 * it whether or not use_clockid is set.
 */
clockid = opts - > clockid ;
if ( opts - > use_clockid ) {
attr - > use_clockid = 1 ;
attr - > clockid = opts - > clockid ;
}
2015-07-29 05:42:10 -04:00
perf tools: Introduce 'P' modifier to request max precision
The 'P' will cause the event to get maximum possible detected precise
level.
Following record:
$ perf record -e cycles:P ...
will detect maximum precise level for 'cycles' event and use it.
Commiter note:
Testing it:
$ perf record -e cycles:P usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.013 MB perf.data (9 samples) ]
$ perf evlist
cycles:P
$ perf evlist -v
cycles:P: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1,
enable_on_exec: 1, task: 1, precise_ip: 2, sample_id_all: 1, mmap2: 1,
comm_exec: 1
$
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1444068369-20978-6-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-05 20:06:05 +02:00
if ( evsel - > precise_max )
perf_event_attr__set_max_precise_ip ( attr ) ;
2016-02-15 09:34:31 +01:00
/* all_kernel is tested after all_user, so it wins when both are set. */
if ( opts - > all_user ) {
attr - > exclude_kernel = 1 ;
attr - > exclude_user = 0 ;
}
if ( opts - > all_kernel ) {
attr - > exclude_kernel = 0 ;
attr - > exclude_user = 1 ;
}
2015-07-29 05:42:10 -04:00
/*
* Apply event specific term settings ,
* it overloads any global configuration .
*/
2018-01-16 11:16:25 -03:00
apply_config_terms ( evsel , opts , track ) ;
2016-12-13 08:46:22 +01:00
evsel - > ignore_missing_thread = opts - > ignore_missing_thread ;
2011-11-08 14:41:57 -02:00
}
2014-10-13 13:30:27 -03:00
static int perf_evsel__alloc_fd ( struct perf_evsel * evsel , int ncpus , int nthreads )
2011-01-03 16:39:04 -02:00
{
2014-07-31 09:00:51 +03:00
if ( evsel - > system_wide )
nthreads = 1 ;
2011-01-03 16:39:04 -02:00
evsel - > fd = xyarray__new ( ncpus , nthreads , sizeof ( int ) ) ;
2011-05-27 09:58:34 -06:00
if ( evsel - > fd ) {
2016-10-03 11:07:24 -03:00
int cpu , thread ;
2011-05-27 09:58:34 -06:00
for ( cpu = 0 ; cpu < ncpus ; cpu + + ) {
for ( thread = 0 ; thread < nthreads ; thread + + ) {
FD ( evsel , cpu , thread ) = - 1 ;
}
}
}
2011-01-03 16:39:04 -02:00
return evsel - > fd ! = NULL ? 0 : - ENOMEM ;
}
2017-08-11 16:26:17 -07:00
static int perf_evsel__run_ioctl ( struct perf_evsel * evsel ,
2013-08-02 17:41:10 -07:00
int ioc , void * arg )
2012-09-26 15:07:39 -03:00
{
int cpu , thread ;
2017-08-11 16:26:17 -07:00
for ( cpu = 0 ; cpu < xyarray__max_x ( evsel - > fd ) ; cpu + + ) {
for ( thread = 0 ; thread < xyarray__max_y ( evsel - > fd ) ; thread + + ) {
2012-09-26 15:07:39 -03:00
int fd = FD ( evsel , cpu , thread ) ,
2013-08-02 17:41:10 -07:00
err = ioctl ( fd , ioc , arg ) ;
2012-09-26 15:07:39 -03:00
if ( err )
return err ;
}
}
return 0 ;
}
2017-08-11 16:26:17 -07:00
int perf_evsel__apply_filter ( struct perf_evsel * evsel , const char * filter )
2013-08-02 17:41:10 -07:00
{
2017-08-11 16:26:17 -07:00
return perf_evsel__run_ioctl ( evsel ,
2013-08-02 17:41:10 -07:00
PERF_EVENT_IOC_SET_FILTER ,
( void * ) filter ) ;
}
2015-07-03 17:05:50 -03:00
int perf_evsel__set_filter ( struct perf_evsel * evsel , const char * filter )
{
char * new_filter = strdup ( filter ) ;
if ( new_filter ! = NULL ) {
free ( evsel - > filter ) ;
evsel - > filter = new_filter ;
return 0 ;
}
return - 1 ;
}
2016-09-16 08:44:04 -06:00
static int perf_evsel__append_filter ( struct perf_evsel * evsel ,
const char * fmt , const char * filter )
2015-07-04 12:19:13 -03:00
{
char * new_filter ;
if ( evsel - > filter = = NULL )
return perf_evsel__set_filter ( evsel , filter ) ;
2016-09-16 08:44:03 -06:00
if ( asprintf ( & new_filter , fmt , evsel - > filter , filter ) > 0 ) {
2015-07-04 12:19:13 -03:00
free ( evsel - > filter ) ;
evsel - > filter = new_filter ;
return 0 ;
}
return - 1 ;
}
2016-09-16 08:44:04 -06:00
/*
 * AND a tracepoint filter expression onto @evsel's existing filter.
 *
 * Both sides are parenthesised so the precedence inside either expression
 * cannot leak across the "&&". When no filter is set yet, @filter is used
 * as is. Returns 0 on success, -1 on failure.
 */
int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter)
{
	/*
	 * The format must be exactly "(%s) && (%s)": a split "& &" is not a
	 * logical AND and "% s" is not a plain string conversion.
	 */
	return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
}
2016-09-16 08:44:05 -06:00
/*
 * Append an address filter to @evsel's existing filter, comma separated.
 *
 * When no filter is set yet, @filter is used as is. Returns 0 on success,
 * -1 on failure.
 */
int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
{
	/*
	 * No padding around the conversions: extra blanks in the format would
	 * pile up at both ends of the string on every append.
	 */
	return perf_evsel__append_filter(evsel, "%s,%s", filter);
}
2015-12-03 10:06:40 +01:00
int perf_evsel__enable ( struct perf_evsel * evsel )
2013-08-02 17:41:10 -07:00
{
2017-08-11 16:26:17 -07:00
return perf_evsel__run_ioctl ( evsel ,
2013-08-02 17:41:10 -07:00
PERF_EVENT_IOC_ENABLE ,
0 ) ;
}
2015-12-03 10:06:41 +01:00
int perf_evsel__disable ( struct perf_evsel * evsel )
{
2017-08-11 16:26:17 -07:00
return perf_evsel__run_ioctl ( evsel ,
2015-12-03 10:06:41 +01:00
PERF_EVENT_IOC_DISABLE ,
0 ) ;
}
2011-01-12 22:39:13 -02:00
int perf_evsel__alloc_id ( struct perf_evsel * evsel , int ncpus , int nthreads )
{
perf evsel: Don't rely on malloc working for sz 0
When running perf on ARC (uClibc based userspace), ran into this issue
------------->8----------------
[ARCLinux]$ ./perf record ls
bin etc perf sys
debug init perf.data tmp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.001 MB perf.data (~24 samples) ]
[ARCLinux]$ ./perf report
incompatible file format (rerun with -v to learn more)
------------->8----------------
The problem happens in the following call stack when zalloc is called
with size zero
glibc default / uClibc with MALLOC_GLIBC_COMPAT are OK, but not if that
config option is not enabled.
cmd_report
perf_session__new
perf_session__open
perf_session__read_header
read_attr(fd, header, &f_attr)
nr_ids = f_attr.ids.size / sizeof(u64); <-- 0
perf_evsel__alloc_id(vsel, 1, nr_ids)
zalloc(ncpus * nthreads * sizeof(u64)) <-- 0
header.c: read_attr()
(gdb) p *f_attr
$17 = {
attr = {
type = 0,
size = 96,
config = 0,
{
sample_period = 4000,
sample_freq = 4000
},
...
ids = {
offset = 104,
size = 0 <------
}
}
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Suggested-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1421156604-30603-5-git-send-email-vgupta@synopsys.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-01-13 19:13:23 +05:30
if ( ncpus = = 0 | | nthreads = = 0 )
return 0 ;
2014-07-31 09:00:51 +03:00
if ( evsel - > system_wide )
nthreads = 1 ;
2011-03-10 11:15:54 -03:00
evsel - > sample_id = xyarray__new ( ncpus , nthreads , sizeof ( struct perf_sample_id ) ) ;
if ( evsel - > sample_id = = NULL )
return - ENOMEM ;
evsel - > id = zalloc ( ncpus * nthreads * sizeof ( u64 ) ) ;
if ( evsel - > id = = NULL ) {
xyarray__delete ( evsel - > sample_id ) ;
evsel - > sample_id = NULL ;
return - ENOMEM ;
}
return 0 ;
2011-01-12 22:39:13 -02:00
}
2014-10-13 13:30:27 -03:00
static void perf_evsel__free_fd ( struct perf_evsel * evsel )
2011-01-03 16:39:04 -02:00
{
xyarray__delete ( evsel - > fd ) ;
evsel - > fd = NULL ;
}
2014-10-13 13:30:27 -03:00
static void perf_evsel__free_id ( struct perf_evsel * evsel )
2011-01-12 22:39:13 -02:00
{
2011-03-10 11:15:54 -03:00
xyarray__delete ( evsel - > sample_id ) ;
evsel - > sample_id = NULL ;
2013-12-26 17:41:15 -03:00
zfree ( & evsel - > id ) ;
2011-01-12 22:39:13 -02:00
}
2015-07-29 05:42:10 -04:00
static void perf_evsel__free_config_terms ( struct perf_evsel * evsel )
{
struct perf_evsel_config_term * term , * h ;
list_for_each_entry_safe ( term , h , & evsel - > config_terms , list ) {
list_del ( & term - > list ) ;
free ( term ) ;
}
}
2017-08-11 16:26:17 -07:00
void perf_evsel__close_fd ( struct perf_evsel * evsel )
2011-01-03 17:45:52 -02:00
{
int cpu , thread ;
2017-08-11 16:26:17 -07:00
for ( cpu = 0 ; cpu < xyarray__max_x ( evsel - > fd ) ; cpu + + )
for ( thread = 0 ; thread < xyarray__max_y ( evsel - > fd ) ; + + thread ) {
2011-01-03 17:45:52 -02:00
close ( FD ( evsel , cpu , thread ) ) ;
FD ( evsel , cpu , thread ) = - 1 ;
}
}
2011-01-18 21:41:45 -02:00
void perf_evsel__exit ( struct perf_evsel * evsel )
2011-01-03 16:39:04 -02:00
{
assert ( list_empty ( & evsel - > node ) ) ;
2015-08-27 08:07:40 -04:00
assert ( evsel - > evlist = = NULL ) ;
2013-03-15 14:48:49 +09:00
perf_evsel__free_fd ( evsel ) ;
perf_evsel__free_id ( evsel ) ;
2015-07-29 05:42:10 -04:00
perf_evsel__free_config_terms ( evsel ) ;
2014-10-16 13:25:01 -03:00
close_cgroup ( evsel - > cgrp ) ;
2015-06-23 00:36:04 +02:00
cpu_map__put ( evsel - > cpus ) ;
2015-09-08 10:58:55 +03:00
cpu_map__put ( evsel - > own_cpus ) ;
2015-06-23 00:36:07 +02:00
thread_map__put ( evsel - > threads ) ;
2014-10-16 13:25:01 -03:00
zfree ( & evsel - > group_name ) ;
zfree ( & evsel - > name ) ;
2014-10-09 15:29:51 -03:00
perf_evsel__object . fini ( evsel ) ;
2011-01-18 21:41:45 -02:00
}
/* Tear down @evsel with perf_evsel__exit() and then free the object itself. */
void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	free(evsel);
}
2011-01-03 17:45:52 -02:00
2015-06-26 11:29:11 +02:00
void perf_evsel__compute_deltas ( struct perf_evsel * evsel , int cpu , int thread ,
2014-11-21 10:31:05 +01:00
struct perf_counts_values * count )
2013-01-29 12:47:43 +01:00
{
struct perf_counts_values tmp ;
if ( ! evsel - > prev_raw_counts )
return ;
if ( cpu = = - 1 ) {
tmp = evsel - > prev_raw_counts - > aggr ;
evsel - > prev_raw_counts - > aggr = * count ;
} else {
2015-06-26 11:29:11 +02:00
tmp = * perf_counts ( evsel - > prev_raw_counts , cpu , thread ) ;
* perf_counts ( evsel - > prev_raw_counts , cpu , thread ) = * count ;
2013-01-29 12:47:43 +01:00
}
count - > val = count - > val - tmp . val ;
count - > ena = count - > ena - tmp . ena ;
count - > run = count - > run - tmp . run ;
}
2014-11-21 10:31:06 +01:00
/*
 * Optionally scale @count->val by enabled/running time.
 *
 * With @scale set:
 *   - run == 0:   val is zeroed, reported state is -1
 *   - run < ena:  val is scaled by ena / run (rounded), reported state is 1
 *   - otherwise:  val is untouched, reported state is 0
 * With @scale clear, ena and run are zeroed and the reported state is 0.
 *
 * The state is stored through @pscaled when it is non-NULL.
 */
void perf_counts_values__scale(struct perf_counts_values *count,
			       bool scale, s8 *pscaled)
{
	s8 scaled = 0;

	if (!scale) {
		count->ena = 0;
		count->run = 0;
	} else if (count->run == 0) {
		scaled = -1;
		count->val = 0;
	} else if (count->run < count->ena) {
		scaled = 1;
		count->val = (u64)((double)count->val * count->ena / count->run + 0.5);
	}

	if (pscaled != NULL)
		*pscaled = scaled;
}
2017-07-26 14:02:04 +02:00
static int perf_evsel__read_size ( struct perf_evsel * evsel )
{
u64 read_format = evsel - > attr . read_format ;
int entry = sizeof ( u64 ) ; /* value */
int size = 0 ;
int nr = 1 ;
if ( read_format & PERF_FORMAT_TOTAL_TIME_ENABLED )
size + = sizeof ( u64 ) ;
if ( read_format & PERF_FORMAT_TOTAL_TIME_RUNNING )
size + = sizeof ( u64 ) ;
if ( read_format & PERF_FORMAT_ID )
entry + = sizeof ( u64 ) ;
if ( read_format & PERF_FORMAT_GROUP ) {
nr = evsel - > nr_members ;
size + = sizeof ( u64 ) ;
}
size + = entry * nr ;
return size ;
}
2015-06-26 11:29:18 +02:00
int perf_evsel__read ( struct perf_evsel * evsel , int cpu , int thread ,
struct perf_counts_values * count )
{
2017-07-26 14:02:04 +02:00
size_t size = perf_evsel__read_size ( evsel ) ;
2015-06-26 11:29:18 +02:00
memset ( count , 0 , sizeof ( * count ) ) ;
if ( FD ( evsel , cpu , thread ) < 0 )
return - EINVAL ;
2017-07-26 14:02:04 +02:00
if ( readn ( FD ( evsel , cpu , thread ) , count - > values , size ) < = 0 )
2015-06-26 11:29:18 +02:00
return - errno ;
return 0 ;
}
2017-07-26 14:02:05 +02:00
static int
perf_evsel__read_one ( struct perf_evsel * evsel , int cpu , int thread )
{
struct perf_counts_values * count = perf_counts ( evsel - > counts , cpu , thread ) ;
return perf_evsel__read ( evsel , cpu , thread , count ) ;
}
/*
 * Store one value / enabled-time / running-time triple in @counter's counts
 * slot for (@cpu, @thread) and flag the slot as loaded.
 */
static void
perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread,
		      u64 val, u64 ena, u64 run)
{
	struct perf_counts_values *slot = perf_counts(counter->counts, cpu, thread);

	slot->val    = val;
	slot->ena    = ena;
	slot->run    = run;
	slot->loaded = true;
}
/*
 * Distribute the contents of one group read buffer (@data) to the counts of
 * the group members.
 *
 * The buffer is consumed in this order: member count, optional enabled time,
 * optional running time, then one { value, id } entry per member. Entry 0 is
 * stored on @leader; the remaining entries are matched to their evsel by id.
 *
 * Returns 0 on success, -EINVAL when the member count does not match
 * leader->nr_members or an id cannot be resolved to an evsel.
 */
static int
perf_evsel__process_group_data(struct perf_evsel *leader,
			       int cpu, int thread, u64 *data)
{
	const u64 fmt = leader->attr.read_format;
	const u64 nr = data[0];
	u64 *cursor = data + 1;
	struct sample_read_value *entries;
	u64 ena = 0, run = 0, idx;

	if (nr != (u64) leader->nr_members)
		return -EINVAL;

	if (fmt & PERF_FORMAT_TOTAL_TIME_ENABLED)
		ena = *cursor++;

	if (fmt & PERF_FORMAT_TOTAL_TIME_RUNNING)
		run = *cursor++;

	entries = (struct sample_read_value *) cursor;

	perf_evsel__set_count(leader, cpu, thread, entries[0].value, ena, run);

	for (idx = 1; idx < nr; idx++) {
		struct perf_evsel *member;

		member = perf_evlist__id2evsel(leader->evlist, entries[idx].id);
		if (member == NULL)
			return -EINVAL;

		perf_evsel__set_count(member, cpu, thread,
				      entries[idx].value, ena, run);
	}

	return 0;
}
static int
perf_evsel__read_group ( struct perf_evsel * leader , int cpu , int thread )
{
2017-11-09 12:03:40 -03:00
struct perf_stat_evsel * ps = leader - > stats ;
2017-07-26 14:02:05 +02:00
u64 read_format = leader - > attr . read_format ;
int size = perf_evsel__read_size ( leader ) ;
u64 * data = ps - > group_data ;
if ( ! ( read_format & PERF_FORMAT_ID ) )
return - EINVAL ;
if ( ! perf_evsel__is_group_leader ( leader ) )
return - EINVAL ;
if ( ! data ) {
data = zalloc ( size ) ;
if ( ! data )
return - ENOMEM ;
ps - > group_data = data ;
}
if ( FD ( leader , cpu , thread ) < 0 )
return - EINVAL ;
if ( readn ( FD ( leader , cpu , thread ) , data , size ) < = 0 )
return - errno ;
return perf_evsel__process_group_data ( leader , cpu , thread , data ) ;
}
int perf_evsel__read_counter ( struct perf_evsel * evsel , int cpu , int thread )
{
u64 read_format = evsel - > attr . read_format ;
if ( read_format & PERF_FORMAT_GROUP )
return perf_evsel__read_group ( evsel , cpu , thread ) ;
else
return perf_evsel__read_one ( evsel , cpu , thread ) ;
}
2011-01-03 17:45:52 -02:00
int __perf_evsel__read_on_cpu ( struct perf_evsel * evsel ,
int cpu , int thread , bool scale )
{
struct perf_counts_values count ;
size_t nv = scale ? 3 : 1 ;
if ( FD ( evsel , cpu , thread ) < 0 )
return - EINVAL ;
2015-06-26 11:29:11 +02:00
if ( evsel - > counts = = NULL & & perf_evsel__alloc_counts ( evsel , cpu + 1 , thread + 1 ) < 0 )
2011-01-04 00:13:17 -02:00
return - ENOMEM ;
2017-04-12 11:23:01 -07:00
if ( readn ( FD ( evsel , cpu , thread ) , & count , nv * sizeof ( u64 ) ) < = 0 )
2011-01-03 17:45:52 -02:00
return - errno ;
2015-06-26 11:29:11 +02:00
perf_evsel__compute_deltas ( evsel , cpu , thread , & count ) ;
2014-11-21 10:31:06 +01:00
perf_counts_values__scale ( & count , scale , NULL ) ;
2015-06-26 11:29:11 +02:00
* perf_counts ( evsel - > counts , cpu , thread ) = count ;
2011-01-03 17:45:52 -02:00
return 0 ;
}
perf tools: Enable grouping logic for parsed events
This patch adds a functionality that allows to create event groups
based on the way they are specified on the command line. Adding
functionality to the '{}' group syntax introduced in earlier patch.
The current '--group/-g' option behaviour remains intact. If you
specify it for record/stat/top command, all the specified events
become members of a single group with the first event as a group
leader.
With the new '{}' group syntax you can create group like:
# perf record -e '{cycles,faults}' ls
resulting in single event group containing 'cycles' and 'faults'
events, with cycles event as group leader.
All groups are created with regards to threads and cpus. Thus
recording an event group within a 2 threads on server with
4 CPUs will create 8 separate groups.
Examples (first event in brackets is group leader):
# 1 group (cpu-clock,task-clock)
perf record --group -e cpu-clock,task-clock ls
perf record -e '{cpu-clock,task-clock}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock},{minor-faults,major-faults}' ls
# 1 group (cpu-clock,task-clock,minor-faults,major-faults)
perf record --group -e cpu-clock,task-clock -e minor-faults,major-faults ls
perf record -e '{cpu-clock,task-clock,minor-faults,major-faults}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock}' -e '{minor-faults,major-faults}' \
-e instructions ls
# 1 group
# (cpu-clock,task-clock,minor-faults,major-faults,instructions)
perf record --group -e cpu-clock,task-clock \
-e minor-faults,major-faults -e instructions ls perf record -e
'{cpu-clock,task-clock,minor-faults,major-faults,instructions}' ls
It's possible to use standard event modifier for a group, which spans
over all events in the group and updates each event modifier settings,
for example:
# perf record -r '{faults:k,cache-references}:p'
resulting in ':kp' modifier being used for 'faults' and ':p' modifier
being used for 'cache-references' event.
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/n/tip-ho42u0wcr8mn1otkalqi13qp@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-08-08 12:22:36 +02:00
/*
 * get_group_fd - look up the group leader's file descriptor for one
 * (cpu, thread) slot.
 *
 * Returns -1 when @evsel is itself a group leader according to
 * perf_evsel__is_group_leader(). Otherwise returns FD(leader, cpu, thread)
 * for evsel->leader. BUG_ON() fires if leader->fd is unset or if the
 * looked-up descriptor is -1.
 */
static int get_group_fd ( struct perf_evsel * evsel , int cpu , int thread )
{
struct perf_evsel * leader = evsel - > leader ;
int fd ;
2012-11-29 15:38:30 +09:00
if ( perf_evsel__is_group_leader ( evsel ) )
perf tools: Enable grouping logic for parsed events
This patch adds a functionality that allows to create event groups
based on the way they are specified on the command line. Adding
functionality to the '{}' group syntax introduced in earlier patch.
The current '--group/-g' option behaviour remains intact. If you
specify it for record/stat/top command, all the specified events
become members of a single group with the first event as a group
leader.
With the new '{}' group syntax you can create group like:
# perf record -e '{cycles,faults}' ls
resulting in single event group containing 'cycles' and 'faults'
events, with cycles event as group leader.
All groups are created with regards to threads and cpus. Thus
recording an event group within a 2 threads on server with
4 CPUs will create 8 separate groups.
Examples (first event in brackets is group leader):
# 1 group (cpu-clock,task-clock)
perf record --group -e cpu-clock,task-clock ls
perf record -e '{cpu-clock,task-clock}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock},{minor-faults,major-faults}' ls
# 1 group (cpu-clock,task-clock,minor-faults,major-faults)
perf record --group -e cpu-clock,task-clock -e minor-faults,major-faults ls
perf record -e '{cpu-clock,task-clock,minor-faults,major-faults}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock} -e '{minor-faults,major-faults}' \
-e instructions ls
# 1 group
# (cpu-clock,task-clock,minor-faults,major-faults,instructions)
perf record --group -e cpu-clock,task-clock \
-e minor-faults,major-faults -e instructions ls perf record -e
'{cpu-clock,task-clock,minor-faults,major-faults,instructions}' ls
It's possible to use standard event modifier for a group, which spans
over all events in the group and updates each event modifier settings,
for example:
# perf record -r '{faults:k,cache-references}:p'
resulting in ':kp' modifier being used for 'faults' and ':p' modifier
being used for 'cache-references' event.
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/n/tip-ho42u0wcr8mn1otkalqi13qp@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-08-08 12:22:36 +02:00
return - 1 ;
/*
* Leader must be already processed / open ,
* if not it ' s a bug .
*/
BUG_ON ( ! leader - > fd ) ;
fd = FD ( leader , cpu , thread ) ;
BUG_ON ( fd = = - 1 ) ;
return fd ;
}
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
/*
 * One row of a flag-name table: a flag value paired with its printable label.
 * Tables of these are closed by a row whose name is NULL.
 */
struct bit_names {
	int		bit;	/* flag value that is AND-ed against the mask */
	const char	*name;	/* label for the flag; NULL in the closing row */
};
/*
 * __p_bits - render the flags set in @value as a '|'-separated list of names.
 * @buf:   destination buffer; left as an empty string when no flag matches
 * @size:  capacity of @buf in bytes; nothing is written when it is 0
 * @value: bitmask to decode
 * @bits:  table of { bit, name } rows, closed by a row with name == NULL
 *
 * Bits of @value that have no row in @bits are skipped.
 */
static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
{
	bool first_bit = true;
	int i;

	if (!size)
		return;

	/* Start empty so text from an earlier field never leaks through. */
	buf[0] = '\0';

	/* Check the terminator before using a row, so an empty table is safe. */
	for (i = 0; bits[i].name != NULL; i++) {
		int printed;

		if (!(value & bits[i].bit))
			continue;

		printed = scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);

		/*
		 * Advance the cursor and shrink the remaining space together,
		 * otherwise later writes are told the buffer is larger than it is.
		 */
		buf += printed;
		size -= printed;
		first_bit = false;
	}
}
/*
 * __p_sample_type - format a sample_type bitmask as flag names.
 *
 * Builds a table that pairs each listed PERF_SAMPLE_<n> constant with the
 * string "<n>" (via the local bit_name() macro), closes it with a
 * name == NULL row, and passes it with @buf, @size and @value to __p_bits().
 */
static void __p_sample_type ( char * buf , size_t size , u64 value )
{
# define bit_name(n) { PERF_SAMPLE_##n, #n }
struct bit_names bits [ ] = {
bit_name ( IP ) , bit_name ( TID ) , bit_name ( TIME ) , bit_name ( ADDR ) ,
bit_name ( READ ) , bit_name ( CALLCHAIN ) , bit_name ( ID ) , bit_name ( CPU ) ,
bit_name ( PERIOD ) , bit_name ( STREAM_ID ) , bit_name ( RAW ) ,
bit_name ( BRANCH_STACK ) , bit_name ( REGS_USER ) , bit_name ( STACK_USER ) ,
2015-10-05 20:06:02 +02:00
bit_name ( IDENTIFIER ) , bit_name ( REGS_INTR ) , bit_name ( DATA_SRC ) ,
2017-08-29 13:11:08 -04:00
bit_name ( WEIGHT ) , bit_name ( PHYS_ADDR ) ,
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
{ . name = NULL , }
} ;
# undef bit_name
__p_bits ( buf , size , value , bits ) ;
}
perf evlist: Decode perf_event_attr->branch_sample_type
While trying to use --call-graph lbr in 'perf trace', since we only are
interested in the callchain for userspace, up to the callchain, I found
that 'perf evlist' is not decoding the branch_sample_type field, fix it.
Before:
# perf record --call-graph lbr usleep 1
# perf evlist -v
cycles:ppp: size: 112, { sample_period, sample_freq }: 4000,
sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK,
disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1,
precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1,
comm_exec: 1, branch_sample_type: 51201
^^^^^^^^^^^^^^^^^^^^^^^^^
After:
# perf evlist -v
cycles:ppp: size: 112, { sample_period, sample_freq }: 4000,
sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK,
disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1,
precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1,
comm_exec: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-hozai7974u0ulgx13k96fcaw@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-25 16:45:29 -03:00
/*
 * __p_branch_sample_type - format a branch_sample_type bitmask as flag names.
 * @buf:   destination buffer, handed on to __p_bits()
 * @size:  capacity of @buf, handed on to __p_bits()
 * @value: bitmask to decode
 *
 * Each table row pairs a PERF_SAMPLE_BRANCH_<n> constant with the string
 * "<n>"; the row with name == NULL closes the table.
 */
static void __p_branch_sample_type(char *buf, size_t size, u64 value)
{
#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
	struct bit_names branch_flags[] = {
		bit_name(USER),
		bit_name(KERNEL),
		bit_name(HV),
		bit_name(ANY),
		bit_name(ANY_CALL),
		bit_name(ANY_RETURN),
		bit_name(IND_CALL),
		bit_name(ABORT_TX),
		bit_name(IN_TX),
		bit_name(NO_TX),
		bit_name(COND),
		bit_name(CALL_STACK),
		bit_name(IND_JUMP),
		bit_name(CALL),
		bit_name(NO_FLAGS),
		bit_name(NO_CYCLES),
		{ .name = NULL, }
	};
#undef bit_name

	__p_bits(buf, size, value, branch_flags);
}
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
/*
 * __p_read_format - format a read_format bitmask as flag names.
 * @buf:   destination buffer, handed on to __p_bits()
 * @size:  capacity of @buf, handed on to __p_bits()
 * @value: bitmask to decode
 *
 * Each table row pairs a PERF_FORMAT_<n> constant with the string "<n>";
 * the row with name == NULL closes the table.
 */
static void __p_read_format(char *buf, size_t size, u64 value)
{
#define bit_name(n) { PERF_FORMAT_##n, #n }
	struct bit_names format_flags[] = {
		bit_name(TOTAL_TIME_ENABLED),
		bit_name(TOTAL_TIME_RUNNING),
		bit_name(ID),
		bit_name(GROUP),
		{ .name = NULL, }
	};
#undef bit_name

	__p_bits(buf, size, value, format_flags);
}
/* Size in bytes of the scratch buffer that the p_*() formatting macros fill. */
# define BUF_SIZE 1024
2015-06-11 15:51:04 +03:00
# define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
/* Format @val into the in-scope `buf` as an unsigned 64-bit decimal. */
# define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
/* Format @val into the in-scope `buf` as a signed 64-bit decimal. */
# define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
/* Decode @val as a sample_type bitmask into the in-scope `buf`. */
# define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
perf evlist: Decode perf_event_attr->branch_sample_type
While trying to use --call-graph lbr in 'perf trace', since we only are
interested in the callchain for userspace, up to the callchain, I found
that 'perf evlist' is not decoding the branch_sample_type field, fix it.
Before:
# perf record --call-graph lbr usleep 1
# perf evlist -v
cycles:ppp: size: 112, { sample_period, sample_freq }: 4000,
sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK,
disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1,
precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1,
comm_exec: 1, branch_sample_type: 51201
^^^^^^^^^^^^^^^^^^^^^^^^^
After:
# perf evlist -v
cycles:ppp: size: 112, { sample_period, sample_freq }: 4000,
sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK,
disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1,
precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1,
comm_exec: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-hozai7974u0ulgx13k96fcaw@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-25 16:45:29 -03:00
/* Decode @val as a branch_sample_type bitmask into the in-scope `buf`. */
# define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
/* Decode @val as a read_format bitmask into the in-scope `buf`. */
# define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
/*
 * PRINT_ATTRn(_n, _f, _p): when attr->_f is non-zero, format it into buf
 * with the _p() macro and pass (fp, _n, buf, priv) to attr__fprintf(),
 * adding that call's return value to ret. Fields that are zero are skipped.
 * Expects attr, buf, fp, priv, ret and attr__fprintf to be in scope where
 * the macro is expanded.
 */
# define PRINT_ATTRn(_n, _f, _p) \
do { \
if ( attr - > _f ) { \
_p ( attr - > _f ) ; \
ret + = attr__fprintf ( fp , _n , buf , priv ) ; \
} \
} while ( 0 )
/* PRINT_ATTRf(_f, _p): PRINT_ATTRn() with the field's own name (#_f) as the label. */
# define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p)
/*
 * perf_event_attr__fprintf - report the non-zero fields of @attr through a
 * caller-supplied callback.
 *
 * Each PRINT_ATTRf()/PRINT_ATTRn() line below handles one field: if the field
 * is non-zero it is formatted into the local BUF_SIZE-byte buffer and
 * @attr__fprintf is called with (@fp, field label, formatted text, @priv).
 * Fields that are zero produce no callback. Labels written as "{ a, b }" are
 * passed explicitly via PRINT_ATTRn(); all others use the field name itself.
 *
 * Returns the sum of the values returned by @attr__fprintf.
 */
int perf_event_attr__fprintf ( FILE * fp , struct perf_event_attr * attr ,
attr__fprintf_f attr__fprintf , void * priv )
{
char buf [ BUF_SIZE ] ;
int ret = 0 ;
PRINT_ATTRf ( type , p_unsigned ) ;
PRINT_ATTRf ( size , p_unsigned ) ;
PRINT_ATTRf ( config , p_hex ) ;
PRINT_ATTRn ( " { sample_period, sample_freq } " , sample_period , p_unsigned ) ;
PRINT_ATTRf ( sample_type , p_sample_type ) ;
PRINT_ATTRf ( read_format , p_read_format ) ;
PRINT_ATTRf ( disabled , p_unsigned ) ;
PRINT_ATTRf ( inherit , p_unsigned ) ;
PRINT_ATTRf ( pinned , p_unsigned ) ;
PRINT_ATTRf ( exclusive , p_unsigned ) ;
PRINT_ATTRf ( exclude_user , p_unsigned ) ;
PRINT_ATTRf ( exclude_kernel , p_unsigned ) ;
PRINT_ATTRf ( exclude_hv , p_unsigned ) ;
PRINT_ATTRf ( exclude_idle , p_unsigned ) ;
PRINT_ATTRf ( mmap , p_unsigned ) ;
PRINT_ATTRf ( comm , p_unsigned ) ;
PRINT_ATTRf ( freq , p_unsigned ) ;
PRINT_ATTRf ( inherit_stat , p_unsigned ) ;
PRINT_ATTRf ( enable_on_exec , p_unsigned ) ;
PRINT_ATTRf ( task , p_unsigned ) ;
PRINT_ATTRf ( watermark , p_unsigned ) ;
PRINT_ATTRf ( precise_ip , p_unsigned ) ;
PRINT_ATTRf ( mmap_data , p_unsigned ) ;
PRINT_ATTRf ( sample_id_all , p_unsigned ) ;
PRINT_ATTRf ( exclude_host , p_unsigned ) ;
PRINT_ATTRf ( exclude_guest , p_unsigned ) ;
PRINT_ATTRf ( exclude_callchain_kernel , p_unsigned ) ;
PRINT_ATTRf ( exclude_callchain_user , p_unsigned ) ;
PRINT_ATTRf ( mmap2 , p_unsigned ) ;
PRINT_ATTRf ( comm_exec , p_unsigned ) ;
PRINT_ATTRf ( use_clockid , p_unsigned ) ;
2015-07-21 12:44:03 +03:00
PRINT_ATTRf ( context_switch , p_unsigned ) ;
2016-05-09 18:08:33 -03:00
PRINT_ATTRf ( write_backward , p_unsigned ) ;
2018-01-07 17:03:46 +01:00
PRINT_ATTRf ( namespaces , p_unsigned ) ;
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
PRINT_ATTRn ( " { wakeup_events, wakeup_watermark } " , wakeup_events , p_unsigned ) ;
PRINT_ATTRf ( bp_type , p_unsigned ) ;
PRINT_ATTRn ( " { bp_addr, config1 } " , bp_addr , p_hex ) ;
PRINT_ATTRn ( " { bp_len, config2 } " , bp_len , p_hex ) ;
perf evlist: Decode perf_event_attr->branch_sample_type
While trying to use --call-graph lbr in 'perf trace', since we only are
interested in the callchain for userspace, up to the callchain, I found
that 'perf evlist' is not decoding the branch_sample_type field, fix it.
Before:
# perf record --call-graph lbr usleep 1
# perf evlist -v
cycles:ppp: size: 112, { sample_period, sample_freq }: 4000,
sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK,
disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1,
precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1,
comm_exec: 1, branch_sample_type: 51201
^^^^^^^^^^^^^^^^^^^^^^^^^
After:
# perf evlist -v
cycles:ppp: size: 112, { sample_period, sample_freq }: 4000,
sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|BRANCH_STACK,
disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1,
precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1,
comm_exec: 1, branch_sample_type: USER|CALL_STACK|NO_FLAGS|NO_CYCLES
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-hozai7974u0ulgx13k96fcaw@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-25 16:45:29 -03:00
PRINT_ATTRf ( branch_sample_type , p_branch_sample_type ) ;
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
PRINT_ATTRf ( sample_regs_user , p_hex ) ;
PRINT_ATTRf ( sample_stack_user , p_unsigned ) ;
PRINT_ATTRf ( clockid , p_signed ) ;
PRINT_ATTRf ( sample_regs_intr , p_hex ) ;
2015-04-09 18:54:06 +03:00
PRINT_ATTRf ( aux_watermark , p_unsigned ) ;
perf tools: Per event max-stack settings
The tooling counterpart, now it is possible to do:
# perf record -e sched:sched_switch/max-stack=10/ -e cycles/call-graph=dwarf,max-stack=4/ -e cpu-cycles/call-graph=dwarf,max-stack=1024/ usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.052 MB perf.data (5 samples) ]
# perf evlist -v
sched:sched_switch: type: 2, size: 112, config: 0x110, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|RAW|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, sample_max_stack: 10
cycles/call-graph=dwarf,max-stack=4/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 4
cpu-cycles/call-graph=dwarf,max-stack=1024/: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CALLCHAIN|PERIOD|REGS_USER|STACK_USER|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, freq: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1, exclude_callchain_user: 1, sample_regs_user: 0xff0fff, sample_stack_user: 8192, sample_max_stack: 1024
# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events
Using just /max-stack=N/ means /call-graph=fp,max-stack=N/, that should
be further configurable by means of some .perfconfig knob.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/n/tip-kolmn1yo40p7jhswxwrc7rrd@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-04-28 19:03:42 -03:00
PRINT_ATTRf ( sample_max_stack , p_unsigned ) ;
2013-08-14 15:48:24 +03:00
return ret ;
}
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
static int __open_attr__fprintf ( FILE * fp , const char * name , const char * val ,
2017-06-16 12:18:27 -03:00
void * priv __maybe_unused )
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
{
return fprintf ( fp , " %-32s %s \n " , name , val ) ;
}
perf evsel: Enable ignore_missing_thread for pid option
While monitoring a multithread process with pid option, perf sometimes
may return sys_perf_event_open failure with 3(No such process) if any of
the process's threads die before we open the event. However, we want
perf continue monitoring the remaining threads and do not exit with
error.
Here, the patch enables perf_evsel::ignore_missing_thread for -p option
to ignore complete failure if any of threads die before we open the event.
But it may still return sys_perf_event_open failure with 22(Invalid) if we
monitor several event groups.
sys_perf_event_open: pid 28960 cpu 40 group_fd 118202 flags 0x8
sys_perf_event_open: pid 28961 cpu 40 group_fd 118203 flags 0x8
WARNING: Ignored open failure for pid 28962
sys_perf_event_open: pid 28962 cpu 40 group_fd [118203] flags 0x8
sys_perf_event_open failed, error -22
That is because when we ignore a missing thread, we change the thread_idx
without dealing with its fds, FD(evsel, cpu, thread). Then get_group_fd()
may return a wrong group_fd for the next thread and sys_perf_event_open()
return with 22.
sys_perf_event_open(){
...
if (group_fd != -1)
perf_fget_light()//to get corresponding group_leader by group_fd
...
if (group_leader)
if (group_leader->ctx->task != ctx->task)//should on the same task
goto err_context
...
}
This patch also fixes this bug by introducing perf_evsel__remove_fd() and
update_fds to allow removing fds for the missing thread.
Changes since v1:
- Change group_fd__remove() into a more generic way without changing code logic
- Remove redundant condition
Changes since v2:
- Use a proper function name and add some comment.
- Multiline comment style fixes.
Committer testing:
Before this patch the recently added 'perf stat --per-thread' for system
wide counting would race while enumerating all threads using /proc:
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]#
When, say, the kernel was being built, so lots of shortlived threads,
after this patch this doesn't happen.
Signed-off-by: Mengting Zhang <zhangmengting@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Cheng Jian <cj.chengjian@huawei.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com
[ Remove one use 'evlist' alias variable ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-12-13 15:01:53 +08:00
static void perf_evsel__remove_fd ( struct perf_evsel * pos ,
int nr_cpus , int nr_threads ,
int thread_idx )
{
for ( int cpu = 0 ; cpu < nr_cpus ; cpu + + )
for ( int thread = thread_idx ; thread < nr_threads - 1 ; thread + + )
FD ( pos , cpu , thread ) = FD ( pos , cpu , thread + 1 ) ;
}
static int update_fds ( struct perf_evsel * evsel ,
int nr_cpus , int cpu_idx ,
int nr_threads , int thread_idx )
{
struct perf_evsel * pos ;
if ( cpu_idx > = nr_cpus | | thread_idx > = nr_threads )
return - EINVAL ;
evlist__for_each_entry ( evsel - > evlist , pos ) {
nr_cpus = pos ! = evsel ? nr_cpus : cpu_idx ;
perf_evsel__remove_fd ( pos , nr_cpus , nr_threads , thread_idx ) ;
/*
* Since fds for next evsel has not been created ,
* there is no need to iterate whole event list .
*/
if ( pos = = evsel )
break ;
}
return 0 ;
}
2016-12-13 08:46:22 +01:00
static bool ignore_missing_thread ( struct perf_evsel * evsel ,
perf evsel: Enable ignore_missing_thread for pid option
While monitoring a multithread process with pid option, perf sometimes
may return sys_perf_event_open failure with 3(No such process) if any of
the process's threads die before we open the event. However, we want
perf continue monitoring the remaining threads and do not exit with
error.
Here, the patch enables perf_evsel::ignore_missing_thread for -p option
to ignore complete failure if any of threads die before we open the event.
But it may still return sys_perf_event_open failure with 22(Invalid) if we
monitors several event groups.
sys_perf_event_open: pid 28960 cpu 40 group_fd 118202 flags 0x8
sys_perf_event_open: pid 28961 cpu 40 group_fd 118203 flags 0x8
WARNING: Ignored open failure for pid 28962
sys_perf_event_open: pid 28962 cpu 40 group_fd [118203] flags 0x8
sys_perf_event_open failed, error -22
That is because when we ignore a missing thread, we change the thread_idx
without dealing with its fds, FD(evsel, cpu, thread). Then get_group_fd()
may return a wrong group_fd for the next thread and sys_perf_event_open()
return with 22.
sys_perf_event_open(){
...
if (group_fd != -1)
perf_fget_light()//to get corresponding group_leader by group_fd
...
if (group_leader)
if (group_leader->ctx->task != ctx->task)//should on the same task
goto err_context
...
}
This patch also fixes this bug by introducing perf_evsel__remove_fd() and
update_fds to allow removing fds for the missing thread.
Changes since v1:
- Change group_fd__remove() into a more genetic way without changing code logic
- Remove redundant condition
Changes since v2:
- Use a proper function name and add some comment.
- Multiline comment style fixes.
Committer testing:
Before this patch the recently added 'perf stat --per-thread' for system
wide counting would race while enumerating all threads using /proc:
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]#
When, say, the kernel was being built, so lots of shortlived threads,
after this patch this doesn't happen.
Signed-off-by: Mengting Zhang <zhangmengting@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Cheng Jian <cj.chengjian@huawei.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com
[ Remove one use 'evlist' alias variable ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-12-13 15:01:53 +08:00
int nr_cpus , int cpu ,
2016-12-13 08:46:22 +01:00
struct thread_map * threads ,
int thread , int err )
{
perf evsel: Enable ignore_missing_thread for pid option
While monitoring a multithread process with pid option, perf sometimes
may return sys_perf_event_open failure with 3(No such process) if any of
the process's threads die before we open the event. However, we want
perf continue monitoring the remaining threads and do not exit with
error.
Here, the patch enables perf_evsel::ignore_missing_thread for -p option
to ignore complete failure if any of threads die before we open the event.
But it may still return sys_perf_event_open failure with 22(Invalid) if we
monitors several event groups.
sys_perf_event_open: pid 28960 cpu 40 group_fd 118202 flags 0x8
sys_perf_event_open: pid 28961 cpu 40 group_fd 118203 flags 0x8
WARNING: Ignored open failure for pid 28962
sys_perf_event_open: pid 28962 cpu 40 group_fd [118203] flags 0x8
sys_perf_event_open failed, error -22
That is because when we ignore a missing thread, we change the thread_idx
without dealing with its fds, FD(evsel, cpu, thread). Then get_group_fd()
may return a wrong group_fd for the next thread and sys_perf_event_open()
return with 22.
sys_perf_event_open(){
...
if (group_fd != -1)
perf_fget_light()//to get corresponding group_leader by group_fd
...
if (group_leader)
if (group_leader->ctx->task != ctx->task)//should on the same task
goto err_context
...
}
This patch also fixes this bug by introducing perf_evsel__remove_fd() and
update_fds to allow removing fds for the missing thread.
Changes since v1:
- Change group_fd__remove() into a more genetic way without changing code logic
- Remove redundant condition
Changes since v2:
- Use a proper function name and add some comment.
- Multiline comment style fixes.
Committer testing:
Before this patch the recently added 'perf stat --per-thread' for system
wide counting would race while enumerating all threads using /proc:
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]#
When, say, the kernel was being built, so lots of shortlived threads,
after this patch this doesn't happen.
Signed-off-by: Mengting Zhang <zhangmengting@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Cheng Jian <cj.chengjian@huawei.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com
[ Remove one use 'evlist' alias variable ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-12-13 15:01:53 +08:00
pid_t ignore_pid = thread_map__pid ( threads , thread ) ;
2016-12-13 08:46:22 +01:00
if ( ! evsel - > ignore_missing_thread )
return false ;
/* The system wide setup does not work with threads. */
if ( evsel - > system_wide )
return false ;
/* The -ESRCH is perf event syscall errno for pid's not found. */
if ( err ! = - ESRCH )
return false ;
/* If there's only one thread, let it fail. */
if ( threads - > nr = = 1 )
return false ;
perf evsel: Enable ignore_missing_thread for pid option
While monitoring a multithread process with pid option, perf sometimes
may return sys_perf_event_open failure with 3(No such process) if any of
the process's threads die before we open the event. However, we want
perf continue monitoring the remaining threads and do not exit with
error.
Here, the patch enables perf_evsel::ignore_missing_thread for -p option
to ignore complete failure if any of threads die before we open the event.
But it may still return sys_perf_event_open failure with 22(Invalid) if we
monitors several event groups.
sys_perf_event_open: pid 28960 cpu 40 group_fd 118202 flags 0x8
sys_perf_event_open: pid 28961 cpu 40 group_fd 118203 flags 0x8
WARNING: Ignored open failure for pid 28962
sys_perf_event_open: pid 28962 cpu 40 group_fd [118203] flags 0x8
sys_perf_event_open failed, error -22
That is because when we ignore a missing thread, we change the thread_idx
without dealing with its fds, FD(evsel, cpu, thread). Then get_group_fd()
may return a wrong group_fd for the next thread and sys_perf_event_open()
return with 22.
sys_perf_event_open(){
...
if (group_fd != -1)
perf_fget_light()//to get corresponding group_leader by group_fd
...
if (group_leader)
if (group_leader->ctx->task != ctx->task)//should on the same task
goto err_context
...
}
This patch also fixes this bug by introducing perf_evsel__remove_fd() and
update_fds to allow removing fds for the missing thread.
Changes since v1:
- Change group_fd__remove() into a more genetic way without changing code logic
- Remove redundant condition
Changes since v2:
- Use a proper function name and add some comment.
- Multiline comment style fixes.
Committer testing:
Before this patch the recently added 'perf stat --per-thread' for system
wide counting would race while enumerating all threads using /proc:
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]#
When, say, the kernel was being built, so lots of shortlived threads,
after this patch this doesn't happen.
Signed-off-by: Mengting Zhang <zhangmengting@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Cheng Jian <cj.chengjian@huawei.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com
[ Remove one use 'evlist' alias variable ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-12-13 15:01:53 +08:00
/*
* We should remove fd for missing_thread first
* because thread_map__remove ( ) will decrease threads - > nr .
*/
if ( update_fds ( evsel , nr_cpus , cpu , threads - > nr , thread ) )
return false ;
2016-12-13 08:46:22 +01:00
if ( thread_map__remove ( threads , thread ) )
return false ;
pr_warning ( " WARNING: Ignored open failure for pid %d \n " ,
perf evsel: Enable ignore_missing_thread for pid option
While monitoring a multithread process with pid option, perf sometimes
may return sys_perf_event_open failure with 3(No such process) if any of
the process's threads die before we open the event. However, we want
perf continue monitoring the remaining threads and do not exit with
error.
Here, the patch enables perf_evsel::ignore_missing_thread for -p option
to ignore complete failure if any of threads die before we open the event.
But it may still return sys_perf_event_open failure with 22(Invalid) if we
monitors several event groups.
sys_perf_event_open: pid 28960 cpu 40 group_fd 118202 flags 0x8
sys_perf_event_open: pid 28961 cpu 40 group_fd 118203 flags 0x8
WARNING: Ignored open failure for pid 28962
sys_perf_event_open: pid 28962 cpu 40 group_fd [118203] flags 0x8
sys_perf_event_open failed, error -22
That is because when we ignore a missing thread, we change the thread_idx
without dealing with its fds, FD(evsel, cpu, thread). Then get_group_fd()
may return a wrong group_fd for the next thread and sys_perf_event_open()
return with 22.
sys_perf_event_open(){
...
if (group_fd != -1)
perf_fget_light()//to get corresponding group_leader by group_fd
...
if (group_leader)
if (group_leader->ctx->task != ctx->task)//should on the same task
goto err_context
...
}
This patch also fixes this bug by introducing perf_evsel__remove_fd() and
update_fds to allow removing fds for the missing thread.
Changes since v1:
- Change group_fd__remove() into a more genetic way without changing code logic
- Remove redundant condition
Changes since v2:
- Use a proper function name and add some comment.
- Multiline comment style fixes.
Committer testing:
Before this patch the recently added 'perf stat --per-thread' for system
wide counting would race while enumerating all threads using /proc:
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]#
When, say, the kernel was being built, so lots of shortlived threads,
after this patch this doesn't happen.
Signed-off-by: Mengting Zhang <zhangmengting@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Cheng Jian <cj.chengjian@huawei.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com
[ Remove one use 'evlist' alias variable ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-12-13 15:01:53 +08:00
ignore_pid ) ;
2016-12-13 08:46:22 +01:00
return true ;
}
2017-02-14 10:59:04 -03:00
int perf_evsel__open ( struct perf_evsel * evsel , struct cpu_map * cpus ,
struct thread_map * threads )
2011-01-03 17:48:12 -02:00
{
2014-07-31 09:00:51 +03:00
int cpu , thread , nthreads ;
2014-06-30 22:28:47 +02:00
unsigned long flags = PERF_FLAG_FD_CLOEXEC ;
2011-10-25 10:42:19 -02:00
int pid = - 1 , err ;
2013-08-04 19:41:26 -07:00
enum { NO_CHANGE , SET_TO_MAX , INCREASED_MAX } set_rlimit = NO_CHANGE ;
2011-01-03 17:48:12 -02:00
2016-07-14 08:34:33 +00:00
if ( perf_missing_features . write_backward & & evsel - > attr . write_backward )
return - EINVAL ;
2017-02-14 10:59:04 -03:00
if ( cpus = = NULL ) {
static struct cpu_map * empty_cpu_map ;
if ( empty_cpu_map = = NULL ) {
empty_cpu_map = cpu_map__dummy_new ( ) ;
if ( empty_cpu_map = = NULL )
return - ENOMEM ;
}
cpus = empty_cpu_map ;
}
if ( threads = = NULL ) {
static struct thread_map * empty_thread_map ;
if ( empty_thread_map = = NULL ) {
empty_thread_map = thread_map__new_by_tid ( - 1 ) ;
if ( empty_thread_map = = NULL )
return - ENOMEM ;
}
threads = empty_thread_map ;
}
2014-07-31 09:00:51 +03:00
if ( evsel - > system_wide )
nthreads = 1 ;
else
nthreads = threads - > nr ;
2011-01-04 11:55:27 -02:00
if ( evsel - > fd = = NULL & &
2014-07-31 09:00:51 +03:00
perf_evsel__alloc_fd ( evsel , cpus - > nr , nthreads ) < 0 )
2011-10-25 10:42:19 -02:00
return - ENOMEM ;
2011-01-04 00:13:17 -02:00
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 11:20:01 +02:00
if ( evsel - > cgrp ) {
2014-06-30 22:28:47 +02:00
flags | = PERF_FLAG_PID_CGROUP ;
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 11:20:01 +02:00
pid = evsel - > cgrp - > fd ;
}
2012-12-13 13:13:07 -03:00
fallback_missing_features :
2015-03-31 00:19:31 +02:00
if ( perf_missing_features . clockid_wrong )
evsel - > attr . clockid = CLOCK_MONOTONIC ; /* should always work */
if ( perf_missing_features . clockid ) {
evsel - > attr . use_clockid = 0 ;
evsel - > attr . clockid = 0 ;
}
2014-06-30 22:28:47 +02:00
if ( perf_missing_features . cloexec )
flags & = ~ ( unsigned long ) PERF_FLAG_FD_CLOEXEC ;
2013-08-21 12:10:25 +02:00
if ( perf_missing_features . mmap2 )
evsel - > attr . mmap2 = 0 ;
2012-12-13 13:13:07 -03:00
if ( perf_missing_features . exclude_guest )
evsel - > attr . exclude_guest = evsel - > attr . exclude_host = 0 ;
2015-12-11 16:12:24 -08:00
if ( perf_missing_features . lbr_flags )
evsel - > attr . branch_sample_type & = ~ ( PERF_SAMPLE_BRANCH_NO_FLAGS |
PERF_SAMPLE_BRANCH_NO_CYCLES ) ;
perf stat: Use group read for event groups
Make perf stat use group read if there are groups defined. The group
read will get the values for all member of groups within a single
syscall instead of calling read syscall for every event.
We can see considerable less amount of kernel cycles spent on single
group read, than reading each event separately, like for following perf
stat command:
# perf stat -e {cycles,instructions} -I 10 -a sleep 1
Monitored with "perf stat -r 5 -e '{cycles:u,cycles:k}'"
Before:
24,325,676 cycles:u
297,040,775 cycles:k
1.038554134 seconds time elapsed
After:
25,034,418 cycles:u
158,256,395 cycles:k
1.036864497 seconds time elapsed
The perf_evsel__open fallback changes contributed by Andi Kleen.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20170726120206.9099-4-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-07-26 14:02:06 +02:00
if ( perf_missing_features . group_read & & evsel - > attr . inherit )
evsel - > attr . read_format & = ~ ( PERF_FORMAT_GROUP | PERF_FORMAT_ID ) ;
2012-12-13 13:13:07 -03:00
retry_sample_id :
if ( perf_missing_features . sample_id_all )
evsel - > attr . sample_id_all = 0 ;
perf tools: Merge all perf_event_attr print functions
Currently there's 3 (that I found) different and incomplete
implementations of printing perf_event_attr.
This is quite silly. Merge the lot.
While this patch does not retain the exact form all printing that I
found is debug output and thus it should not be critical.
Also, I cannot find a single print_event_desc() caller.
Pre:
$ perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
type 0
size 104
config 0
sample_period 4000
sample_freq 4000
sample_type 0x107
read_format 0
disabled 1 inherit 1
pinned 0 exclusive 0
exclude_user 0 exclude_kernel 0
exclude_hv 0 exclude_idle 0
mmap 1 comm 1
mmap2 1 comm_exec 1
freq 1 inherit_stat 0
enable_on_exec 1 task 1
watermark 0 precise_ip 0
mmap_data 0 sample_id_all 1
exclude_host 0 exclude_guest 1
excl.callchain_kern 0 excl.callchain_user 0
wakeup_events 0
wakeup_watermark 0
bp_type 0
bp_addr 0
config1 0
bp_len 0
config2 0
branch_sample_type 0
sample_regs_user 0
sample_stack_user 0
sample_regs_intr 0
------------------------------------------------------------
$ perf evlist -vv
cycles: sample_freq=4000, size: 104, sample_type: IP|TID|TIME|PERIOD,
disabled: 1, inherit: 1, mmap: 1, mmap2: 1, comm: 1, comm_exec: 1,
freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1
Post:
$ ./perf record -vv -e cycles -- sleep 1
------------------------------------------------------------
perf_event_attr:
size 112
{ sample_period, sample_freq } 4000
sample_type IP|TID|TIME|PERIOD
disabled 1
inherit 1
mmap 1
comm 1
freq 1
enable_on_exec 1
task 1
sample_id_all 1
exclude_guest 1
mmap2 1
comm_exec 1
------------------------------------------------------------
$ ./perf evlist -vv
cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq:
1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1,
mmap2: 1, comm_exec: 1
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150407091150.644238729@infradead.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-04-07 11:09:54 +02:00
if ( verbose > = 2 ) {
fprintf ( stderr , " %.60s \n " , graph_dotted_line ) ;
fprintf ( stderr , " perf_event_attr: \n " ) ;
perf_event_attr__fprintf ( stderr , & evsel - > attr , __open_attr__fprintf , NULL ) ;
fprintf ( stderr , " %.60s \n " , graph_dotted_line ) ;
}
2013-08-14 15:48:24 +03:00
2011-01-03 23:09:46 -02:00
for ( cpu = 0 ; cpu < cpus - > nr ; cpu + + ) {
2011-01-12 00:08:18 -02:00
2014-07-31 09:00:51 +03:00
for ( thread = 0 ; thread < nthreads ; thread + + ) {
2016-12-12 11:35:40 +01:00
int fd , group_fd ;
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 11:20:01 +02:00
2014-07-31 09:00:51 +03:00
if ( ! evsel - > cgrp & & ! evsel - > system_wide )
2015-06-23 00:36:02 +02:00
pid = thread_map__pid ( threads , thread ) ;
perf tool: Add cgroup support
This patch adds the ability to filter monitoring based on container groups
(cgroups) for both perf stat and perf record. It is possible to monitor
multiple cgroup in parallel. There is one cgroup per event. The cgroups to
monitor are passed via a new -G option followed by a comma separated list of
cgroup names.
The cgroup filesystem has to be mounted. Given a cgroup name, the perf tool
finds the corresponding directory in the cgroup filesystem and opens it. It
then passes that file descriptor to the kernel.
Example:
$ perf stat -B -a -e cycles:u,cycles:u,cycles:u -G test1,,test2 -- sleep 1
Performance counter stats for 'sleep 1':
2,368,667,414 cycles test1
2,369,661,459 cycles
<not counted> cycles test2
1.001856890 seconds time elapsed
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <4d590290.825bdf0a.7d0a.4890@mx.google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-02-14 11:20:01 +02:00
perf tools: Enable grouping logic for parsed events
This patch adds a functionality that allows to create event groups
based on the way they are specified on the command line. Adding
functionality to the '{}' group syntax introduced in earlier patch.
The current '--group/-g' option behaviour remains intact. If you
specify it for record/stat/top command, all the specified events
become members of a single group with the first event as a group
leader.
With the new '{}' group syntax you can create group like:
# perf record -e '{cycles,faults}' ls
resulting in single event group containing 'cycles' and 'faults'
events, with cycles event as group leader.
All groups are created with regards to threads and cpus. Thus
recording an event group within a 2 threads on server with
4 CPUs will create 8 separate groups.
Examples (first event in brackets is group leader):
# 1 group (cpu-clock,task-clock)
perf record --group -e cpu-clock,task-clock ls
perf record -e '{cpu-clock,task-clock}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock},{minor-faults,major-faults}' ls
# 1 group (cpu-clock,task-clock,minor-faults,major-faults)
perf record --group -e cpu-clock,task-clock -e minor-faults,major-faults ls
perf record -e '{cpu-clock,task-clock,minor-faults,major-faults}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock}' -e '{minor-faults,major-faults}' \
-e instructions ls
# 1 group
# (cpu-clock,task-clock,minor-faults,major-faults,instructions)
perf record --group -e cpu-clock,task-clock \
-e minor-faults,major-faults -e instructions ls
perf record -e '{cpu-clock,task-clock,minor-faults,major-faults,instructions}' ls
It's possible to use standard event modifier for a group, which spans
over all events in the group and updates each event modifier settings,
for example:
# perf record -r '{faults:k,cache-references}:p'
resulting in ':kp' modifier being used for 'faults' and ':p' modifier
being used for 'cache-references' event.
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/n/tip-ho42u0wcr8mn1otkalqi13qp@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-08-08 12:22:36 +02:00
group_fd = get_group_fd ( evsel , cpu , thread ) ;
2013-08-04 19:41:26 -07:00
retry_open :
2016-11-21 22:33:26 +01:00
pr_debug2 ( " sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx " ,
2013-08-14 15:48:24 +03:00
pid , cpus - > map [ cpu ] , group_fd , flags ) ;
2017-07-03 16:50:18 +02:00
test_attr__ready ( ) ;
2016-12-12 11:35:40 +01:00
fd = sys_perf_event_open ( & evsel - > attr , pid , cpus - > map [ cpu ] ,
group_fd , flags ) ;
FD ( evsel , cpu , thread ) = fd ;
if ( fd < 0 ) {
2011-10-25 10:42:19 -02:00
err = - errno ;
2016-12-13 08:46:22 +01:00
perf evsel: Enable ignore_missing_thread for pid option
While monitoring a multithreaded process with the pid option, perf sometimes
may return a sys_perf_event_open failure with 3 (No such process) if any of
the process's threads die before we open the event. However, we want
perf to continue monitoring the remaining threads and not exit with an
error.
Here, the patch enables perf_evsel::ignore_missing_thread for the -p option
to ignore complete failure if any of the threads die before we open the event.
But it may still return a sys_perf_event_open failure with 22 (Invalid) if we
monitor several event groups.
sys_perf_event_open: pid 28960 cpu 40 group_fd 118202 flags 0x8
sys_perf_event_open: pid 28961 cpu 40 group_fd 118203 flags 0x8
WARNING: Ignored open failure for pid 28962
sys_perf_event_open: pid 28962 cpu 40 group_fd [118203] flags 0x8
sys_perf_event_open failed, error -22
That is because when we ignore a missing thread, we change the thread_idx
without dealing with its fds, FD(evsel, cpu, thread). Then get_group_fd()
may return a wrong group_fd for the next thread and sys_perf_event_open()
return with 22.
sys_perf_event_open(){
...
if (group_fd != -1)
perf_fget_light()//to get corresponding group_leader by group_fd
...
if (group_leader)
if (group_leader->ctx->task != ctx->task)//should on the same task
goto err_context
...
}
This patch also fixes this bug by introducing perf_evsel__remove_fd() and
update_fds to allow removing fds for the missing thread.
Changes since v1:
- Change group_fd__remove() into a more generic way without changing code logic
- Remove redundant condition
Changes since v2:
- Use a proper function name and add some comment.
- Multiline comment style fixes.
Committer testing:
Before this patch the recently added 'perf stat --per-thread' for system
wide counting would race while enumerating all threads using /proc:
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]# perf stat --per-thread
failed to parse CPUs map: No such file or directory
Usage: perf stat [<options>] [<command>]
-C, --cpu <cpu> list of cpus to monitor in system-wide
-a, --all-cpus system-wide collection from all CPUs
[root@jouet ~]#
When, say, the kernel was being built, so lots of shortlived threads,
after this patch this doesn't happen.
Signed-off-by: Mengting Zhang <zhangmengting@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Cheng Jian <cj.chengjian@huawei.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com
[ Remove one use 'evlist' alias variable ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-12-13 15:01:53 +08:00
if ( ignore_missing_thread ( evsel , cpus - > nr , cpu , threads , thread , err ) ) {
2016-12-13 08:46:22 +01:00
/*
* We just removed 1 thread , so take a step
* back on thread index and lower the upper
* nthreads limit .
*/
nthreads - - ;
thread - - ;
/* ... and pretend like nothing have happened. */
err = 0 ;
continue ;
}
2016-11-21 22:33:26 +01:00
pr_debug2 ( " \n sys_perf_event_open failed, error %d \n " ,
2013-11-01 15:51:29 +02:00
err ) ;
2012-12-13 13:13:07 -03:00
goto try_fallback ;
2011-10-25 10:42:19 -02:00
}
perf bpf: Attach eBPF filter to perf event
This is the final patch which makes basic BPF filter work. After
applying this patch, users are allowed to use BPF filter like:
# perf record --event ./hello_world.o ls
A bpf_fd field is appended to 'struct evsel', and setup during the
callback function add_bpf_event() for each 'probe_trace_event'.
PERF_EVENT_IOC_SET_BPF ioctl is used to attach eBPF program to a newly
created perf event. The file descriptor of the eBPF program is passed to
perf record using previous patches, and stored into evsel->bpf_fd.
It is possible that different perf event are created for one kprobe
events for different CPUs. In this case, when trying to call the ioctl,
EEXIST will be return. This patch doesn't treat it as an error.
Committer note:
The bpf proggie used so far:
__attribute__((section("fork=_do_fork"), used))
int fork(void *ctx)
{
return 0;
}
char _license[] __attribute__((section("license"), used)) = "GPL";
int _version __attribute__((section("version"), used)) = 0x40300;
failed to produce any samples, even with forks happening and it being
running in system wide mode.
That is because now the filter is being associated, and the code above
always returns zero, meaning that all forks will be probed but filtered
away ;-/
Change it to 'return 1;' instead and after that:
# trace --no-syscalls --event /tmp/foo.o
0.000 perf_bpf_probe:fork:(ffffffff8109be30))
2.333 perf_bpf_probe:fork:(ffffffff8109be30))
3.725 perf_bpf_probe:fork:(ffffffff8109be30))
4.550 perf_bpf_probe:fork:(ffffffff8109be30))
^C#
And it works with all tools, including 'perf trace'.
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1444826502-49291-8-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 12:41:18 +00:00
2016-12-12 11:35:40 +01:00
pr_debug2 ( " = %d \n " , fd ) ;
2016-11-21 22:33:26 +01:00
perf bpf: Attach eBPF filter to perf event
This is the final patch which makes basic BPF filter work. After
applying this patch, users are allowed to use BPF filter like:
# perf record --event ./hello_world.o ls
A bpf_fd field is appended to 'struct evsel', and setup during the
callback function add_bpf_event() for each 'probe_trace_event'.
PERF_EVENT_IOC_SET_BPF ioctl is used to attach eBPF program to a newly
created perf event. The file descriptor of the eBPF program is passed to
perf record using previous patches, and stored into evsel->bpf_fd.
It is possible that different perf event are created for one kprobe
events for different CPUs. In this case, when trying to call the ioctl,
EEXIST will be return. This patch doesn't treat it as an error.
Committer note:
The bpf proggie used so far:
__attribute__((section("fork=_do_fork"), used))
int fork(void *ctx)
{
return 0;
}
char _license[] __attribute__((section("license"), used)) = "GPL";
int _version __attribute__((section("version"), used)) = 0x40300;
failed to produce any samples, even with forks happening and it being
running in system wide mode.
That is because now the filter is being associated, and the code above
always returns zero, meaning that all forks will be probed but filtered
away ;-/
Change it to 'return 1;' instead and after that:
# trace --no-syscalls --event /tmp/foo.o
0.000 perf_bpf_probe:fork:(ffffffff8109be30))
2.333 perf_bpf_probe:fork:(ffffffff8109be30))
3.725 perf_bpf_probe:fork:(ffffffff8109be30))
4.550 perf_bpf_probe:fork:(ffffffff8109be30))
^C#
And it works with all tools, including 'perf trace'.
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1444826502-49291-8-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 12:41:18 +00:00
if ( evsel - > bpf_fd > = 0 ) {
2016-12-12 11:35:40 +01:00
int evt_fd = fd ;
perf bpf: Attach eBPF filter to perf event
This is the final patch which makes basic BPF filter work. After
applying this patch, users are allowed to use BPF filter like:
# perf record --event ./hello_world.o ls
A bpf_fd field is appended to 'struct evsel', and setup during the
callback function add_bpf_event() for each 'probe_trace_event'.
PERF_EVENT_IOC_SET_BPF ioctl is used to attach eBPF program to a newly
created perf event. The file descriptor of the eBPF program is passed to
perf record using previous patches, and stored into evsel->bpf_fd.
It is possible that different perf event are created for one kprobe
events for different CPUs. In this case, when trying to call the ioctl,
EEXIST will be return. This patch doesn't treat it as an error.
Committer note:
The bpf proggie used so far:
__attribute__((section("fork=_do_fork"), used))
int fork(void *ctx)
{
return 0;
}
char _license[] __attribute__((section("license"), used)) = "GPL";
int _version __attribute__((section("version"), used)) = 0x40300;
failed to produce any samples, even with forks happening and it being
running in system wide mode.
That is because now the filter is being associated, and the code above
always returns zero, meaning that all forks will be probed but filtered
away ;-/
Change it to 'return 1;' instead and after that:
# trace --no-syscalls --event /tmp/foo.o
0.000 perf_bpf_probe:fork:(ffffffff8109be30))
2.333 perf_bpf_probe:fork:(ffffffff8109be30))
3.725 perf_bpf_probe:fork:(ffffffff8109be30))
4.550 perf_bpf_probe:fork:(ffffffff8109be30))
^C#
And it works with all tools, including 'perf trace'.
Signed-off-by: Wang Nan <wangnan0@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1444826502-49291-8-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-10-14 12:41:18 +00:00
int bpf_fd = evsel - > bpf_fd ;
err = ioctl ( evt_fd ,
PERF_EVENT_IOC_SET_BPF ,
bpf_fd ) ;
if ( err & & errno ! = EEXIST ) {
pr_err ( " failed to attach bpf fd %d: %s \n " ,
bpf_fd , strerror ( errno ) ) ;
err = - EINVAL ;
goto out_close ;
}
}
2013-08-04 19:41:26 -07:00
set_rlimit = NO_CHANGE ;
2015-03-31 00:19:31 +02:00
/*
* If we succeeded but had to kill clockid , fail and
* have perf_evsel__open_strerror ( ) print us a nice
* error .
*/
if ( perf_missing_features . clockid | |
perf_missing_features . clockid_wrong ) {
err = - EINVAL ;
goto out_close ;
}
2011-01-04 11:55:27 -02:00
}
2011-01-03 17:48:12 -02:00
}
return 0 ;
2012-12-13 13:13:07 -03:00
try_fallback :
2013-08-04 19:41:26 -07:00
/*
* perf stat needs between 5 and 22 fds per CPU . When we run out
* of them try to increase the limits .
*/
if ( err = = - EMFILE & & set_rlimit < INCREASED_MAX ) {
struct rlimit l ;
int old_errno = errno ;
if ( getrlimit ( RLIMIT_NOFILE , & l ) = = 0 ) {
if ( set_rlimit = = NO_CHANGE )
l . rlim_cur = l . rlim_max ;
else {
l . rlim_cur = l . rlim_max + 1000 ;
l . rlim_max = l . rlim_cur ;
}
if ( setrlimit ( RLIMIT_NOFILE , & l ) = = 0 ) {
set_rlimit + + ;
errno = old_errno ;
goto retry_open ;
}
}
errno = old_errno ;
}
2012-12-13 13:13:07 -03:00
if ( err ! = - EINVAL | | cpu > 0 | | thread > 0 )
goto out_close ;
2015-03-31 00:19:31 +02:00
/*
* Must probe features in the order they were added to the
* perf_event_attr interface .
*/
2016-06-20 10:47:18 +00:00
if ( ! perf_missing_features . write_backward & & evsel - > attr . write_backward ) {
perf_missing_features . write_backward = true ;
2017-07-21 14:12:09 +02:00
pr_debug2 ( " switching off write_backward \n " ) ;
2016-07-14 08:34:33 +00:00
goto out_close ;
2016-06-20 10:47:18 +00:00
} else if ( ! perf_missing_features . clockid_wrong & & evsel - > attr . use_clockid ) {
2015-03-31 00:19:31 +02:00
perf_missing_features . clockid_wrong = true ;
2017-07-21 14:12:09 +02:00
pr_debug2 ( " switching off clockid \n " ) ;
2015-03-31 00:19:31 +02:00
goto fallback_missing_features ;
} else if ( ! perf_missing_features . clockid & & evsel - > attr . use_clockid ) {
perf_missing_features . clockid = true ;
2017-07-21 14:12:09 +02:00
pr_debug2 ( " switching off use_clockid \n " ) ;
2015-03-31 00:19:31 +02:00
goto fallback_missing_features ;
} else if ( ! perf_missing_features . cloexec & & ( flags & PERF_FLAG_FD_CLOEXEC ) ) {
2014-06-30 22:28:47 +02:00
perf_missing_features . cloexec = true ;
2017-07-21 14:12:09 +02:00
pr_debug2 ( " switching off cloexec flag \n " ) ;
2014-06-30 22:28:47 +02:00
goto fallback_missing_features ;
} else if ( ! perf_missing_features . mmap2 & & evsel - > attr . mmap2 ) {
2013-08-21 12:10:25 +02:00
perf_missing_features . mmap2 = true ;
2017-07-21 14:12:09 +02:00
pr_debug2 ( " switching off mmap2 \n " ) ;
2013-08-21 12:10:25 +02:00
goto fallback_missing_features ;
} else if ( ! perf_missing_features . exclude_guest & &
( evsel - > attr . exclude_guest | | evsel - > attr . exclude_host ) ) {
2012-12-13 13:13:07 -03:00
perf_missing_features . exclude_guest = true ;
2017-07-21 14:12:09 +02:00
pr_debug2 ( " switching off exclude_guest, exclude_host \n " ) ;
2012-12-13 13:13:07 -03:00
goto fallback_missing_features ;
} else if ( ! perf_missing_features . sample_id_all ) {
perf_missing_features . sample_id_all = true ;
2017-07-21 14:12:09 +02:00
pr_debug2 ( " switching off sample_id_all \n " ) ;
2012-12-13 13:13:07 -03:00
goto retry_sample_id ;
2015-12-11 16:12:24 -08:00
} else if ( ! perf_missing_features . lbr_flags & &
( evsel - > attr . branch_sample_type &
( PERF_SAMPLE_BRANCH_NO_CYCLES |
PERF_SAMPLE_BRANCH_NO_FLAGS ) ) ) {
perf_missing_features . lbr_flags = true ;
2017-07-21 14:12:09 +02:00
pr_debug2 ( " switching off branch sample type no (cycles/flags) \n " ) ;
2015-12-11 16:12:24 -08:00
goto fallback_missing_features ;
perf stat: Use group read for event groups
Make perf stat use group read if there are groups defined. The group
read will get the values for all member of groups within a single
syscall instead of calling read syscall for every event.
We can see considerable less amount of kernel cycles spent on single
group read, than reading each event separately, like for following perf
stat command:
# perf stat -e {cycles,instructions} -I 10 -a sleep 1
Monitored with "perf stat -r 5 -e '{cycles:u,cycles:k}'"
Before:
24,325,676 cycles:u
297,040,775 cycles:k
1.038554134 seconds time elapsed
After:
25,034,418 cycles:u
158,256,395 cycles:k
1.036864497 seconds time elapsed
The perf_evsel__open fallback changes contributed by Andi Kleen.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20170726120206.9099-4-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-07-26 14:02:06 +02:00
} else if ( ! perf_missing_features . group_read & &
evsel - > attr . inherit & &
( evsel - > attr . read_format & PERF_FORMAT_GROUP ) ) {
perf_missing_features . group_read = true ;
pr_debug2 ( " switching off group read \n " ) ;
goto fallback_missing_features ;
2012-12-13 13:13:07 -03:00
}
2011-01-03 17:48:12 -02:00
out_close :
2011-01-04 11:55:27 -02:00
do {
while ( - - thread > = 0 ) {
close ( FD ( evsel , cpu , thread ) ) ;
FD ( evsel , cpu , thread ) = - 1 ;
}
2014-07-31 09:00:51 +03:00
thread = nthreads ;
2011-01-04 11:55:27 -02:00
} while ( - - cpu > = 0 ) ;
2011-10-25 10:42:19 -02:00
return err ;
}
2017-08-11 16:26:17 -07:00
void perf_evsel__close ( struct perf_evsel * evsel )
2011-10-25 10:42:19 -02:00
{
if ( evsel - > fd = = NULL )
return ;
2017-08-11 16:26:17 -07:00
perf_evsel__close_fd ( evsel ) ;
2011-10-25 10:42:19 -02:00
perf_evsel__free_fd ( evsel ) ;
2011-01-03 17:48:12 -02:00
}
2011-01-11 23:42:19 -02:00
/*
 * Open @evsel for the CPUs listed in @cpus.
 *
 * Thin wrapper: forwards to perf_evsel__open() with @cpus as the cpu map and
 * NULL as the thread map, and returns that call's result unchanged.
 * NOTE(review): how perf_evsel__open() treats a NULL thread map is not
 * visible here - confirm against its definition.
 */
int perf_evsel__open_per_cpu ( struct perf_evsel * evsel ,
perf tools: Enable grouping logic for parsed events
This patch adds a functionality that allows to create event groups
based on the way they are specified on the command line. Adding
functionality to the '{}' group syntax introduced in earlier patch.
The current '--group/-g' option behaviour remains intact. If you
specify it for record/stat/top command, all the specified events
become members of a single group with the first event as a group
leader.
With the new '{}' group syntax you can create group like:
# perf record -e '{cycles,faults}' ls
resulting in single event group containing 'cycles' and 'faults'
events, with cycles event as group leader.
All groups are created with regards to threads and cpus. Thus
recording an event group within a 2 threads on server with
4 CPUs will create 8 separate groups.
Examples (first event in brackets is group leader):
# 1 group (cpu-clock,task-clock)
perf record --group -e cpu-clock,task-clock ls
perf record -e '{cpu-clock,task-clock}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock},{minor-faults,major-faults}' ls
# 1 group (cpu-clock,task-clock,minor-faults,major-faults)
perf record --group -e cpu-clock,task-clock -e minor-faults,major-faults ls
perf record -e '{cpu-clock,task-clock,minor-faults,major-faults}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock} -e '{minor-faults,major-faults}' \
-e instructions ls
# 1 group
# (cpu-clock,task-clock,minor-faults,major-faults,instructions)
perf record --group -e cpu-clock,task-clock \
-e minor-faults,major-faults -e instructions ls perf record -e
'{cpu-clock,task-clock,minor-faults,major-faults,instructions}' ls
It's possible to use standard event modifier for a group, which spans
over all events in the group and updates each event modifier settings,
for example:
# perf record -r '{faults:k,cache-references}:p'
resulting in ':kp' modifier being used for 'faults' and ':p' modifier
being used for 'cache-references' event.
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/n/tip-ho42u0wcr8mn1otkalqi13qp@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-08-08 12:22:36 +02:00
struct cpu_map * cpus )
2011-01-03 17:48:12 -02:00
{
2017-02-14 10:59:04 -03:00
return perf_evsel__open ( evsel , cpus , NULL ) ;
2011-01-04 11:55:27 -02:00
}
2011-01-03 17:48:12 -02:00
2011-01-11 23:42:19 -02:00
/*
 * Open @evsel for the threads listed in @threads.
 *
 * Thin wrapper: forwards to perf_evsel__open() with NULL as the cpu map and
 * @threads as the thread map, and returns that call's result unchanged.
 * NOTE(review): how perf_evsel__open() treats a NULL cpu map is not visible
 * here - confirm against its definition.
 */
int perf_evsel__open_per_thread ( struct perf_evsel * evsel ,
perf tools: Enable grouping logic for parsed events
This patch adds a functionality that allows to create event groups
based on the way they are specified on the command line. Adding
functionality to the '{}' group syntax introduced in earlier patch.
The current '--group/-g' option behaviour remains intact. If you
specify it for record/stat/top command, all the specified events
become members of a single group with the first event as a group
leader.
With the new '{}' group syntax you can create group like:
# perf record -e '{cycles,faults}' ls
resulting in single event group containing 'cycles' and 'faults'
events, with cycles event as group leader.
All groups are created with regards to threads and cpus. Thus
recording an event group within a 2 threads on server with
4 CPUs will create 8 separate groups.
Examples (first event in brackets is group leader):
# 1 group (cpu-clock,task-clock)
perf record --group -e cpu-clock,task-clock ls
perf record -e '{cpu-clock,task-clock}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock},{minor-faults,major-faults}' ls
# 1 group (cpu-clock,task-clock,minor-faults,major-faults)
perf record --group -e cpu-clock,task-clock -e minor-faults,major-faults ls
perf record -e '{cpu-clock,task-clock,minor-faults,major-faults}' ls
# 2 groups (cpu-clock,task-clock) (minor-faults,major-faults)
perf record -e '{cpu-clock,task-clock} -e '{minor-faults,major-faults}' \
-e instructions ls
# 1 group
# (cpu-clock,task-clock,minor-faults,major-faults,instructions)
perf record --group -e cpu-clock,task-clock \
-e minor-faults,major-faults -e instructions ls perf record -e
'{cpu-clock,task-clock,minor-faults,major-faults,instructions}' ls
It's possible to use standard event modifier for a group, which spans
over all events in the group and updates each event modifier settings,
for example:
# perf record -r '{faults:k,cache-references}:p'
resulting in ':kp' modifier being used for 'faults' and ':p' modifier
being used for 'cache-references' event.
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/n/tip-ho42u0wcr8mn1otkalqi13qp@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2012-08-08 12:22:36 +02:00
struct thread_map * threads )
2011-01-04 11:55:27 -02:00
{
2017-02-14 10:59:04 -03:00
return perf_evsel__open ( evsel , NULL , threads ) ;
2011-01-03 17:48:12 -02:00
}
2011-01-12 17:03:24 -02:00
2012-09-26 12:48:18 -03:00
static int perf_evsel__parse_id_sample ( const struct perf_evsel * evsel ,
const union perf_event * event ,
struct perf_sample * sample )
2011-01-21 13:46:41 -02:00
{
2012-09-26 12:48:18 -03:00
u64 type = evsel - > attr . sample_type ;
2011-01-21 13:46:41 -02:00
const u64 * array = event - > sample . array ;
2012-09-26 12:48:18 -03:00
bool swapped = evsel - > needs_swap ;
2012-05-30 14:23:44 +02:00
union u64_swap u ;
2011-01-21 13:46:41 -02:00
array + = ( ( event - > header . size -
sizeof ( event - > header ) ) / sizeof ( u64 ) ) - 1 ;
2013-08-27 11:23:09 +03:00
if ( type & PERF_SAMPLE_IDENTIFIER ) {
sample - > id = * array ;
array - - ;
}
2011-01-21 13:46:41 -02:00
if ( type & PERF_SAMPLE_CPU ) {
2012-05-30 14:23:44 +02:00
u . val64 = * array ;
if ( swapped ) {
/* undo swap of u64, then swap on individual u32s */
u . val64 = bswap_64 ( u . val64 ) ;
u . val32 [ 0 ] = bswap_32 ( u . val32 [ 0 ] ) ;
}
sample - > cpu = u . val32 [ 0 ] ;
2011-01-21 13:46:41 -02:00
array - - ;
}
if ( type & PERF_SAMPLE_STREAM_ID ) {
sample - > stream_id = * array ;
array - - ;
}
if ( type & PERF_SAMPLE_ID ) {
sample - > id = * array ;
array - - ;
}
if ( type & PERF_SAMPLE_TIME ) {
sample - > time = * array ;
array - - ;
}
if ( type & PERF_SAMPLE_TID ) {
2012-05-30 14:23:44 +02:00
u . val64 = * array ;
if ( swapped ) {
/* undo swap of u64, then swap on individual u32s */
u . val64 = bswap_64 ( u . val64 ) ;
u . val32 [ 0 ] = bswap_32 ( u . val32 [ 0 ] ) ;
u . val32 [ 1 ] = bswap_32 ( u . val32 [ 1 ] ) ;
}
sample - > pid = u . val32 [ 0 ] ;
sample - > tid = u . val32 [ 1 ] ;
2013-10-18 15:29:01 +03:00
array - - ;
2011-01-21 13:46:41 -02:00
}
return 0 ;
}
2013-08-27 11:23:04 +03:00
/*
 * Report whether reading @size bytes starting at @offset would be invalid.
 *
 * Returns true when @size exceeds @max_size, or when @offset + @size lies
 * beyond @endp; returns false otherwise.  The size test runs first, so the
 * pointer sum is only formed for sizes that fit within @max_size.
 */
static inline bool overflow(const void *endp, u16 max_size, const void *offset,
			    u64 size)
{
	if (size > max_size)
		return true;

	return offset + size > endp;
}
2011-05-21 20:08:15 +02:00
2013-08-27 11:23:04 +03:00
/*
 * Bail out of the enclosing function with -EFAULT when overflow() rejects
 * the (offset, size, max_size) triple.
 *
 * The expansion refers to a variable named endp that must be in scope at the
 * point of use, and it contains a return statement, so it can only be used
 * inside functions that return an int-compatible error code.
 */
# define OVERFLOW_CHECK(offset, size, max_size) \
do { \
if ( overflow ( endp , ( max_size ) , ( offset ) , ( size ) ) ) \
return - EFAULT ; \
} while ( 0 )
2011-05-21 20:08:15 +02:00
2013-08-27 11:23:04 +03:00
/* Shorthand for checking a single u64 located at offset. */
# define OVERFLOW_CHECK_u64(offset) \
OVERFLOW_CHECK ( offset , sizeof ( u64 ) , sizeof ( u64 ) )
2011-05-21 20:08:15 +02:00
2017-08-03 13:10:28 +02:00
static int
perf_event__check_size ( union perf_event * event , unsigned int sample_size )
{
/*
* The evsel ' s sample_size is based on PERF_SAMPLE_MASK which includes
* up to PERF_SAMPLE_PERIOD . After that overflow ( ) must be used to
* check the format does not go past the end of the event .
*/
if ( sample_size + sizeof ( event - > header ) > event - > header . size )
return - EFAULT ;
return 0 ;
}
2012-08-02 12:23:46 -03:00
int perf_evsel__parse_sample ( struct perf_evsel * evsel , union perf_event * event ,
2012-09-26 12:48:18 -03:00
struct perf_sample * data )
2011-01-21 13:46:41 -02:00
{
2012-08-02 12:23:46 -03:00
u64 type = evsel - > attr . sample_type ;
2012-09-26 12:48:18 -03:00
bool swapped = evsel - > needs_swap ;
2011-01-21 13:46:41 -02:00
const u64 * array ;
2013-08-27 11:23:04 +03:00
u16 max_size = event - > header . size ;
const void * endp = ( void * ) event + max_size ;
u64 sz ;
2011-01-21 13:46:41 -02:00
2011-09-06 09:12:26 -06:00
/*
* used for cross - endian analysis . See git commit 65014 ab3
* for why this goofiness is needed .
*/
2012-05-16 08:59:04 +02:00
union u64_swap u ;
2011-09-06 09:12:26 -06:00
2011-12-15 17:32:39 +01:00
memset ( data , 0 , sizeof ( * data ) ) ;
2011-01-21 13:46:41 -02:00
data - > cpu = data - > pid = data - > tid = - 1 ;
data - > stream_id = data - > id = data - > time = - 1ULL ;
2014-02-03 12:44:41 +01:00
data - > period = evsel - > attr . sample_period ;
2016-03-22 18:23:43 -03:00
data - > cpumode = event - > header . misc & PERF_RECORD_MISC_CPUMODE_MASK ;
2018-01-07 17:03:52 +01:00
data - > misc = event - > header . misc ;
2017-08-03 16:07:05 +02:00
data - > id = - 1ULL ;
data - > data_src = PERF_MEM_DATA_SRC_NONE ;
2011-01-21 13:46:41 -02:00
if ( event - > header . type ! = PERF_RECORD_SAMPLE ) {
2012-08-02 12:23:46 -03:00
if ( ! evsel - > attr . sample_id_all )
2011-01-21 13:46:41 -02:00
return 0 ;
2012-09-26 12:48:18 -03:00
return perf_evsel__parse_id_sample ( evsel , event , data ) ;
2011-01-21 13:46:41 -02:00
}
array = event - > sample . array ;
2017-08-03 13:10:28 +02:00
if ( perf_event__check_size ( event , evsel - > sample_size ) )
2011-05-21 19:33:04 +02:00
return - EFAULT ;
2013-08-27 11:23:09 +03:00
if ( type & PERF_SAMPLE_IDENTIFIER ) {
data - > id = * array ;
array + + ;
}
2011-01-21 13:46:41 -02:00
if ( type & PERF_SAMPLE_IP ) {
2013-08-27 11:23:06 +03:00
data - > ip = * array ;
2011-01-21 13:46:41 -02:00
array + + ;
}
if ( type & PERF_SAMPLE_TID ) {
2011-09-06 09:12:26 -06:00
u . val64 = * array ;
if ( swapped ) {
/* undo swap of u64, then swap on individual u32s */
u . val64 = bswap_64 ( u . val64 ) ;
u . val32 [ 0 ] = bswap_32 ( u . val32 [ 0 ] ) ;
u . val32 [ 1 ] = bswap_32 ( u . val32 [ 1 ] ) ;
}
data - > pid = u . val32 [ 0 ] ;
data - > tid = u . val32 [ 1 ] ;
2011-01-21 13:46:41 -02:00
array + + ;
}
if ( type & PERF_SAMPLE_TIME ) {
data - > time = * array ;
array + + ;
}
if ( type & PERF_SAMPLE_ADDR ) {
data - > addr = * array ;
array + + ;
}
if ( type & PERF_SAMPLE_ID ) {
data - > id = * array ;
array + + ;
}
if ( type & PERF_SAMPLE_STREAM_ID ) {
data - > stream_id = * array ;
array + + ;
}
if ( type & PERF_SAMPLE_CPU ) {
2011-09-06 09:12:26 -06:00
u . val64 = * array ;
if ( swapped ) {
/* undo swap of u64, then swap on individual u32s */
u . val64 = bswap_64 ( u . val64 ) ;
u . val32 [ 0 ] = bswap_32 ( u . val32 [ 0 ] ) ;
}
data - > cpu = u . val32 [ 0 ] ;
2011-01-21 13:46:41 -02:00
array + + ;
}
if ( type & PERF_SAMPLE_PERIOD ) {
data - > period = * array ;
array + + ;
}
if ( type & PERF_SAMPLE_READ ) {
2012-10-10 17:38:13 +02:00
u64 read_format = evsel - > attr . read_format ;
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
2012-10-10 17:38:13 +02:00
if ( read_format & PERF_FORMAT_GROUP )
data - > read . group . nr = * array ;
else
data - > read . one . value = * array ;
array + + ;
if ( read_format & PERF_FORMAT_TOTAL_TIME_ENABLED ) {
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
2012-10-10 17:38:13 +02:00
data - > read . time_enabled = * array ;
array + + ;
}
if ( read_format & PERF_FORMAT_TOTAL_TIME_RUNNING ) {
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
2012-10-10 17:38:13 +02:00
data - > read . time_running = * array ;
array + + ;
}
/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
if ( read_format & PERF_FORMAT_GROUP ) {
2013-08-27 11:23:04 +03:00
const u64 max_group_nr = UINT64_MAX /
sizeof ( struct sample_read_value ) ;
if ( data - > read . group . nr > max_group_nr )
return - EFAULT ;
sz = data - > read . group . nr *
sizeof ( struct sample_read_value ) ;
OVERFLOW_CHECK ( array , sz , max_size ) ;
data - > read . group . values =
( struct sample_read_value * ) array ;
array = ( void * ) array + sz ;
2012-10-10 17:38:13 +02:00
} else {
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
2012-10-10 17:38:13 +02:00
data - > read . one . id = * array ;
array + + ;
}
2011-01-21 13:46:41 -02:00
}
if ( type & PERF_SAMPLE_CALLCHAIN ) {
2013-08-27 11:23:04 +03:00
const u64 max_callchain_nr = UINT64_MAX / sizeof ( u64 ) ;
2011-05-21 20:08:15 +02:00
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
data - > callchain = ( struct ip_callchain * ) array + + ;
if ( data - > callchain - > nr > max_callchain_nr )
2011-05-21 20:08:15 +02:00
return - EFAULT ;
2013-08-27 11:23:04 +03:00
sz = data - > callchain - > nr * sizeof ( u64 ) ;
OVERFLOW_CHECK ( array , sz , max_size ) ;
array = ( void * ) array + sz ;
2011-01-21 13:46:41 -02:00
}
if ( type & PERF_SAMPLE_RAW ) {
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
2011-09-06 09:12:26 -06:00
u . val64 = * array ;
2017-11-29 19:43:46 +01:00
/*
* Undo swap of u64 , then swap on individual u32s ,
* get the size of the raw area and undo all of the
* swap . The pevent interface handles endianity by
* itself .
*/
if ( swapped ) {
2011-09-06 09:12:26 -06:00
u . val64 = bswap_64 ( u . val64 ) ;
u . val32 [ 0 ] = bswap_32 ( u . val32 [ 0 ] ) ;
u . val32 [ 1 ] = bswap_32 ( u . val32 [ 1 ] ) ;
}
data - > raw_size = u . val32 [ 0 ] ;
2017-11-29 19:43:46 +01:00
/*
* The raw data is aligned on 64 bits including the
* u32 size , so it ' s safe to use mem_bswap_64 .
*/
if ( swapped )
mem_bswap_64 ( ( void * ) array , data - > raw_size ) ;
2013-08-27 11:23:04 +03:00
array = ( void * ) array + sizeof ( u32 ) ;
2011-05-21 20:08:15 +02:00
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK ( array , data - > raw_size , max_size ) ;
data - > raw_data = ( void * ) array ;
array = ( void * ) array + data - > raw_size ;
2011-01-21 13:46:41 -02:00
}
2012-02-09 23:21:01 +01:00
if ( type & PERF_SAMPLE_BRANCH_STACK ) {
2013-08-27 11:23:04 +03:00
const u64 max_branch_nr = UINT64_MAX /
sizeof ( struct branch_entry ) ;
2012-02-09 23:21:01 +01:00
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
data - > branch_stack = ( struct branch_stack * ) array + + ;
2012-02-09 23:21:01 +01:00
2013-08-27 11:23:04 +03:00
if ( data - > branch_stack - > nr > max_branch_nr )
return - EFAULT ;
2012-02-09 23:21:01 +01:00
sz = data - > branch_stack - > nr * sizeof ( struct branch_entry ) ;
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK ( array , sz , max_size ) ;
array = ( void * ) array + sz ;
2012-02-09 23:21:01 +01:00
}
2012-08-07 15:20:45 +02:00
if ( type & PERF_SAMPLE_REGS_USER ) {
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
2013-08-27 11:23:10 +03:00
data - > user_regs . abi = * array ;
array + + ;
2012-08-07 15:20:45 +02:00
2013-08-27 11:23:10 +03:00
if ( data - > user_regs . abi ) {
2014-01-07 13:47:25 +01:00
u64 mask = evsel - > attr . sample_regs_user ;
2013-08-27 11:23:04 +03:00
2014-01-07 13:47:25 +01:00
sz = hweight_long ( mask ) * sizeof ( u64 ) ;
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK ( array , sz , max_size ) ;
2014-01-07 13:47:25 +01:00
data - > user_regs . mask = mask ;
2012-08-07 15:20:45 +02:00
data - > user_regs . regs = ( u64 * ) array ;
2013-08-27 11:23:04 +03:00
array = ( void * ) array + sz ;
2012-08-07 15:20:45 +02:00
}
}
if ( type & PERF_SAMPLE_STACK_USER ) {
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
sz = * array + + ;
2012-08-07 15:20:45 +02:00
data - > user_stack . offset = ( ( char * ) ( array - 1 )
- ( char * ) event ) ;
2013-08-27 11:23:04 +03:00
if ( ! sz ) {
2012-08-07 15:20:45 +02:00
data - > user_stack . size = 0 ;
} else {
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK ( array , sz , max_size ) ;
2012-08-07 15:20:45 +02:00
data - > user_stack . data = ( char * ) array ;
2013-08-27 11:23:04 +03:00
array = ( void * ) array + sz ;
OVERFLOW_CHECK_u64 ( array ) ;
2013-07-04 16:20:34 +03:00
data - > user_stack . size = * array + + ;
2013-10-02 15:46:39 +02:00
if ( WARN_ONCE ( data - > user_stack . size > sz ,
" user stack dump failure \n " ) )
return - EFAULT ;
2012-08-07 15:20:45 +02:00
}
}
2013-01-24 16:10:29 +01:00
if ( type & PERF_SAMPLE_WEIGHT ) {
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
2013-01-24 16:10:29 +01:00
data - > weight = * array ;
array + + ;
}
2013-01-24 16:10:35 +01:00
if ( type & PERF_SAMPLE_DATA_SRC ) {
2013-08-27 11:23:04 +03:00
OVERFLOW_CHECK_u64 ( array ) ;
2013-01-24 16:10:35 +01:00
data - > data_src = * array ;
array + + ;
}
2013-09-20 07:40:43 -07:00
if ( type & PERF_SAMPLE_TRANSACTION ) {
2013-11-01 15:51:36 +02:00
OVERFLOW_CHECK_u64 ( array ) ;
2013-09-20 07:40:43 -07:00
data - > transaction = * array ;
array + + ;
}
2014-09-24 13:48:39 +02:00
data - > intr_regs . abi = PERF_SAMPLE_REGS_ABI_NONE ;
if ( type & PERF_SAMPLE_REGS_INTR ) {
OVERFLOW_CHECK_u64 ( array ) ;
data - > intr_regs . abi = * array ;
array + + ;
if ( data - > intr_regs . abi ! = PERF_SAMPLE_REGS_ABI_NONE ) {
u64 mask = evsel - > attr . sample_regs_intr ;
sz = hweight_long ( mask ) * sizeof ( u64 ) ;
OVERFLOW_CHECK ( array , sz , max_size ) ;
data - > intr_regs . mask = mask ;
data - > intr_regs . regs = ( u64 * ) array ;
array = ( void * ) array + sz ;
}
}
2017-08-29 13:11:08 -04:00
data - > phys_addr = 0 ;
if ( type & PERF_SAMPLE_PHYS_ADDR ) {
data - > phys_addr = * array ;
array + + ;
}
2011-01-21 13:46:41 -02:00
return 0 ;
}
2011-11-28 12:03:31 +03:00
2017-08-03 13:10:28 +02:00
/*
 * Extract only the timestamp of 'event' into '*timestamp'.
 *
 * Returns -1 when the evsel does not sample PERF_SAMPLE_TIME, or when a
 * non-sample event carries no usable id sample (attr.sample_id_all unset or
 * perf_evsel__parse_id_sample() failing); -EFAULT when a sample event fails
 * perf_event__check_size(); 0 once '*timestamp' has been written.
 */
int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel,
				       union perf_event *event,
				       u64 *timestamp)
{
	const u64 type = evsel->attr.sample_type;
	const u64 *field;

	if (!(type & PERF_SAMPLE_TIME))
		return -1;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		struct perf_sample id_sample = {
			.time = -1ULL,
		};

		if (!evsel->attr.sample_id_all ||
		    perf_evsel__parse_id_sample(evsel, event, &id_sample))
			return -1;

		*timestamp = id_sample.time;
		return 0;
	}

	if (perf_event__check_size(event, evsel->sample_size))
		return -EFAULT;

	/* Step over each u64 field that precedes the time when present. */
	field = event->sample.array;
	field += !!(type & PERF_SAMPLE_IDENTIFIER);
	field += !!(type & PERF_SAMPLE_IP);
	field += !!(type & PERF_SAMPLE_TID);

	/* PERF_SAMPLE_TIME is known to be set, see the first test above. */
	*timestamp = *field;
	return 0;
}
2013-08-27 11:23:12 +03:00
/*
 * Compute the number of bytes a sample event needs in order to hold the
 * fields of 'sample' selected by 'type' (PERF_SAMPLE_* bits) and, for
 * PERF_SAMPLE_READ, by 'read_format' (PERF_FORMAT_* bits).
 *
 * The total is sizeof(struct sample_event), plus one u64 per fixed-size
 * field, plus the bytes taken by the variable-sized parts.  Members of
 * 'sample' are only dereferenced for fields whose bit is set in 'type'.
 */
size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
				     u64 read_format)
{
	size_t nr_u64 = 0;	/* number of plain 64-bit words */
	size_t var_bytes = 0;	/* bytes taken by variable-sized parts */

	if (type & PERF_SAMPLE_IDENTIFIER)
		nr_u64++;
	if (type & PERF_SAMPLE_IP)
		nr_u64++;
	if (type & PERF_SAMPLE_TID)
		nr_u64++;
	if (type & PERF_SAMPLE_TIME)
		nr_u64++;
	if (type & PERF_SAMPLE_ADDR)
		nr_u64++;
	if (type & PERF_SAMPLE_ID)
		nr_u64++;
	if (type & PERF_SAMPLE_STREAM_ID)
		nr_u64++;
	if (type & PERF_SAMPLE_CPU)
		nr_u64++;
	if (type & PERF_SAMPLE_PERIOD)
		nr_u64++;

	if (type & PERF_SAMPLE_READ) {
		/* group.nr or one.value */
		nr_u64++;
		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
			nr_u64++;
		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
			nr_u64++;
		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
		if (read_format & PERF_FORMAT_GROUP)
			var_bytes += sample->read.group.nr *
				     sizeof(struct sample_read_value);
		else
			nr_u64++;
	}

	/* The callchain carries its 'nr' word in front of the entries. */
	if (type & PERF_SAMPLE_CALLCHAIN)
		var_bytes += (sample->callchain->nr + 1) * sizeof(u64);

	/* Raw data is a u32 size followed by the payload. */
	if (type & PERF_SAMPLE_RAW)
		var_bytes += sizeof(u32) + sample->raw_size;

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		nr_u64++;
		var_bytes += sample->branch_stack->nr *
			     sizeof(struct branch_entry);
	}

	if (type & PERF_SAMPLE_REGS_USER) {
		/* The ABI word is always there, registers only if non-zero. */
		nr_u64++;
		if (sample->user_regs.abi)
			var_bytes += hweight_long(sample->user_regs.mask) *
				     sizeof(u64);
	}

	if (type & PERF_SAMPLE_STACK_USER) {
		const size_t stack_bytes = sample->user_stack.size;

		nr_u64++;
		if (stack_bytes) {
			/* The dump plus the trailing size word. */
			var_bytes += stack_bytes;
			nr_u64++;
		}
	}

	if (type & PERF_SAMPLE_WEIGHT)
		nr_u64++;
	if (type & PERF_SAMPLE_DATA_SRC)
		nr_u64++;
	if (type & PERF_SAMPLE_TRANSACTION)
		nr_u64++;

	if (type & PERF_SAMPLE_REGS_INTR) {
		nr_u64++;
		if (sample->intr_regs.abi)
			var_bytes += hweight_long(sample->intr_regs.mask) *
				     sizeof(u64);
	}

	if (type & PERF_SAMPLE_PHYS_ADDR)
		nr_u64++;

	return sizeof(struct sample_event) + nr_u64 * sizeof(u64) + var_bytes;
}
2011-11-28 12:03:31 +03:00
/*
 * Serialise the fields of 'sample' selected by 'type' (PERF_SAMPLE_* bits)
 * and 'read_format' (PERF_FORMAT_* bits) into event->sample.array.
 *
 * Only the sample payload is written; nothing in this function touches
 * event->header.  No bounds are checked, so 'event' has to be large enough
 * for the requested layout (presumably the size reported by
 * perf_event__sample_event_size() for the same arguments -- confirm with the
 * callers).
 *
 * Always returns 0.
 */
int perf_event__synthesize_sample(union perf_event *event, u64 type,
				  u64 read_format,
				  const struct perf_sample *sample)
{
	u64 *array;
	size_t sz;
	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union u64_swap u;

	array = event->sample.array;

	if (type & PERF_SAMPLE_IDENTIFIER) {
		*array = sample->id;
		array++;
	}

	if (type & PERF_SAMPLE_IP) {
		*array = sample->ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val32[0] = sample->pid;
		u.val32[1] = sample->tid;
		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		*array = sample->time;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		*array = sample->addr;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		*array = sample->id;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		*array = sample->stream_id;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val32[0] = sample->cpu;
		u.val32[1] = 0;
		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		*array = sample->period;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		if (read_format & PERF_FORMAT_GROUP)
			*array = sample->read.group.nr;
		else
			*array = sample->read.one.value;
		array++;

		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
			*array = sample->read.time_enabled;
			array++;
		}

		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
			*array = sample->read.time_running;
			array++;
		}

		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
		if (read_format & PERF_FORMAT_GROUP) {
			sz = sample->read.group.nr *
			     sizeof(struct sample_read_value);
			memcpy(array, sample->read.group.values, sz);
			array = (void *)array + sz;
		} else {
			*array = sample->read.one.id;
			array++;
		}
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		/* Copies the 'nr' word together with the entries. */
		sz = (sample->callchain->nr + 1) * sizeof(u64);
		memcpy(array, sample->callchain, sz);
		array = (void *)array + sz;
	}

	if (type & PERF_SAMPLE_RAW) {
		u32 raw_size = sample->raw_size;

		/*
		 * Store just the 32-bit size.  Going through a whole u64
		 * would read the never-initialised upper half of 'u' and
		 * write four bytes more than the sizeof(u32) that
		 * perf_event__sample_event_size() accounts for.
		 */
		memcpy(array, &raw_size, sizeof(raw_size));
		array = (void *)array + sizeof(u32);

		memcpy(array, sample->raw_data, sample->raw_size);
		array = (void *)array + sample->raw_size;
	}

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
		sz += sizeof(u64);
		memcpy(array, sample->branch_stack, sz);
		array = (void *)array + sz;
	}

	if (type & PERF_SAMPLE_REGS_USER) {
		if (sample->user_regs.abi) {
			*array++ = sample->user_regs.abi;
			sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
			memcpy(array, sample->user_regs.regs, sz);
			array = (void *)array + sz;
		} else {
			*array++ = 0;
		}
	}

	if (type & PERF_SAMPLE_STACK_USER) {
		sz = sample->user_stack.size;
		*array++ = sz;
		if (sz) {
			memcpy(array, sample->user_stack.data, sz);
			array = (void *)array + sz;
			/* Trailing word: the size of the dump just written. */
			*array++ = sz;
		}
	}

	if (type & PERF_SAMPLE_WEIGHT) {
		*array = sample->weight;
		array++;
	}

	if (type & PERF_SAMPLE_DATA_SRC) {
		*array = sample->data_src;
		array++;
	}

	if (type & PERF_SAMPLE_TRANSACTION) {
		*array = sample->transaction;
		array++;
	}

	if (type & PERF_SAMPLE_REGS_INTR) {
		if (sample->intr_regs.abi) {
			*array++ = sample->intr_regs.abi;
			sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
			memcpy(array, sample->intr_regs.regs, sz);
			array = (void *)array + sz;
		} else {
			*array++ = 0;
		}
	}

	if (type & PERF_SAMPLE_PHYS_ADDR) {
		*array = sample->phys_addr;
		array++;
	}

	return 0;
}
2012-09-11 19:24:23 -03:00
2012-09-18 11:21:50 -03:00
struct format_field * perf_evsel__field ( struct perf_evsel * evsel , const char * name )
{
return pevent_find_field ( evsel - > tp_format , name ) ;
}
2012-09-26 20:22:00 -03:00
void * perf_evsel__rawptr ( struct perf_evsel * evsel , struct perf_sample * sample ,
2012-09-11 19:24:23 -03:00
const char * name )
{
2012-09-18 11:21:50 -03:00
struct format_field * field = perf_evsel__field ( evsel , name ) ;
2012-09-11 19:24:23 -03:00
int offset ;
2012-09-18 11:21:50 -03:00
if ( ! field )
return NULL ;
2012-09-11 19:24:23 -03:00
offset = field - > offset ;
if ( field - > flags & FIELD_IS_DYNAMIC ) {
offset = * ( int * ) ( sample - > raw_data + field - > offset ) ;
offset & = 0xffff ;
}
return sample - > raw_data + offset ;
}
2016-05-31 12:47:46 -03:00
u64 format_field__intval ( struct format_field * field , struct perf_sample * sample ,
bool needs_swap )
2012-09-11 19:24:23 -03:00
{
2012-09-26 13:13:04 -03:00
u64 value ;
2016-05-31 12:47:46 -03:00
void * ptr = sample - > raw_data + field - > offset ;
2012-09-11 19:24:23 -03:00
2012-09-26 13:13:04 -03:00
switch ( field - > size ) {
case 1 :
return * ( u8 * ) ptr ;
case 2 :
value = * ( u16 * ) ptr ;
break ;
case 4 :
value = * ( u32 * ) ptr ;
break ;
case 8 :
perf timechart: Fix SIBGUS error on sparc64
perf timechart -T on sparc64 is terminating due to SIGBUS. Backtrace:
Program received signal SIGBUS, Bus error.
0x0000000000173d7c in perf_evsel__intval (evsel=<value optimized out>, sample=0x7feffffda28, name=0x289b28 "prev_state")
at util/evsel.c:1918
1918 util/evsel.c: No such file or directory.
in util/evsel.c
Missing separate debuginfos, use: debuginfo-install audit-libs-2.3.7-1.0.1.el6.sparc64 bzip2-libs-1.0.5-7.el6_0.sparc64 elfutils-libelf-0.155-2.0.3.el6.sparc64 elfutils-libs-0.155-2.0.3.el6.sparc64 glibc-2.12-1.132.0.8.el6_5.sparc64 numactl-2.0.7-8.el6.sparc64 python-libs-2.6.6-52.0.2.el6.sparc64 slang-2.2.1-1.el6.sparc64 xz-libs-4.999.9-0.3.beta.20091007git.el6.sparc64 zlib-1.2.3-29.el6.sparc64
(gdb) bt
0 0x0000000000173d7c in perf_evsel__intval (evsel=<value optimized out>, sample=0x7feffffda28,
name=0x289b28 "prev_state") at util/evsel.c:1918
1 0x0000000000123b94 in process_sample_sched_switch (tchart=0x7feffffe040, evsel=0x4ca850, sample=0x7feffffda28,
backtrace=0xc39010 "") at builtin-timechart.c:627
2 0x0000000000122828 in process_sample_event (tool=0x7feffffe040, event=<value optimized out>, sample=0x7feffffda28,
evsel=0x4ca850, machine=0x4c9c88) at builtin-timechart.c:569
Another extended load on unaligned pointer. As before fix by copying to
a temporary variable using memcpy.
Signed-off-by: David Ahern <david.ahern@oracle.com>
Link: http://lkml.kernel.org/r/1427228049-51893-1-git-send-email-david.ahern@oracle.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2015-03-24 16:14:09 -04:00
memcpy ( & value , ptr , sizeof ( u64 ) ) ;
2012-09-26 13:13:04 -03:00
break ;
default :
return 0 ;
}
2016-05-31 12:47:46 -03:00
if ( ! needs_swap )
2012-09-26 13:13:04 -03:00
return value ;
switch ( field - > size ) {
case 2 :
return bswap_16 ( value ) ;
case 4 :
return bswap_32 ( value ) ;
case 8 :
return bswap_64 ( value ) ;
default :
return 0 ;
}
return 0 ;
2012-09-11 19:24:23 -03:00
}
2012-12-10 18:17:08 -03:00
2016-05-31 12:47:46 -03:00
u64 perf_evsel__intval ( struct perf_evsel * evsel , struct perf_sample * sample ,
const char * name )
{
struct format_field * field = perf_evsel__field ( evsel , name ) ;
if ( ! field )
return 0 ;
return field ? format_field__intval ( field , sample , evsel - > needs_swap ) : 0 ;
}
2012-12-13 14:16:30 -03:00
bool perf_evsel__fallback ( struct perf_evsel * evsel , int err ,
char * msg , size_t msgsize )
{
perf evsel: Handle EACCESS + perf_event_paranoid=2 in fallback()
Now with the default for the kernel.perf_event_paranoid sysctl being 2 [1]
we need to fall back to :u, i.e. to set perf_event_attr.exclude_kernel
to 1.
Before:
[acme@jouet linux]$ perf record usleep 1
Error:
You may not have permission to collect stats.
Consider tweaking /proc/sys/kernel/perf_event_paranoid,
which controls use of the performance events system by
unprivileged users (without CAP_SYS_ADMIN).
The current value is 2:
-1: Allow use of (almost) all events by all users
>= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
>= 1: Disallow CPU event access by users without CAP_SYS_ADMIN
>= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN
[acme@jouet linux]$
After:
[acme@jouet linux]$ perf record usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.016 MB perf.data (7 samples) ]
[acme@jouet linux]$ perf evlist
cycles:u
[acme@jouet linux]$ perf evlist -v
cycles:u: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1
[acme@jouet linux]$
And if the user turns on verbose mode, an explanation will appear:
[acme@jouet linux]$ perf record -v usleep 1
Warning:
kernel.perf_event_paranoid=2, trying to fall back to excluding kernel samples
mmap size 528384B
[ perf record: Woken up 1 times to write data ]
Looking at the vmlinux_path (8 entries long)
Using /lib/modules/4.6.0-rc7+/build/vmlinux for symbols
[ perf record: Captured and wrote 0.016 MB perf.data (7 samples) ]
[acme@jouet linux]$
[1] 0161028b7c8a ("perf/core: Change the default paranoia level to 2")
Reported-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-b20jmx4dxt5hpaa9t2rroi0o@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-05-12 16:07:47 -03:00
int paranoid ;
2013-07-18 17:27:59 -06:00
if ( ( err = = ENOENT | | err = = ENXIO | | err = = ENODEV ) & &
2012-12-13 14:16:30 -03:00
evsel - > attr . type = = PERF_TYPE_HARDWARE & &
evsel - > attr . config = = PERF_COUNT_HW_CPU_CYCLES ) {
/*
* If it ' s cycles then fall back to hrtimer based
* cpu - clock - tick sw counter , which is always available even if
* no PMU support .
*
* PPC returns ENXIO until 2.6 .37 ( behavior changed with commit
* b0a873e ) .
*/
scnprintf ( msg , msgsize , " %s " ,
" The cycles event is not supported, trying to fall back to cpu-clock-ticks " ) ;
evsel - > attr . type = PERF_TYPE_SOFTWARE ;
evsel - > attr . config = PERF_COUNT_SW_CPU_CLOCK ;
2013-12-26 17:41:15 -03:00
zfree ( & evsel - > name ) ;
perf evsel: Handle EACCESS + perf_event_paranoid=2 in fallback()
Now with the default for the kernel.perf_event_paranoid sysctl being 2 [1]
we need to fall back to :u, i.e. to set perf_event_attr.exclude_kernel
to 1.
Before:
[acme@jouet linux]$ perf record usleep 1
Error:
You may not have permission to collect stats.
Consider tweaking /proc/sys/kernel/perf_event_paranoid,
which controls use of the performance events system by
unprivileged users (without CAP_SYS_ADMIN).
The current value is 2:
-1: Allow use of (almost) all events by all users
>= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
>= 1: Disallow CPU event access by users without CAP_SYS_ADMIN
>= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN
[acme@jouet linux]$
After:
[acme@jouet linux]$ perf record usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.016 MB perf.data (7 samples) ]
[acme@jouet linux]$ perf evlist
cycles:u
[acme@jouet linux]$ perf evlist -v
cycles:u: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1
[acme@jouet linux]$
And if the user turns on verbose mode, an explanation will appear:
[acme@jouet linux]$ perf record -v usleep 1
Warning:
kernel.perf_event_paranoid=2, trying to fall back to excluding kernel samples
mmap size 528384B
[ perf record: Woken up 1 times to write data ]
Looking at the vmlinux_path (8 entries long)
Using /lib/modules/4.6.0-rc7+/build/vmlinux for symbols
[ perf record: Captured and wrote 0.016 MB perf.data (7 samples) ]
[acme@jouet linux]$
[1] 0161028b7c8a ("perf/core: Change the default paranoia level to 2")
Reported-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-b20jmx4dxt5hpaa9t2rroi0o@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-05-12 16:07:47 -03:00
return true ;
} else if ( err = = EACCES & & ! evsel - > attr . exclude_kernel & &
( paranoid = perf_event_paranoid ( ) ) > 1 ) {
const char * name = perf_evsel__name ( evsel ) ;
char * new_name ;
if ( asprintf ( & new_name , " %s%su " , name , strchr ( name , ' : ' ) ? " " : " : " ) < 0 )
return false ;
if ( evsel - > name )
free ( evsel - > name ) ;
evsel - > name = new_name ;
scnprintf ( msg , msgsize ,
" kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples " , paranoid ) ;
evsel - > attr . exclude_kernel = 1 ;
2012-12-13 14:16:30 -03:00
return true ;
}
return false ;
}
2012-12-13 15:10:58 -03:00
2017-06-20 12:05:38 -03:00
static bool find_process ( const char * name )
{
size_t len = strlen ( name ) ;
DIR * dir ;
struct dirent * d ;
int ret = - 1 ;
dir = opendir ( procfs__mountpoint ( ) ) ;
if ( ! dir )
return false ;
/* Walk through the directory. */
while ( ret & & ( d = readdir ( dir ) ) ! = NULL ) {
char path [ PATH_MAX ] ;
char * data ;
size_t size ;
if ( ( d - > d_type ! = DT_DIR ) | |
! strcmp ( " . " , d - > d_name ) | |
! strcmp ( " .. " , d - > d_name ) )
continue ;
scnprintf ( path , sizeof ( path ) , " %s/%s/comm " ,
procfs__mountpoint ( ) , d - > d_name ) ;
if ( filename__read_str ( path , & data , & size ) )
continue ;
ret = strncmp ( name , data , len ) ;
free ( data ) ;
}
closedir ( dir ) ;
return ret ? false : true ;
}
2013-11-12 16:46:16 -03:00
int perf_evsel__open_strerror ( struct perf_evsel * evsel , struct target * target ,
2012-12-13 15:10:58 -03:00
int err , char * msg , size_t size )
{
2014-08-14 02:22:36 +00:00
char sbuf [ STRERR_BUFSIZE ] ;
perf evsel: Return exact sub event which failed with EPERM for wildcards
The kernel has a special check for a specific irq_vectors trace event.
TRACE_EVENT_PERF_PERM(irq_work_exit,
is_sampling_event(p_event) ? -EPERM : 0);
The perf-record fails for this irq_vectors event when it is present,
like when using a wildcard:
root@skl:/tmp# perf record -a -e irq_vectors:* sleep 2
Error:
You may not have permission to collect system-wide stats.
Consider tweaking /proc/sys/kernel/perf_event_paranoid,
which controls use of the performance events system by
unprivileged users (without CAP_SYS_ADMIN).
The current value is 2:
-1: Allow use of (almost) all events by all users
>= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
>= 1: Disallow CPU event access by users without CAP_SYS_ADMIN
>= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN
To make this setting permanent, edit /etc/sysctl.conf too, e.g.:
kernel.perf_event_paranoid = -1
This patch prints out the exact sub event that failed with EPERM for
wildcards to help in understanding what went wrong when this event is
present:
After the patch:
root@skl:/tmp# perf record -a -e irq_vectors:* sleep 2
Error:
No permission to enable irq_vectors:irq_work_exit event.
You may not have permission to collect system-wide stats.
......
Committer notes:
So we have a lot of irq_vectors events:
[root@jouet ~]# perf list irq_vectors:*
List of pre-defined events (to be used in -e):
irq_vectors:call_function_entry [Tracepoint event]
irq_vectors:call_function_exit [Tracepoint event]
irq_vectors:call_function_single_entry [Tracepoint event]
irq_vectors:call_function_single_exit [Tracepoint event]
irq_vectors:deferred_error_apic_entry [Tracepoint event]
irq_vectors:deferred_error_apic_exit [Tracepoint event]
irq_vectors:error_apic_entry [Tracepoint event]
irq_vectors:error_apic_exit [Tracepoint event]
irq_vectors:irq_work_entry [Tracepoint event]
irq_vectors:irq_work_exit [Tracepoint event]
irq_vectors:local_timer_entry [Tracepoint event]
irq_vectors:local_timer_exit [Tracepoint event]
irq_vectors:reschedule_entry [Tracepoint event]
irq_vectors:reschedule_exit [Tracepoint event]
irq_vectors:spurious_apic_entry [Tracepoint event]
irq_vectors:spurious_apic_exit [Tracepoint event]
irq_vectors:thermal_apic_entry [Tracepoint event]
irq_vectors:thermal_apic_exit [Tracepoint event]
irq_vectors:threshold_apic_entry [Tracepoint event]
irq_vectors:threshold_apic_exit [Tracepoint event]
irq_vectors:x86_platform_ipi_entry [Tracepoint event]
irq_vectors:x86_platform_ipi_exit [Tracepoint event]
#
And some may be sampled:
[root@jouet ~]# perf record -e irq_vectors:local* sleep 20s
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.020 MB perf.data (2 samples) ]
[root@jouet ~]# perf report -D | egrep 'stats:|events:'
Aggregated stats:
TOTAL events: 155
MMAP events: 144
COMM events: 2
EXIT events: 1
SAMPLE events: 2
MMAP2 events: 4
FINISHED_ROUND events: 1
TIME_CONV events: 1
irq_vectors:local_timer_entry stats:
TOTAL events: 1
SAMPLE events: 1
irq_vectors:local_timer_exit stats:
TOTAL events: 1
SAMPLE events: 1
[root@jouet ~]#
But, as shown in the tracepoint definition at the start of this message,
some, like "irq_vectors:irq_work_exit", may not be sampled, just counted,
i.e. if we try to sample, as when using 'perf record', we get an error:
[root@jouet ~]# perf record -e irq_vectors:irq_work_exit
Error:
You may not have permission to collect system-wide stats.
Consider tweaking /proc/sys/kernel/perf_event_paranoid,
<SNIP>
The error message is misleading, this patch will help in pointing out
what is the event causing such an error, but the error message needs
improvement, i.e. we need to figure out a way to check if a tracepoint
is counting only, like this one, when all we can do is to count it with
'perf stat', at most printing the delta using interval printing, as in:
[root@jouet ~]# perf stat -I 5000 -e irq_vectors:irq_work_*
# time counts unit events
5.000168871 0 irq_vectors:irq_work_entry
5.000168871 0 irq_vectors:irq_work_exit
10.000676730 0 irq_vectors:irq_work_entry
10.000676730 0 irq_vectors:irq_work_exit
15.001122415 0 irq_vectors:irq_work_entry
15.001122415 0 irq_vectors:irq_work_exit
20.001298051 0 irq_vectors:irq_work_entry
20.001298051 0 irq_vectors:irq_work_exit
25.001485020 1 irq_vectors:irq_work_entry
25.001485020 1 irq_vectors:irq_work_exit
30.001658706 0 irq_vectors:irq_work_entry
30.001658706 0 irq_vectors:irq_work_exit
^C 32.045711878 0 irq_vectors:irq_work_entry
32.045711878 0 irq_vectors:irq_work_exit
[root@jouet ~]#
But at least, when we use a wildcard, this patch helps a bit.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1491566932-503-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-04-07 20:08:52 +08:00
int printed = 0 ;
2014-08-14 02:22:36 +00:00
2012-12-13 15:10:58 -03:00
switch ( err ) {
case EPERM :
case EACCES :
perf evsel: Return exact sub event which failed with EPERM for wildcards
The kernel has a special check for a specific irq_vectors trace event.
TRACE_EVENT_PERF_PERM(irq_work_exit,
is_sampling_event(p_event) ? -EPERM : 0);
The perf-record fails for this irq_vectors event when it is present,
like when using a wildcard:
root@skl:/tmp# perf record -a -e irq_vectors:* sleep 2
Error:
You may not have permission to collect system-wide stats.
Consider tweaking /proc/sys/kernel/perf_event_paranoid,
which controls use of the performance events system by
unprivileged users (without CAP_SYS_ADMIN).
The current value is 2:
-1: Allow use of (almost) all events by all users
>= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
>= 1: Disallow CPU event access by users without CAP_SYS_ADMIN
>= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN
To make this setting permanent, edit /etc/sysctl.conf too, e.g.:
kernel.perf_event_paranoid = -1
This patch prints out the exact sub event that failed with EPERM for
wildcards to help in understanding what went wrong when this event is
present:
After the patch:
root@skl:/tmp# perf record -a -e irq_vectors:* sleep 2
Error:
No permission to enable irq_vectors:irq_work_exit event.
You may not have permission to collect system-wide stats.
......
Committer notes:
So we have a lot of irq_vectors events:
[root@jouet ~]# perf list irq_vectors:*
List of pre-defined events (to be used in -e):
irq_vectors:call_function_entry [Tracepoint event]
irq_vectors:call_function_exit [Tracepoint event]
irq_vectors:call_function_single_entry [Tracepoint event]
irq_vectors:call_function_single_exit [Tracepoint event]
irq_vectors:deferred_error_apic_entry [Tracepoint event]
irq_vectors:deferred_error_apic_exit [Tracepoint event]
irq_vectors:error_apic_entry [Tracepoint event]
irq_vectors:error_apic_exit [Tracepoint event]
irq_vectors:irq_work_entry [Tracepoint event]
irq_vectors:irq_work_exit [Tracepoint event]
irq_vectors:local_timer_entry [Tracepoint event]
irq_vectors:local_timer_exit [Tracepoint event]
irq_vectors:reschedule_entry [Tracepoint event]
irq_vectors:reschedule_exit [Tracepoint event]
irq_vectors:spurious_apic_entry [Tracepoint event]
irq_vectors:spurious_apic_exit [Tracepoint event]
irq_vectors:thermal_apic_entry [Tracepoint event]
irq_vectors:thermal_apic_exit [Tracepoint event]
irq_vectors:threshold_apic_entry [Tracepoint event]
irq_vectors:threshold_apic_exit [Tracepoint event]
irq_vectors:x86_platform_ipi_entry [Tracepoint event]
irq_vectors:x86_platform_ipi_exit [Tracepoint event]
#
And some may be sampled:
[root@jouet ~]# perf record -e irq_vectors:local* sleep 20s
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.020 MB perf.data (2 samples) ]
[root@jouet ~]# perf report -D | egrep 'stats:|events:'
Aggregated stats:
TOTAL events: 155
MMAP events: 144
COMM events: 2
EXIT events: 1
SAMPLE events: 2
MMAP2 events: 4
FINISHED_ROUND events: 1
TIME_CONV events: 1
irq_vectors:local_timer_entry stats:
TOTAL events: 1
SAMPLE events: 1
irq_vectors:local_timer_exit stats:
TOTAL events: 1
SAMPLE events: 1
[root@jouet ~]#
But, as shown in the tracepoint definition at the start of this message,
some, like "irq_vectors:irq_work_exit", may not be sampled, just counted,
i.e. if we try to sample, as when using 'perf record', we get an error:
[root@jouet ~]# perf record -e irq_vectors:irq_work_exit
Error:
You may not have permission to collect system-wide stats.
Consider tweaking /proc/sys/kernel/perf_event_paranoid,
<SNIP>
The error message is misleading, this patch will help in pointing out
what is the event causing such an error, but the error message needs
improvement, i.e. we need to figure out a way to check if a tracepoint
is counting only, like this one, when all we can do is to count it with
'perf stat', at most printing the delta using interval printing, as in:
[root@jouet ~]# perf stat -I 5000 -e irq_vectors:irq_work_*
# time counts unit events
5.000168871 0 irq_vectors:irq_work_entry
5.000168871 0 irq_vectors:irq_work_exit
10.000676730 0 irq_vectors:irq_work_entry
10.000676730 0 irq_vectors:irq_work_exit
15.001122415 0 irq_vectors:irq_work_entry
15.001122415 0 irq_vectors:irq_work_exit
20.001298051 0 irq_vectors:irq_work_entry
20.001298051 0 irq_vectors:irq_work_exit
25.001485020 1 irq_vectors:irq_work_entry
25.001485020 1 irq_vectors:irq_work_exit
30.001658706 0 irq_vectors:irq_work_entry
30.001658706 0 irq_vectors:irq_work_exit
^C 32.045711878 0 irq_vectors:irq_work_entry
32.045711878 0 irq_vectors:irq_work_exit
[root@jouet ~]#
But at least, when we use a wildcard, this patch helps a bit.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1491566932-503-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-04-07 20:08:52 +08:00
if ( err = = EPERM )
printed = scnprintf ( msg , size ,
" No permission to enable %s event. \n \n " ,
perf_evsel__name ( evsel ) ) ;
return scnprintf ( msg + printed , size - printed ,
2016-01-19 21:35:15 +00:00
" You may not have permission to collect %sstats. \n \n "
" Consider tweaking /proc/sys/kernel/perf_event_paranoid, \n "
" which controls use of the performance events system by \n "
" unprivileged users (without CAP_SYS_ADMIN). \n \n "
2016-05-12 15:44:55 -03:00
" The current value is %d: \n \n "
2016-01-19 21:35:15 +00:00
" -1: Allow use of (almost) all events by all users \n "
2017-08-20 14:39:20 +03:00
" Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK \n "
" >= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN \n "
" Disallow raw tracepoint access by users without CAP_SYS_ADMIN \n "
2016-01-19 21:35:15 +00:00
" >= 1: Disallow CPU event access by users without CAP_SYS_ADMIN \n "
2017-02-13 16:45:24 -03:00
" >= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN \n \n "
" To make this setting permanent, edit /etc/sysctl.conf too, e.g.: \n \n "
" kernel.perf_event_paranoid = -1 \n " ,
2016-05-12 15:44:55 -03:00
target - > system_wide ? " system-wide " : " " ,
perf_event_paranoid ( ) ) ;
2012-12-13 15:10:58 -03:00
case ENOENT :
return scnprintf ( msg , size , " The %s event is not supported. " ,
perf_evsel__name ( evsel ) ) ;
case EMFILE :
return scnprintf ( msg , size , " %s " ,
" Too many events are opened. \n "
2015-05-25 22:51:54 +02:00
" Probably the maximum number of open file descriptors has been reached. \n "
" Hint: Try again after reducing the number of events. \n "
" Hint: Try increasing the limit with 'ulimit -n <limit>' " ) ;
2016-04-27 17:51:45 -03:00
case ENOMEM :
if ( ( evsel - > attr . sample_type & PERF_SAMPLE_CALLCHAIN ) ! = 0 & &
access ( " /proc/sys/kernel/perf_event_max_stack " , F_OK ) = = 0 )
return scnprintf ( msg , size ,
" Not enough memory to setup event with callchain. \n "
" Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack \n "
" Hint: Current value: %d " , sysctl_perf_event_max_stack ) ;
break ;
2012-12-13 15:10:58 -03:00
case ENODEV :
if ( target - > cpu_list )
return scnprintf ( msg , size , " %s " ,
2016-04-27 17:56:53 -03:00
" No such device - did you specify an out-of-range profile CPU? " ) ;
2012-12-13 15:10:58 -03:00
break ;
case EOPNOTSUPP :
2016-05-09 15:07:39 +05:30
if ( evsel - > attr . sample_period ! = 0 )
2017-11-14 15:04:52 -06:00
return scnprintf ( msg , size ,
" %s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' " ,
perf_evsel__name ( evsel ) ) ;
2012-12-13 15:10:58 -03:00
if ( evsel - > attr . precise_ip )
return scnprintf ( msg , size , " %s " ,
" \' precise \' request may not be supported. Try removing 'p' modifier. " ) ;
# if defined(__i386__) || defined(__x86_64__)
if ( evsel - > attr . type = = PERF_TYPE_HARDWARE )
return scnprintf ( msg , size , " %s " ,
" No hardware sampling interrupt available. \n "
" No APIC? If so then you can boot the kernel with the \" lapic \" boot parameter to force-enable it. " ) ;
# endif
break ;
2014-08-01 17:46:54 +02:00
case EBUSY :
if ( find_process ( " oprofiled " ) )
return scnprintf ( msg , size ,
" The PMU counters are busy/taken by another profiler. \n "
" We found oprofile daemon running, please stop it and try again. " ) ;
break ;
2015-03-31 00:19:31 +02:00
case EINVAL :
2016-07-14 08:34:33 +00:00
if ( evsel - > attr . write_backward & & perf_missing_features . write_backward )
2016-06-20 10:47:18 +00:00
return scnprintf ( msg , size , " Reading from overwrite event is not supported by this kernel. " ) ;
2015-03-31 00:19:31 +02:00
if ( perf_missing_features . clockid )
return scnprintf ( msg , size , " clockid feature not supported. " ) ;
if ( perf_missing_features . clockid_wrong )
return scnprintf ( msg , size , " wrong clockid (%d) . " , clockid) ;
break ;
2012-12-13 15:10:58 -03:00
default :
break ;
}
return scnprintf ( msg , size ,
2014-08-14 02:22:36 +00:00
" The sys_perf_event_open() syscall returned with %d (%s) for event (%s). \n "
2012-12-13 15:10:58 -03:00
" /bin/dmesg may provide additional information. \n "
2016-04-27 17:56:53 -03:00
" No CONFIG_PERF_EVENTS=y kernel support configured? " ,
tools: Introduce str_error_r()
The tools so far have been using the strerror_r() GNU variant, that
returns a string, be it the buffer passed or something else.
But that, besides being tricky in cases where we expect that the
function using strerror_r() returns the error formatted in a provided
buffer (we have to check if it returned something else and copy that
instead), breaks the build on systems not using glibc, like Alpine
Linux, where musl libc is used.
So, introduce yet another wrapper, str_error_r(), that has the GNU
interface, but uses the portable XSI variant of strerror_r(), so that
users can rest assured that the provided buffer is used and it is what is
returned.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-d4t42fnf48ytlk8rjxs822tf@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-07-06 11:56:20 -03:00
err , str_error_r ( err , sbuf , sizeof ( sbuf ) ) ,
2014-08-14 02:22:36 +00:00
perf_evsel__name ( evsel ) ) ;
2012-12-13 15:10:58 -03:00
}
2016-06-30 11:44:19 +05:30
2017-12-11 12:46:11 -03:00
/*
 * perf_evsel__env - fetch the perf_env reachable through an evsel's evlist.
 *
 * @evsel: event selector to query; may be NULL.
 *
 * Returns evsel->evlist->env when both @evsel and evsel->evlist are non-NULL,
 * otherwise NULL.  The env pointer is returned as stored, without any check,
 * so callers must be prepared for a NULL result in every case (the committer
 * note in the annotation below points out that ->env can be null).
 */
struct perf_env * perf_evsel__env ( struct perf_evsel * evsel )
perf annotate: Check for fused instructions
Macro fusion merges two instructions to a single micro-op. Intel core
platform performs this hardware optimization under limited
circumstances.
For example, CMP + JCC can be "fused" and executed /retired together.
While with sampling this can result in the sample sometimes being on the
JCC and sometimes on the CMP. So for the fused instruction pair, they
could be considered together.
On Nehalem, fused instruction pairs:
cmp/test + jcc.
On other new CPU:
cmp/test/add/sub/and/inc/dec + jcc.
This patch adds an x86-specific function which checks if 2 instructions
are in a "fused" pair. For non-x86 arch, the function is just NULL.
Changelog:
v4: Move the CPU model checking to symbol__disassemble and save the CPU
family/model in arch structure.
It avoids checking every time when jump arrow printed.
v3: Add checking for Nehalem (CMP, TEST). For other newer Intel CPUs
just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC).
v2: Remove the original weak function. Arnaldo points out that doing it
as a weak function that will be overridden by the host arch doesn't
work. So now it's implemented as an arch-specific function.
Committer fix:
Do not access evsel->evlist->env->cpuid, ->env can be null, introduce
perf_evsel__env_cpuid(), just like perf_evsel__env_arch(), also used in
this function call.
The original patch was segfaulting 'perf top' + annotation.
But this essentially disables this fused instructions augmentation in
'perf top', the right thing is to get the cpuid from the running kernel,
left for a later patch tho.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1499403995-19857-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-07-07 13:06:34 +08:00
{
2017-12-11 12:46:11 -03:00
/* Only dereference the evlist when both the evsel and its evlist exist. */
if ( evsel & & evsel - > evlist )
return evsel - > evlist - > env ;
perf annotate: Check for fused instructions
Macro fusion merges two instructions to a single micro-op. Intel core
platform performs this hardware optimization under limited
circumstances.
For example, CMP + JCC can be "fused" and executed /retired together.
While with sampling this can result in the sample sometimes being on the
JCC and sometimes on the CMP. So for the fused instruction pair, they
could be considered together.
On Nehalem, fused instruction pairs:
cmp/test + jcc.
On other new CPU:
cmp/test/add/sub/and/inc/dec + jcc.
This patch adds an x86-specific function which checks if 2 instructions
are in a "fused" pair. For non-x86 arch, the function is just NULL.
Changelog:
v4: Move the CPU model checking to symbol__disassemble and save the CPU
family/model in arch structure.
It avoids checking every time when jump arrow printed.
v3: Add checking for Nehalem (CMP, TEST). For other newer Intel CPUs
just check it by default (CMP, TEST, ADD, SUB, AND, INC, DEC).
v2: Remove the original weak function. Arnaldo points out that doing it
as a weak function that will be overridden by the host arch doesn't
work. So now it's implemented as an arch-specific function.
Committer fix:
Do not access evsel->evlist->env->cpuid, ->env can be null, introduce
perf_evsel__env_cpuid(), just like perf_evsel__env_arch(), also used in
this function call.
The original patch was segfaulting 'perf top' + annotation.
But this essentially disables this fused instructions augmentation in
'perf top', the right thing is to get the cpuid from the running kernel,
left for a later patch tho.
Signed-off-by: Yao Jin <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1499403995-19857-2-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2017-07-07 13:06:34 +08:00
/* No evsel, or an evsel not attached to any evlist: nothing to report. */
return NULL ;
}