From 67d5268908283c187e0a460048a423256c2fb288 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Feb 2016 21:21:12 +0100 Subject: [PATCH 01/23] perf tools: Fix python extension build The util/python-ext-sources file contains source files required to build the python extension relative to $(srctree)/tools/perf, Such a file path $(FILE).c is handed over to the python extension build system, which builds the final object in the $(PYTHON_EXTBUILD)/tmp/$(FILE).o path. After the build is done all files from $(PYTHON_EXTBUILD)lib/ are carried as the result binaries. Above system fails when we add source file relative to ../lib, which we do for: ../lib/bitmap.c ../lib/find_bit.c ../lib/hweight.c ../lib/rbtree.c All above objects will be built like: $(PYTHON_EXTBUILD)/tmp/../lib/bitmap.c $(PYTHON_EXTBUILD)/tmp/../lib/find_bit.c $(PYTHON_EXTBUILD)/tmp/../lib/hweight.c $(PYTHON_EXTBUILD)/tmp/../lib/rbtree.c which accidentally happens to be final library path: $(PYTHON_EXTBUILD)/lib/ Changing setup.py to pass full paths of source files to Extension build class and thus keep all built objects under $(PYTHON_EXTBUILD)tmp directory. Reported-by: Jeff Bastian Signed-off-by: Jiri Olsa Tested-by: Josh Boyer Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org # v4.2+ Link: http://lkml.kernel.org/r/20160227201350.GB28494@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 1833103768cb..c8680984d2d6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') @@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] +# use full paths with source files +ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources) + perf = Extension('perf', sources = ext_sources, include_dirs = ['util/include'], From c42de706dad3f39c1f65e473a1d165ea33f8b6e8 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 26 Feb 2016 22:14:25 +0900 Subject: [PATCH 02/23] perf trace: Check and discard not only 'nr' but also '__syscall_nr' Format fields of a syscall have the first variable '__syscall_nr' or 'nr' that mean the syscall number. But it isn't relevant here so drop it. 'nr' among fields of syscall was renamed '__syscall_nr'. So add exception handling to drop '__syscall_nr' and modify the comment for this excpetion handling. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1456492465-5946-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 26a337f939d8..8dc98c598b1a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1725,8 +1725,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->args = sc->tp_format->format.fields; sc->nr_args = sc->tp_format->format.nr_fields; - /* drop nr field - not relevant here; does not exist on older kernels */ - if (sc->args && strcmp(sc->args->name, "nr") == 0) { + /* + * We need to check and discard the first variable '__syscall_nr' + * or 'nr' that mean the syscall number. It is needless here. + * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. + */ + if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { sc->args = sc->args->next; --sc->nr_args; } From 026842d148b920dc28f0499ede4950dcb098d4d5 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Fri, 26 Feb 2016 13:23:01 -0500 Subject: [PATCH 03/23] tracing/syscalls: Rename "/format" tracepoint field name "nr" to "__syscall_nr: Some tracepoint have multiple fields with the same name, "nr", the first one is a unique syscall ID, the other is a syscall argument: # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_io_getevents/format name: sys_enter_io_getevents ID: 747 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:int nr; offset:8; size:4; signed:1; field:aio_context_t ctx_id; offset:16; size:8; signed:0; field:long min_nr; offset:24; size:8; signed:0; field:long nr; offset:32; size:8; signed:0; field:struct io_event * events; offset:40; size:8; signed:0; field:struct timespec * timeout; offset:48; size:8; signed:0; print fmt: "ctx_id: 0x%08lx, min_nr: 0x%08lx, nr: 0x%08lx, events: 0x%08lx, timeout: 0x%08lx", ((unsigned long)(REC->ctx_id)), ((unsigned long)(REC->min_nr)), ((unsigned long)(REC->nr)), ((unsigned long)(REC->events)), ((unsigned long)(REC->timeout)) # Fix it by renaming the "/format" common tracepoint field "nr" to "__syscall_nr". Signed-off-by: Taeung Song [ Do not rename the struct member, just the '/format' field name ] Signed-off-by: Steven Rostedt Acked-by: Peter Zijlstra Cc: Jiri Olsa Cc: Lai Jiangshan Cc: Namhyung Kim Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160226132301.3ae065a4@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- kernel/trace/trace_syscalls.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 0655afbea83f..d1663083d903 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -186,11 +186,11 @@ print_syscall_exit(struct trace_iterator *iter, int flags, extern char *__bad_type_size(void); -#define SYSCALL_FIELD(type, name) \ - sizeof(type) != sizeof(trace.name) ? \ +#define SYSCALL_FIELD(type, field, name) \ + sizeof(type) != sizeof(trace.field) ? \ __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), name), \ - sizeof(trace.name), is_signed_type(type) + #type, #name, offsetof(typeof(trace), field), \ + sizeof(trace.field), is_signed_type(type) static int __init __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) @@ -261,7 +261,8 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call) int i; int offset = offsetof(typeof(trace), args); - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); + ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), + FILTER_OTHER); if (ret) return ret; @@ -281,11 +282,12 @@ static int __init syscall_exit_define_fields(struct trace_event_call *call) struct syscall_trace_exit trace; int ret; - ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); + ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr), + FILTER_OTHER); if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(long, ret), + ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret), FILTER_OTHER); return ret; From a6745330789f25103e67011bcfeec908fcc3b341 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Feb 2016 09:01:28 -0500 Subject: [PATCH 04/23] tools lib traceevent: Split pevent_print_event() into specific functionality functions Currently there's a single function that is used to display a record's data in human readable format. That's pevent_print_event(). Unfortunately, this gives little room for adding other output within the line without updating that function call. I've decided to split that function into 3 parts. pevent_print_event_task() which prints the task comm, pid and the CPU pevent_print_event_time() which outputs the record's timestamp pevent_print_event_data() which outputs the rest of the event data. pevent_print_event() now simply calls these three functions. To save time from doing the search for event from the record's type, I created a new helper function called pevent_find_event_by_record(), which returns the record's event, and this event has to be passed to the above functions. Signed-off-by: Steven Rostedt Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20160229090128.43a56704@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 136 ++++++++++++++++++++++------- tools/lib/traceevent/event-parse.h | 13 +++ 2 files changed, 117 insertions(+), 32 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 575e75174087..9a1e48afcf89 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5339,19 +5339,74 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) return false; } -void pevent_print_event(struct pevent *pevent, struct trace_seq *s, - struct pevent_record *record, bool use_trace_clock) +/** + * pevent_find_event_by_record - return the event from a given record + * @pevent: a handle to the pevent + * @record: The record to get the event from + * + * Returns the associated event for a given record, or NULL if non is + * is found. + */ +struct event_format * +pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record) +{ + int type; + + if (record->size < 0) { + do_warning("ug! negative record size %d", record->size); + return NULL; + } + + type = trace_parse_common_type(pevent, record->data); + + return pevent_find_event(pevent, type); +} + +/** + * pevent_print_event_task - Write the event task comm, pid and CPU + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the tasks comm, pid and CPU to @s. + */ +void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record) +{ + void *data = record->data; + const char *comm; + int pid; + + pid = parse_common_pid(pevent, data); + comm = find_cmdline(pevent, pid); + + if (pevent->latency_format) { + trace_seq_printf(s, "%8.8s-%-5d %3d", + comm, pid, record->cpu); + } else + trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); +} + +/** + * pevent_print_event_time - Write the event timestamp + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * @use_trace_clock: Set to parse according to the @pevent->trace_clock + * + * Writes the timestamp of the record into @s. + */ +void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record, + bool use_trace_clock) { - static const char *spaces = " "; /* 20 spaces */ - struct event_format *event; unsigned long secs; unsigned long usecs; unsigned long nsecs; - const char *comm; - void *data = record->data; - int type; - int pid; - int len; int p; bool use_usec_format; @@ -5362,28 +5417,11 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, nsecs = record->ts - secs * NSECS_PER_SEC; } - if (record->size < 0) { - do_warning("ug! negative record size %d", record->size); - return; - } - - type = trace_parse_common_type(pevent, data); - - event = pevent_find_event(pevent, type); - if (!event) { - do_warning("ug! no event found for type %d", type); - return; - } - - pid = parse_common_pid(pevent, data); - comm = find_cmdline(pevent, pid); - if (pevent->latency_format) { - trace_seq_printf(s, "%8.8s-%-5d %3d", - comm, pid, record->cpu); + trace_seq_printf(s, " %3d", record->cpu); pevent_data_lat_fmt(pevent, s, record); } else - trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); + trace_seq_printf(s, " [%03d]", record->cpu); if (use_usec_format) { if (pevent->flags & PEVENT_NSEC_OUTPUT) { @@ -5394,11 +5432,28 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, p = 6; } - trace_seq_printf(s, " %5lu.%0*lu: %s: ", - secs, p, usecs, event->name); + trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs); } else - trace_seq_printf(s, " %12llu: %s: ", - record->ts, event->name); + trace_seq_printf(s, " %12llu:", record->ts); +} + +/** + * pevent_print_event_data - Write the event data section + * @pevent: a handle to the pevent + * @s: the trace_seq to write to + * @event: the handle to the record's event + * @record: The record to get the event from + * + * Writes the parsing of the record's data to @s. + */ +void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record) +{ + static const char *spaces = " "; /* 20 spaces */ + int len; + + trace_seq_printf(s, " %s: ", event->name); /* Space out the event names evenly. */ len = strlen(event->name); @@ -5408,6 +5463,23 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, pevent_event_info(s, event, record); } +void pevent_print_event(struct pevent *pevent, struct trace_seq *s, + struct pevent_record *record, bool use_trace_clock) +{ + struct event_format *event; + + event = pevent_find_event_by_record(pevent, record); + if (!event) { + do_warning("ug! no event found for type %d", + trace_parse_common_type(pevent, record->data)); + return; + } + + pevent_print_event_task(pevent, s, event, record); + pevent_print_event_time(pevent, s, event, record, use_trace_clock); + pevent_print_event_data(pevent, s, event, record); +} + static int events_id_cmp(const void *a, const void *b) { struct event_format * const * ea = a; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 706d9bc24066..9ffde377e89d 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -628,6 +628,16 @@ int pevent_register_print_string(struct pevent *pevent, const char *fmt, unsigned long long addr); int pevent_pid_is_registered(struct pevent *pevent, int pid); +void pevent_print_event_task(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record); +void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record, + bool use_trace_clock); +void pevent_print_event_data(struct pevent *pevent, struct trace_seq *s, + struct event_format *event, + struct pevent_record *record); void pevent_print_event(struct pevent *pevent, struct trace_seq *s, struct pevent_record *record, bool use_trace_clock); @@ -694,6 +704,9 @@ struct event_format *pevent_find_event(struct pevent *pevent, int id); struct event_format * pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name); +struct event_format * +pevent_find_event_by_record(struct pevent *pevent, struct pevent_record *record); + void pevent_data_lat_fmt(struct pevent *pevent, struct trace_seq *s, struct pevent_record *record); int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); From f9a5978ac4ede901fa73d7c28ae1c5d89bc2a46a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Mar 2016 10:53:48 +0100 Subject: [PATCH 05/23] perf tools: Fix locale handling in pmu parsing Ingo reported regression on display format of big numbers, which is missing separators (in default perf stat output). triton:~/tip> perf stat -a sleep 1 ... 127008602 cycles # 0.011 GHz 279538533 stalled-cycles-frontend # 220.09% frontend cycles idle 119213269 instructions # 0.94 insn per cycle This is caused by recent change: perf stat: Check existence of frontend/backed stalled cycles that added call to pmu_have_event, that subsequently calls perf_pmu__parse_scale, which has a bug in locale handling. The lc string returned from setlocale, that we use to store old locale value, may be allocated in static storage. Getting a dynamic copy to make it survive another setlocale call. $ perf stat ls ... 2,360,602 cycles # 3.080 GHz 2,703,090 instructions # 1.15 insn per cycle 546,031 branches # 712.511 M/sec Committer note: Since the patch introducing the regression didn't made to perf/core, move it to just before where the regression was introduced, so that we don't break bisection for this feature. Reported-by: Ingo Molnar Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160303095348.GA24511@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ce61f79dbaae..d8cd038baed2 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -123,6 +123,17 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * */ lc = setlocale(LC_NUMERIC, NULL); + /* + * The lc string may be allocated in static storage, + * so get a dynamic copy to make it survive setlocale + * call below. + */ + lc = strdup(lc); + if (!lc) { + ret = -ENOMEM; + goto error; + } + /* * force to C locale to ensure kernel * scale string is converted correctly. @@ -135,6 +146,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * /* restore locale */ setlocale(LC_NUMERIC, lc); + free((char *) lc); + ret = 0; error: close(fd); From 9dec4473abe7967c204fe700baf5344ade34e9c8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 26 Feb 2016 16:27:56 -0800 Subject: [PATCH 06/23] perf stat: Check existence of frontend/backed stalled cycles Only put the frontend/backend stalled cycles into the default perf stat events when the CPU actually supports them. This avoids empty columns with --metric-only on newer Intel CPUs. Committer note: Before: $ perf stat ls Performance counter stats for 'ls': 1.080893 task-clock (msec) # 0.619 CPUs utilized 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.000 K/sec 97 page-faults # 0.090 M/sec 3,327,741 cycles # 3.079 GHz stalled-cycles-frontend stalled-cycles-backend 1,609,544 instructions # 0.48 insn per cycle 319,117 branches # 295.235 M/sec 12,246 branch-misses # 3.84% of all branches 0.001746508 seconds time elapsed $ After: $ perf stat ls Performance counter stats for 'ls': 0.693948 task-clock (msec) # 0.662 CPUs utilized 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.000 K/sec 95 page-faults # 0.137 M/sec 1,792,509 cycles # 2.583 GHz 1,599,047 instructions # 0.89 insn per cycle 316,328 branches # 455.838 M/sec 12,453 branch-misses # 3.94% of all branches 0.001048987 seconds time elapsed $ Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1456532881-26621-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8c0bc0fe5179..24f222dd2a8a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1441,7 +1441,7 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) */ static int add_default_attributes(void) { - struct perf_event_attr default_attrs[] = { + struct perf_event_attr default_attrs0[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, @@ -1449,8 +1449,14 @@ static int add_default_attributes(void) { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, +}; + struct perf_event_attr frontend_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, +}; + struct perf_event_attr backend_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, +}; + struct perf_event_attr default_attrs1[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, @@ -1567,7 +1573,19 @@ static int add_default_attributes(void) } if (!evsel_list->nr_entries) { - if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) + return -1; + if (pmu_have_event("cpu", "stalled-cycles-frontend")) { + if (perf_evlist__add_default_attrs(evsel_list, + frontend_attrs) < 0) + return -1; + } + if (pmu_have_event("cpu", "stalled-cycles-backend")) { + if (perf_evlist__add_default_attrs(evsel_list, + backend_attrs) < 0) + return -1; + } + if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; } From 6122d57e9f7c6cb0f0aa276fbd3a12e3af826ef2 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:56 +0000 Subject: [PATCH 07/23] perf data: Support converting data from bpf_perf_event_output() bpf_perf_event_output() outputs data through sample->raw_data. This patch adds support to convert those data into CTF. A python script then can be used to process output data from BPF programs. Test result: # cat ./test_bpf_output_2.c /************************ BEGIN **************************/ #include struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) static u64 (*ktime_get_ns)(void) = (void *)BPF_FUNC_ktime_get_ns; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; static inline int __attribute__((always_inline)) func(void *ctx, int type) { struct { u64 ktime; int type; } __attribute__((packed)) output_data; char error_data[] = "Error: failed to output\n"; int err; output_data.type = type; output_data.ktime = ktime_get_ns(); err = perf_event_output(ctx, &channel, get_smp_processor_id(), &output_data, sizeof(output_data)); if (err) trace_printk(error_data, sizeof(error_data)); return 0; } SEC("func_begin=sys_nanosleep") int func_begin(void *ctx) {return func(ctx, 1);} SEC("func_end=sys_nanosleep%return") int func_end(void *ctx) { return func(ctx, 2);} char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # ./perf record -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output_2.c/map:channel.event=evt/ \ usleep 100000 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data (2 samples) ] # ./perf script usleep 14942 92503.198504: evt: ffffffff810e0ba1 sys_nanosleep (/lib/modules/4.3.0.... usleep 14942 92503.298562: evt: ffffffff810585e9 kretprobe_trampoline_holder (/lib.... # ./perf data convert --to-ctf ./out.ctf [ perf data convert: Converted 'perf.data' into CTF data './out.ctf' ] [ perf data convert: Converted and wrote 0.000 MB (2 samples) ] # babeltrace ./out.ctf [01:41:43.198504134] (+?.?????????) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810E0BA1, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x32C0C07B, [1] = 0x5421, [2] = 0x1 ] } [01:41:43.298562257] (+0.100058123) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810585E9, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x38B77FAA, [1] = 0x5421, [2] = 0x2 ] } # cat ./test_bpf_output_2.py from babeltrace import TraceCollection tc = TraceCollection() tc.add_trace('./out.ctf', 'ctf') d = {1:[], 2:[]} for event in tc.events: if not event.name.startswith('evt'): continue raw_data = event['raw_data'] (time, type) = ((raw_data[0] + (raw_data[1] << 32)), raw_data[2]) d[type].append(time) print(list(map(lambda i: d[2][i] - d[1][i], range(len(d[1]))))); # python3 ./test_bpf_output_2.py [100056879] Committer note: Make sure you have python3-devel installed, not python-devel, which may be for python2, which will lead to some "PyInstance_Type" errors. Also make sure that you use the right libbabeltrace, because it is shipped in Fedora, for instance, but an older version. To build libbabeltrace's python binding one also needs to use: ./configure --enable-python-bindings And then set PYTHONPATH=/usr/local/lib64/python3.4/site-packages/. Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-9-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 112 +++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 6729f4d9df7c..1f608a6e2c14 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -352,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw, return ret; } +static int +add_bpf_output_values(struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_sample *sample) +{ + struct bt_ctf_field_type *len_type, *seq_type; + struct bt_ctf_field *len_field, *seq_field; + unsigned int raw_size = sample->raw_size; + unsigned int nr_elements = raw_size / sizeof(u32); + unsigned int i; + int ret; + + if (nr_elements * sizeof(u32) != raw_size) + pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n", + raw_size, nr_elements * sizeof(u32) - raw_size); + + len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len"); + len_field = bt_ctf_field_create(len_type); + if (!len_field) { + pr_err("failed to create 'raw_len' for bpf output event\n"); + ret = -1; + goto put_len_type; + } + + ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements); + if (ret) { + pr_err("failed to set field value for raw_len\n"); + goto put_len_field; + } + ret = bt_ctf_event_set_payload(event, "raw_len", len_field); + if (ret) { + pr_err("failed to set payload to raw_len\n"); + goto put_len_field; + } + + seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data"); + seq_field = bt_ctf_field_create(seq_type); + if (!seq_field) { + pr_err("failed to create 'raw_data' for bpf output event\n"); + ret = -1; + goto put_seq_type; + } + + ret = bt_ctf_field_sequence_set_length(seq_field, len_field); + if (ret) { + pr_err("failed to set length of 'raw_data'\n"); + goto put_seq_field; + } + + for (i = 0; i < nr_elements; i++) { + struct bt_ctf_field *elem_field = + bt_ctf_field_sequence_get_field(seq_field, i); + + ret = bt_ctf_field_unsigned_integer_set_value(elem_field, + ((u32 *)(sample->raw_data))[i]); + + bt_ctf_field_put(elem_field); + if (ret) { + pr_err("failed to set raw_data[%d]\n", i); + goto put_seq_field; + } + } + + ret = bt_ctf_event_set_payload(event, "raw_data", seq_field); + if (ret) + pr_err("failed to set payload for raw_data\n"); + +put_seq_field: + bt_ctf_field_put(seq_field); +put_seq_type: + bt_ctf_field_type_put(seq_type); +put_len_field: + bt_ctf_field_put(len_field); +put_len_type: + bt_ctf_field_type_put(len_type); + return ret; +} + static int add_generic_values(struct ctf_writer *cw, struct bt_ctf_event *event, struct perf_evsel *evsel, @@ -597,6 +675,12 @@ static int process_sample_event(struct perf_tool *tool, return -1; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_values(event_class, event, sample); + if (ret) + return -1; + } + cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel)); if (cs) { if (is_flush_needed(cs)) @@ -744,6 +828,25 @@ static int add_tracepoint_types(struct ctf_writer *cw, return ret; } +static int add_bpf_output_types(struct ctf_writer *cw, + struct bt_ctf_event_class *class) +{ + struct bt_ctf_field_type *len_type = cw->data.u32; + struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex; + struct bt_ctf_field_type *seq_type; + int ret; + + ret = bt_ctf_event_class_add_field(class, len_type, "raw_len"); + if (ret) + return ret; + + seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len"); + if (!seq_type) + return -1; + + return bt_ctf_event_class_add_field(class, seq_type, "raw_data"); +} + static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, struct bt_ctf_event_class *event_class) { @@ -755,7 +858,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, * ctf event header * PERF_SAMPLE_READ - TODO * PERF_SAMPLE_CALLCHAIN - TODO - * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_RAW - tracepoint fields and BPF output + * are handled separately * PERF_SAMPLE_BRANCH_STACK - TODO * PERF_SAMPLE_REGS_USER - TODO * PERF_SAMPLE_STACK_USER - TODO @@ -824,6 +928,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) goto err; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_types(cw, event_class); + if (ret) + goto err; + } + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); if (ret) { pr("Failed to add event class into stream.\n"); From f8dd2d5ff953bc498d682ae8022439c940a7d5c4 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:31:57 +0000 Subject: [PATCH 08/23] perf data: Explicitly set byte order for integer types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After babeltrace commit 5cec03e402aa ("ir: copy variants and sequences when setting a field path"), 'perf data convert' gets incorrect result if there's bpf output data. For example: # perf data convert --to-ctf ./out.ctf # babeltrace ./out.ctf [10:44:31.186045346] (+?.?????????) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810E7DD1, perf_tid = 23819, perf_pid = 23819, perf_id = 518, raw_len = 3, raw_data = [ [0] = 0xC028E32F, [1] = 0x815D0100, [2] = 0x1000000 ] } [10:44:31.286101003] (+0.100055657) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF8105B609, perf_tid = 23819, perf_pid = 23819, perf_id = 518, raw_len = 3, raw_data = [ [0] = 0x35D9F1EB, [1] = 0x15D81, [2] = 0x2 ] } The expected result of the first sample should be: raw_data = [ [0] = 0x2FE328C0, [1] = 0x15D81, [2] = 0x1 ] } however, 'perf data convert' output big endian value to resuling CTF file. The reason is a internal change (or a bug?) of babeltrace. Before this patch, at the first add_bpf_output_values(), byte order of all integer type is uncertain (is 0, neither 1234 (le) nor 4321 (be)). It would be fixed by: perf_evlist__deliver_sample -> process_sample_event -> ctf_stream ... ->bt_ctf_trace_add_stream_class ->bt_ctf_field_type_structure_set_byte_order ->bt_ctf_field_type_integer_set_byte_order during creating the stream. However, the babeltrace commit mentioned above duplicates types in sequence to prevent potential conflict in following call stack and link the newly allocated type into the 'raw_data' sequence: perf_evlist__deliver_sample -> process_sample_event -> ctf_stream ... -> bt_ctf_trace_add_stream_class -> bt_ctf_stream_class_resolve_types ... -> bt_ctf_field_type_sequence_copy ->bt_ctf_field_type_integer_copy This happens before byte order setting, so only the newly allocated type is initialized, the byte order of original type perf choose to create the first raw_data is still uncertain. Byte order in CTF output is not related to byte order in perf.data. Setting it to anything other than BT_CTF_BYTE_ORDER_NATIVE solves this problem (only BT_CTF_BYTE_ORDER_NATIVE needs to be fixed). To reduce behavior changing, set byte order according to compiling options. Signed-off-by: Wang Nan Cc: Jeremie Galarneau Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Jiri Olsa Cc: Jérémie Galarneau Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-10-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 1f608a6e2c14..811af89ce0bb 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1080,6 +1080,12 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) goto err; +#if __BYTE_ORDER == __BIG_ENDIAN + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); +#else + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); +#endif + pr2("Created type: INTEGER %d-bit %ssigned %s\n", size, sign ? "un" : "", hex ? "hex" : ""); return type; From d8871ea71281ed689dc3303d1b50eb00c5d06141 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:32:06 +0000 Subject: [PATCH 09/23] perf record: Use WARN_ONCE to replace 'if' condition Commits in a BPF patchkit will extract kernel and module synthesizing code into a separated function and call it multiple times. This patch replace 'if (err < 0)' using WARN_ONCE, makes sure the error message show one time. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-19-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7d11162b6c41..9dec7e529832 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -33,6 +33,7 @@ #include "util/parse-regs-options.h" #include "util/llvm-utils.h" #include "util/bpf-loader.h" +#include "asm/bug.h" #include #include @@ -615,17 +616,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, machine); - if (err < 0) - pr_err("Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); err = perf_event__synthesize_modules(tool, process_synthesized_event, machine); - if (err < 0) - pr_err("Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/modules permission or run as root.\n"); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); if (perf_guest) { machines__process_guests(&session->machines, From c45c86eb70964615bd13b4c1e647bf9ee60c3db9 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:32:07 +0000 Subject: [PATCH 10/23] perf record: Extract synthesize code to record__synthesize() Create record__synthesize(). It can be used to create tracking events for each perf.data after perf supporting splitting into multiple outputs. Signed-off-by: He Kuang Cc: Alexei Starovoitov Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-20-git-send-email-wangnan0@huawei.com Signed-off-by: Wang Nan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 125 ++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 55 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9dec7e529832..cb583b49a175 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -486,6 +486,74 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); +static int record__synthesize(struct record *rec) +{ + struct perf_session *session = rec->session; + struct machine *machine = &session->machines.host; + struct perf_data_file *file = &rec->file; + struct record_opts *opts = &rec->opts; + struct perf_tool *tool = &rec->tool; + int fd = perf_data_file__fd(file); + int err = 0; + + if (file->is_pipe) { + err = perf_event__synthesize_attrs(tool, session, + process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + goto out; + } + + if (have_tracepoints(&rec->evlist->entries)) { + /* + * FIXME err <= 0 here actually means that + * there were no tracepoints so its not really + * an error, just that we don't need to + * synthesize anything. We really have to + * return this more properly and also + * propagate errors that now are calling die() + */ + err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, + process_synthesized_event); + if (err <= 0) { + pr_err("Couldn't record tracing data.\n"); + goto out; + } + rec->bytes_written += err; + } + } + + if (rec->opts.full_auxtrace) { + err = perf_event__synthesize_auxtrace_info(rec->itr, tool, + session, process_synthesized_event); + if (err) + goto out; + } + + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); + + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + + if (perf_guest) { + machines__process_guests(&session->machines, + perf_event__synthesize_guest_os, tool); + } + + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, + process_synthesized_event, opts->sample_address, + opts->proc_map_timeout); +out: + return err; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -580,61 +648,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) machine = &session->machines.host; - if (file->is_pipe) { - err = perf_event__synthesize_attrs(tool, session, - process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - goto out_child; - } - - if (have_tracepoints(&rec->evlist->entries)) { - /* - * FIXME err <= 0 here actually means that - * there were no tracepoints so its not really - * an error, just that we don't need to - * synthesize anything. We really have to - * return this more properly and also - * propagate errors that now are calling die() - */ - err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, - process_synthesized_event); - if (err <= 0) { - pr_err("Couldn't record tracing data.\n"); - goto out_child; - } - rec->bytes_written += err; - } - } - - if (rec->opts.full_auxtrace) { - err = perf_event__synthesize_auxtrace_info(rec->itr, tool, - session, process_synthesized_event); - if (err) - goto out_delete_session; - } - - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); - - err = perf_event__synthesize_modules(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/modules permission or run as root.\n"); - - if (perf_guest) { - machines__process_guests(&session->machines, - perf_event__synthesize_guest_os, tool); - } - - err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, - process_synthesized_event, opts->sample_address, - opts->proc_map_timeout); - if (err != 0) + err = record__synthesize(rec); + if (err < 0) goto out_child; if (rec->realtime_prio) { From e1ab48ba63ee6b2494a67bb60bf99692ecdaca7c Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:32:10 +0000 Subject: [PATCH 11/23] perf record: Introduce record__finish_output() to finish a perf.data Move code for finalizing 'perf.data' to record__finish_output(). It will be used by following commits to split output to multiple files. Signed-off-by: He Kuang Cc: Alexei Starovoitov Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-23-git-send-email-wangnan0@huawei.com Signed-off-by: Wang Nan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index cb583b49a175..46e2772f838e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -468,6 +468,29 @@ static void record__init_features(struct record *rec) perf_header__clear_feat(&session->header, HEADER_STAT); } +static void +record__finish_output(struct record *rec) +{ + struct perf_data_file *file = &rec->file; + int fd = perf_data_file__fd(file); + + if (file->is_pipe) + return; + + rec->session->header.data_size += rec->bytes_written; + file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); + + if (!rec->no_buildid) { + process_buildids(rec); + + if (rec->buildid_all) + dsos__hit_all(rec->session); + } + perf_session__write_header(rec->session, rec->evlist, fd, true); + + return; +} + static volatile int workload_exec_errno; /* @@ -785,18 +808,8 @@ out_child: /* this will be recalculated during process_buildids() */ rec->samples = 0; - if (!err && !file->is_pipe) { - rec->session->header.data_size += rec->bytes_written; - file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); - - if (!rec->no_buildid) { - process_buildids(rec); - - if (rec->buildid_all) - dsos__hit_all(rec->session); - } - perf_session__write_header(rec->session, rec->evlist, fd, true); - } + if (!err) + record__finish_output(rec); if (!err && !quiet) { char samples[128]; From 95c365617aa37878592f2f1c6c64e1abb19f0d4a Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 26 Feb 2016 09:32:17 +0000 Subject: [PATCH 12/23] perf record: Ensure return non-zero rc when mmap fail perf_evlist__mmap_ex() can fail without setting errno (for example, fail in condition checking. In this case all syscall is success). If this happen, record__open() incorrectly returns 0. Force setting rc is a quick way to avoid this problem, or we have to follow all possible code path in perf_evlist__mmap_ex() to make sure there's at least one system call before returning an error. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: He Kuang Cc: Jiri Olsa Cc: Li Zefan Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-30-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 46e2772f838e..515510ecc76a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -324,7 +324,10 @@ try_again: } else { pr_err("failed to mmap with %d (%s)\n", errno, strerror_r(errno, msg, sizeof(msg))); - rc = -errno; + if (errno) + rc = -errno; + else + rc = -EINVAL; } goto out; } From 92a61f6412d3a09d6462252a522fa79c9290f405 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 29 Feb 2016 14:36:21 -0800 Subject: [PATCH 13/23] perf stat: Implement CSV metrics output Now support CSV output for metrics. With the new output callbacks this is relatively straight forward by creating new callbacks. This allows to easily plot metrics from CSV files. The new line callback needs to know the number of fields to skip them correctly Example output before: % perf stat -x, true 0.200687,,task-clock,200687,100.00 0,,context-switches,200687,100.00 0,,cpu-migrations,200687,100.00 40,,page-faults,200687,100.00 730871,,cycles,203601,100.00 551056,,stalled-cycles-frontend,203601,100.00 ,,stalled-cycles-backend,0,100.00 385523,,instructions,203601,100.00 78028,,branches,203601,100.00 3946,,branch-misses,203601,100.00 After: % perf stat -x, true .502457,,task-clock,502457,100.00,0.485,CPUs utilized 0,,context-switches,502457,100.00,0.000,K/sec 0,,cpu-migrations,502457,100.00,0.000,K/sec 45,,page-faults,502457,100.00,0.090,M/sec 644692,,cycles,509102,100.00,1.283,GHz 423470,,stalled-cycles-frontend,509102,100.00,65.69,frontend cycles idle ,,stalled-cycles-backend,0,100.00,,,, 492701,,instructions,509102,100.00,0.76,insn per cycle ,,,,,0.86,stalled cycles per insn 97767,,branches,509102,100.00,194.578,M/sec 4788,,branch-misses,509102,100.00,4.90,of all branches or easier readable $ perf stat -x, -o x.csv true $ column -s, -t x.csv 0.490635 task-clock 490635 100.00 0.489 CPUs utilized 0 context-switches 490635 100.00 0.000 K/sec 0 cpu-migrations 490635 100.00 0.000 K/sec 45 page-faults 490635 100.00 0.092 M/sec 629080 cycles 497698 100.00 1.282 GHz 409498 stalled-cycles-frontend 497698 100.00 65.09 frontend cycles idle stalled-cycles-backend 0 100.00 491424 instructions 497698 100.00 0.78 insn per cycle 0.83 stalled cycles per insn 97278 branches 497698 100.00 198.270 M/sec 4569 branch-misses 497698 100.00 4.70 of all branches Two new fields are added: metric value and metric name. v2: Split out function argument changes v3: Reenable metrics for real. v4: Fix wrong hunk from refactoring. v5: Remove extra "noise" printing (Jiri), but add it to the not counted case. Print empty metrics for not counted. v6: Avoid outputting metric on empty format. v7: Print metric at the end v8: Remove extra run, ena fields v9: Avoid extra new line for unsupported counters Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1456785386-19481-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 73 +++++++++++++++++++++++++++++++++-- tools/perf/util/stat-shadow.c | 2 +- 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 24f222dd2a8a..2ffb8221917a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -739,6 +739,7 @@ struct outstate { FILE *fh; bool newline; const char *prefix; + int nfields; }; #define METRIC_LEN 35 @@ -789,6 +790,43 @@ static void print_metric_std(void *ctx, const char *color, const char *fmt, fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); } +static void new_line_csv(void *ctx) +{ + struct outstate *os = ctx; + int i; + + fputc('\n', os->fh); + if (os->prefix) + fprintf(os->fh, "%s%s", os->prefix, csv_sep); + for (i = 0; i < os->nfields; i++) + fputs(csv_sep, os->fh); +} + +static void print_metric_csv(void *ctx, + const char *color __maybe_unused, + const char *fmt, const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); + return; + } + snprintf(buf, sizeof(buf), fmt, val); + vals = buf; + while (isspace(*vals)) + vals++; + ends = vals; + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + while (isspace(*unit)) + unit++; + fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -860,6 +898,22 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, nl = new_line_std; + if (csv_output) { + static int aggr_fields[] = { + [AGGR_GLOBAL] = 0, + [AGGR_THREAD] = 1, + [AGGR_NONE] = 1, + [AGGR_SOCKET] = 2, + [AGGR_CORE] = 2, + }; + + pm = print_metric_csv; + nl = new_line_csv; + os.nfields = 3; + os.nfields += aggr_fields[stat_config.aggr_mode]; + if (counter->cgrp) + os.nfields++; + } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { aggr_printout(counter, id, nr); @@ -880,7 +934,12 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, fprintf(stat_config.output, "%s%s", csv_sep, counter->cgrp->name); + if (!csv_output) + pm(&os, NULL, NULL, "", 0); + print_noise(counter, noise); print_running(run, ena); + if (csv_output) + pm(&os, NULL, NULL, "", 0); return; } @@ -893,14 +952,20 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, out.new_line = nl; out.ctx = &os; - if (!csv_output) - perf_stat__print_shadow_stats(counter, uval, + if (csv_output) { + print_noise(counter, noise); + print_running(run, ena); + } + + perf_stat__print_shadow_stats(counter, uval, stat_config.aggr_mode == AGGR_GLOBAL ? 0 : cpu_map__id_to_cpu(id), &out); - print_noise(counter, noise); - print_running(run, ena); + if (!csv_output) { + print_noise(counter, noise); + print_running(run, ena); + } } static void print_aggr(char *prefix) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 4d8f18581b9b..367e220e93d5 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -310,8 +310,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); - out->new_line(ctxp); if (total && avg) { + out->new_line(ctxp); ratio = total / avg; print_metric(ctxp, NULL, "%7.2f ", "stalled cycles per insn", From 44d49a6002595ccb95712e86ad2857cd55207602 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 29 Feb 2016 14:36:22 -0800 Subject: [PATCH 14/23] perf stat: Support metrics in --per-core/socket mode Enable metrics printing in --per-core / --per-socket mode. We need to save the shadow metrics in a unique place. Always use the first CPU in the aggregation. Then use the same CPU to retrieve the shadow value later. Example output: % perf stat --per-core -a ./BC1s Performance counter stats for 'system wide': S0-C0 2 2966.020381 task-clock (msec) # 2.004 CPUs utilized (100.00%) S0-C0 2 49 context-switches # 0.017 K/sec (100.00%) S0-C0 2 4 cpu-migrations # 0.001 K/sec (100.00%) S0-C0 2 467 page-faults # 0.157 K/sec S0-C0 2 4,599,061,773 cycles # 1.551 GHz (100.00%) S0-C0 2 9,755,886,883 instructions # 2.12 insn per cycle (100.00%) S0-C0 2 1,906,272,125 branches # 642.704 M/sec (100.00%) S0-C0 2 81,180,867 branch-misses # 4.26% of all branches S0-C1 2 2965.995373 task-clock (msec) # 2.003 CPUs utilized (100.00%) S0-C1 2 62 context-switches # 0.021 K/sec (100.00%) S0-C1 2 8 cpu-migrations # 0.003 K/sec (100.00%) S0-C1 2 281 page-faults # 0.095 K/sec S0-C1 2 6,347,290 cycles # 0.002 GHz (100.00%) S0-C1 2 4,654,156 instructions # 0.73 insn per cycle (100.00%) S0-C1 2 947,121 branches # 0.319 M/sec (100.00%) S0-C1 2 37,322 branch-misses # 3.94% of all branches 1.480409747 seconds time elapsed v2: Rebase to older patches v3: Document shadow cpus. Fix aggr_get_id argument. Fix -A shadows (Jiri) Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1456785386-19481-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 64 ++++++++++++++++++++++++++++++----- tools/perf/util/stat-shadow.c | 7 ++++ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2ffb8221917a..9b5089c5dffe 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -740,6 +740,8 @@ struct outstate { bool newline; const char *prefix; int nfields; + int id, nr; + struct perf_evsel *evsel; }; #define METRIC_LEN 35 @@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); if (stat_config.aggr_mode == AGGR_NONE) fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_CORE) - fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_SOCKET) - fprintf(os->fh, " "); fprintf(os->fh, " "); } @@ -798,6 +797,7 @@ static void new_line_csv(void *ctx) fputc('\n', os->fh); if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(csv_sep, os->fh); } @@ -855,6 +855,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; + + if (!aggr_get_id) + return 0; + + if (stat_config.aggr_mode == AGGR_NONE) + return id; + + if (stat_config.aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -891,7 +913,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, struct perf_stat_output_ctx out; struct outstate os = { .fh = stat_config.output, - .prefix = prefix ? prefix : "" + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, }; print_metric_t pm = print_metric_std; void (*nl)(void *); @@ -958,16 +983,37 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, } perf_stat__print_shadow_stats(counter, uval, - stat_config.aggr_mode == AGGR_GLOBAL ? 0 : - cpu_map__id_to_cpu(id), + first_shadow_cpu(counter, id), &out); - if (!csv_output) { print_noise(counter, noise); print_running(run, ena); } } +static void aggr_update_shadow(void) +{ + int cpu, s2, id, s; + u64 val; + struct perf_evsel *counter; + + for (s = 0; s < aggr_map->nr; s++) { + id = aggr_map->map[s]; + evlist__for_each(evsel_list, counter) { + val = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = aggr_get_id(evsel_list->cpus, cpu); + if (s2 != id) + continue; + val += perf_counts(counter->counts, cpu, 0)->val; + } + val = val * counter->scale; + perf_stat__update_shadow_stats(counter, &val, + first_shadow_cpu(counter, id)); + } + } +} + static void print_aggr(char *prefix) { FILE *output = stat_config.output; @@ -979,6 +1025,8 @@ static void print_aggr(char *prefix) if (!(aggr_map || aggr_get_id)) return; + aggr_update_shadow(); + for (s = 0; s < aggr_map->nr; s++) { id = aggr_map->map[s]; evlist__for_each(evsel_list, counter) { diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 367e220e93d5..5e2d2e34e1bc 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -14,6 +14,13 @@ enum { #define NUM_CTX CTX_BIT_MAX +/* + * AGGR_GLOBAL: Use CPU 0 + * AGGR_SOCKET: Use first CPU of socket + * AGGR_CORE: Use first CPU of core + * AGGR_NONE: Use matching CPU + * AGGR_THREAD: Not supported? + */ static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; From 676787939ef8ccfcf8039104f766ebe5ebe23924 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 1 Feb 2016 02:59:00 +0900 Subject: [PATCH 15/23] tools build: Use .s extension for preprocessed assembler code The "man gcc" says .i extension represents the file is C source code that should not be preprocessed. Here, .s should be used. For clarification, .c ---(preprocess)---> .i .S ---(preprocess)---> .s Signed-off-by: Masahiro Yamada Acked-by: Jiri Olsa Cc: Aaro Koskinen Cc: Adrian Hunter Cc: Lukas Wunner Link: http://lkml.kernel.org/r/1454263140-19670-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 4a96473b180f..ee566e8bd1cf 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -85,7 +85,7 @@ $(OUTPUT)%.i: %.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_i_c) -$(OUTPUT)%.i: %.S FORCE +$(OUTPUT)%.s: %.S FORCE $(call rule_mkdir) $(call if_changed_dep,cc_i_c) From 979ac257b00c53aacec3eacf86142e8c00bee889 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 1 Mar 2016 23:46:20 +0000 Subject: [PATCH 16/23] perf script: Fix double free on command_line The 'command_line' variable is free'd twice if db_export__branch_types() fails. To avoid this, defer the free'ing of 'command_line' to after this call so that the error return path will just free 'command_line' once. Signed-off-by: Colin Ian King Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: He Kuang Cc: Javi Merino Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1456875980-25606-1-git-send-email-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 309d90fa7698..fbd05242b4e5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1094,8 +1094,6 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } - free(command_line); - set_table_handlers(tables); if (tables->db_export_mode) { @@ -1104,6 +1102,8 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } + free(command_line); + return err; error: Py_Finalize(); From 21a30100453516004905d4d5f0806ebaffa95131 Mon Sep 17 00:00:00 2001 From: "Chaos.Chen" Date: Tue, 9 Feb 2016 15:40:14 -0500 Subject: [PATCH 17/23] tools lib traceevent: Fix time stamp rounding issue When rounding to microseconds, if the timestamp subsecond is between .999999500 and .999999999, it is rounded to .1000000, when it should instead increment the second counter due to the overflow. For example, if the timestamp is 1234.999999501 instead of seeing: 1235.000000 we see: 1234.1000000 Signed-off-by: Chaos.Chen Cc: Andrew Morton Link: http://lkml.kernel.org/r/20160209204236.824426460@goodmis.org [ fixed incrementing "secs" instead of decrementing it ] Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 9a1e48afcf89..ce59f4891fa2 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5429,6 +5429,11 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, p = 9; } else { usecs = (nsecs + 500) / NSECS_PER_USEC; + /* To avoid usecs larger than 1 sec */ + if (usecs >= 1000000) { + usecs -= 1000000; + secs++; + } p = 6; } From 9ec72eafee61f68cd57310a99db129ffb71302f3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 9 Feb 2016 15:40:16 -0500 Subject: [PATCH 18/23] tools lib traceevent: Set int_array fields to NULL if freeing from error Had a bug where on error of parsing __print_array() where the fields are freed after they were allocated, but since they were not set to NULL, the freeing of the arg also tried to free the already freed fields causing a double free. Fix process_hex() while at it. Signed-off-by: Steven Rostedt Cc: Andrew Morton Link: http://lkml.kernel.org/r/20160209204237.188327674@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index ce59f4891fa2..fb790aa757eb 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -2635,6 +2635,7 @@ process_hex(struct event_format *event, struct print_arg *arg, char **tok) free_field: free_arg(arg->hex.field); + arg->hex.field = NULL; out: *tok = NULL; return EVENT_ERROR; @@ -2659,8 +2660,10 @@ process_int_array(struct event_format *event, struct print_arg *arg, char **tok) free_size: free_arg(arg->int_array.count); + arg->int_array.count = NULL; free_field: free_arg(arg->int_array.field); + arg->int_array.field = NULL; out: *tok = NULL; return EVENT_ERROR; From a66673a07e260807f570db8f08a9c1207932c665 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 9 Feb 2016 15:40:17 -0500 Subject: [PATCH 19/23] tools lib traceevent: Fix output of %llu for 64 bit values read on 32 bit machines When a long value is read on 32 bit machines for 64 bit output, the parsing needs to change "%lu" into "%llu", as the value is read natively. Unfortunately, if "%llu" is already there, the code will add another "l" to it and fail to parse it properly. Signed-off-by: Steven Rostedt Cc: Andrew Morton Link: http://lkml.kernel.org/r/20160209204237.337024613@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index fb790aa757eb..865dea55454b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4978,7 +4978,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event break; } } - if (pevent->long_size == 8 && ls && + if (pevent->long_size == 8 && ls == 1 && sizeof(long) != 8) { char *p; From 9b240637eb9b9677a9e9bc2dc568f5e0811e04d6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 2 Mar 2016 09:58:00 -0300 Subject: [PATCH 20/23] perf test: Fix hists related entries That got broken by d3a72fd8187b ("perf report: Fix indentation of dynamic entries in hierarchy"), by using the evlist in setup_sorting() without checking if it is NULL, as done in some 'perf test' entries: $ find tools/ -name "*.c" | xargs grep 'setup_sorting(NULL);' tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_output.c: setup_sorting(NULL); tools/perf/tests/hists_cumulate.c: setup_sorting(NULL); tools/perf/tests/hists_cumulate.c: setup_sorting(NULL); tools/perf/tests/hists_cumulate.c: setup_sorting(NULL); tools/perf/tests/hists_cumulate.c: setup_sorting(NULL); $ Fix it. Before: [root@jouet ~]# perf test 15: Test matching and linking multiple hists : FAILED! 16: Try 'import perf' in python, checking link problems : Ok 17: Test breakpoint overflow signal handler : Ok 18: Test breakpoint overflow sampling : Ok 19: Test number of exit event of a simple workload : Ok 20: Test software clock events have valid period values : Ok 21: Test object code reading : Ok 22: Test sample parsing : Ok 23: Test using a dummy software event to keep tracking : Ok 24: Test parsing with no sample_id_all bit set : Ok 25: Test filtering hist entries : FAILED! 26: Test mmap thread lookup : Ok 27: Test thread mg sharing : Ok 28: Test output sorting of hist entries : FAILED! 29: Test cumulation of child hist entries : FAILED! After the patch the above failed tests complete successfully. Acked-by: Namhyung Kim Cc: David Ahern Cc: Jiri Olsa Cc: Wang Nan Fixes: d3a72fd8187b ("perf report: Fix indentation of dynamic entries in hierarchy") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 5888bfe9a193..4380a2858802 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2635,25 +2635,14 @@ out: return ret; } -int setup_sorting(struct perf_evlist *evlist) +static void evlist__set_hists_nr_sort_keys(struct perf_evlist *evlist) { - int err; - struct hists *hists; struct perf_evsel *evsel; - struct perf_hpp_fmt *fmt; - - err = __setup_sorting(evlist); - if (err < 0) - return err; - - if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add("parent", evlist); - if (err < 0) - return err; - } evlist__for_each(evlist, evsel) { - hists = evsel__hists(evsel); + struct perf_hpp_fmt *fmt; + struct hists *hists = evsel__hists(evsel); + hists->nr_sort_keys = perf_hpp_list.nr_sort_keys; /* @@ -2667,6 +2656,24 @@ int setup_sorting(struct perf_evlist *evlist) hists->nr_sort_keys--; } } +} + +int setup_sorting(struct perf_evlist *evlist) +{ + int err; + + err = __setup_sorting(evlist); + if (err < 0) + return err; + + if (parent_pattern != default_parent_pattern) { + err = sort_dimension__add("parent", evlist); + if (err < 0) + return err; + } + + if (evlist != NULL) + evlist__set_hists_nr_sort_keys(evlist); reset_dimensions(); From e17a0e16ca3a63d1bafbcba313586cf137418f45 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 2 Mar 2016 12:55:22 +0000 Subject: [PATCH 21/23] perf tests: Initialize sa.sa_flags The sa_flags field is not being initialized, so a garbage value is being passed to sigaction. Initialize it to zero. Signed-off-by: Colin Ian King Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1456923322-29697-1-git-send-email-colin.king@canonical.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/rdpmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index 7bb0d13c235f..7945462851a4 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -103,6 +103,7 @@ static int __test__rdpmc(void) sigfillset(&sa.sa_mask); sa.sa_sigaction = segfault_handler; + sa.sa_flags = 0; sigaction(SIGSEGV, &sa, NULL); fd = sys_perf_event_open(&attr, 0, -1, -1, From 1b69317d2dc80bc8e1d005e1a771c4f5bff3dabe Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 2 Mar 2016 13:50:25 +0000 Subject: [PATCH 22/23] tools/power turbostat: fix various build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc 6 we're getting various build warnings that just require some trivial function declaration and call fixes: turbostat.c: In function ‘dump_cstate_pstate_config_info’: turbostat.c:1973:1: warning: type of ‘family’ defaults to ‘int’ dump_cstate_pstate_config_info(family, model) turbostat.c:1973:1: warning: type of ‘model’ defaults to ‘int’ turbostat.c: In function ‘get_tdp’: turbostat.c:2145:8: warning: type of ‘model’ defaults to ‘int’ double get_tdp(model) turbostat.c: In function ‘perf_limit_reasons_probe’: turbostat.c:2259:6: warning: type of ‘family’ defaults to ‘int’ void perf_limit_reasons_probe(family, model) turbostat.c:2259:6: warning: type of ‘model’ defaults to ‘int’ Signed-off-by: Colin Ian King Cc: Matt Fleming Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wbicer8n0s9qe6ql8h9x478e@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/power/x86/turbostat/turbostat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dac7e05a6ac..3fa94e291d16 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1970,7 +1970,7 @@ int has_config_tdp(unsigned int family, unsigned int model) } static void -dump_cstate_pstate_config_info(family, model) +dump_cstate_pstate_config_info(unsigned int family, unsigned int model) { if (!do_nhm_platform_info) return; @@ -2142,7 +2142,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp(model) +double get_tdp(unsigned int model) { unsigned long long msr; @@ -2256,7 +2256,7 @@ void rapl_probe(unsigned int family, unsigned int model) return; } -void perf_limit_reasons_probe(family, model) +void perf_limit_reasons_probe(unsigned int family, unsigned int model) { if (!genuine_intel) return; @@ -2792,7 +2792,7 @@ void process_cpuid() perf_limit_reasons_probe(family, model); if (debug) - dump_cstate_pstate_config_info(); + dump_cstate_pstate_config_info(family, model); if (has_skl_msrs(family, model)) calculate_tsc_tweak(); From fb4605ba47e772ff9d62d1d54218a832ec8b3e1d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 1 Mar 2016 10:57:52 -0800 Subject: [PATCH 23/23] perf stat: Check for frontend stalled for metrics Add an extra check for frontend stalled in the metrics. This avoids an extra column for the --metric-only case when the CPU does not support frontend stalled. v2: Add separate init function Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1456858672-21594-8-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 + tools/perf/util/stat-shadow.c | 9 ++++++++- tools/perf/util/stat.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9b5089c5dffe..baa82078c148 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1966,6 +1966,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, (const char **) stat_usage, PARSE_OPT_STOP_AT_NON_OPTION); + perf_stat__init_shadow_stats(); if (csv_sep) { csv_output = true; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 5e2d2e34e1bc..b33ffb2af2cf 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -2,6 +2,7 @@ #include "evsel.h" #include "stat.h" #include "color.h" +#include "pmu.h" enum { CTX_BIT_USER = 1 << 0, @@ -35,9 +36,15 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; +static bool have_frontend_stalled; struct stats walltime_nsecs_stats; +void perf_stat__init_shadow_stats(void) +{ + have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); +} + static int evsel_context(struct perf_evsel *evsel) { int ctx = 0; @@ -323,7 +330,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, "%7.2f ", "stalled cycles per insn", ratio); - } else { + } else if (have_frontend_stalled) { print_metric(ctxp, NULL, NULL, "stalled cycles per insn", 0); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f02af68adc04..0150e786ccc7 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -72,6 +72,7 @@ typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val); typedef void (*new_line_t )(void *ctx); +void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, int cpu);