From ccb5597f9ba11b67b8aa8c6f4682675eceee0e21 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Oct 2015 20:06:01 +0200 Subject: [PATCH 01/16] tools lib api fs: No need to use PATH_MAX + 1 Because there's no point, PATH_MAX is big enough. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Don Zickus Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444068369-20978-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/api/fs/fs.c | 2 +- tools/lib/api/fs/tracing_path.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 732dbef588b0..459599d1b6c4 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -69,7 +69,7 @@ static const char * const tracefs__known_mountpoints[] = { struct fs { const char *name; const char * const *mounts; - char path[PATH_MAX + 1]; + char path[PATH_MAX]; bool found; long magic; }; diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 0406a7d5c891..a26bb5ea8283 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -12,9 +12,9 @@ #include "tracing_path.h" -char tracing_mnt[PATH_MAX + 1] = "/sys/kernel/debug"; -char tracing_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing"; -char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; +char tracing_mnt[PATH_MAX] = "/sys/kernel/debug"; +char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing"; +char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events"; static void __tracing_path_set(const char *tracing, const char *mountpoint) From 84422592e58f6f1ea03688fcf92143bbc095fa88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Oct 2015 20:06:02 +0200 Subject: [PATCH 02/16] perf evlist: Display DATA_SRC sample type bit Adding DATA_SRC bit_name call to display sample_type properly. $ perf evlist -v cpu/mem-loads/pp: ...SNIP... 
sample_type: IP|TID|TIME|ADDR|CPU|PERIOD|DATA_SRC, ... Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Don Zickus Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444068369-20978-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 58890044d835..0b1c289da36d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1178,7 +1178,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), - bit_name(IDENTIFIER), bit_name(REGS_INTR), + bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), { .name = NULL, } }; #undef bit_name From 5ec4502d774699194952209ff3ebe65d2472e15a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Oct 2015 20:06:03 +0200 Subject: [PATCH 03/16] perf annotate: Fix sizeof_sym_hist overflow issue The annotated_source::sizeof_sym_hist could easily overflow int size, resulting in a crash in __symbol__inc_addr_samples. Changing its type to size_t, as was probably intended from the beginning based on the initialization code in symbol__alloc_hist. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Don Zickus Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444068369-20978-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index e9996092a093..cea323d9ee7e 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -122,7 +122,7 @@ struct annotated_source { struct list_head source; struct source_line *lines; int nr_histograms; - int sizeof_sym_hist; + size_t sizeof_sym_hist; struct cyc_hist *cycles_hist; struct sym_hist histograms[0]; }; From 45cf6c33f95448752dd3d5531388429c3a5012d0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Oct 2015 20:06:04 +0200 Subject: [PATCH 04/16] perf tools: Export perf_event_attr__set_max_precise_ip() It'll be used in following patch. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Don Zickus Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444068369-20978-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/evlist.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e7e195d867ea..d1392194a9a9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -205,7 +205,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist) } } -static void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) +void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) { attr->precise_ip = 3; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 66bc9d4c0869..a459fe71b452 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -290,4 +290,6 @@ void perf_evlist__to_front(struct perf_evlist *evlist, void 
perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); + +void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); #endif /* __PERF_EVLIST_H */ From 7f94af7a489fada17d28cc60e8f4409ce216bd6d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Oct 2015 20:06:05 +0200 Subject: [PATCH 05/16] perf tools: Introduce 'P' modifier to request max precision The 'P' modifier will cause the event to get the maximum possible detected precise level. The following record: $ perf record -e cycles:P ... will detect the maximum precise level for the 'cycles' event and use it. Committer note: Testing it: $ perf record -e cycles:P usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (9 samples) ] $ perf evlist cycles:P $ perf evlist -v cycles:P: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, precise_ip: 2, sample_id_all: 1, mmap2: 1, comm_exec: 1 $ Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444068369-20978-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 1 + tools/perf/util/evsel.c | 3 +++ tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 8 +++++++- tools/perf/util/parse-events.l | 2 +- 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index ad60c6ea1997..79483f40e991 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -30,6 +30,7 @@ counted. 
The following modifiers exist: G - guest counting (in KVM guests) H - host counting (not in KVM guests) p - precise level + P - use maximum detected precise level S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0b1c289da36d..8be867ccefe9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -882,6 +882,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->clockid = opts->clockid; } + if (evsel->precise_max) + perf_event_attr__set_max_precise_ip(attr); + /* * Apply event specific term settings, * it overloads any global configuration. diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 7906666580da..02a5fed8d924 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -111,6 +111,7 @@ struct perf_evsel { bool system_wide; bool tracking; bool per_pkg; + bool precise_max; /* parse modifier helper */ int exclude_GH; int nr_members; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5d0cfce2599b..3ed8bf175163 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -923,6 +923,7 @@ struct event_modifier { int eG; int eI; int precise; + int precise_max; int exclude_GH; int sample_read; int pinned; @@ -938,6 +939,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int eG = evsel ? evsel->attr.exclude_guest : 0; int eI = evsel ? evsel->attr.exclude_idle : 0; int precise = evsel ? evsel->attr.precise_ip : 0; + int precise_max = 0; int sample_read = 0; int pinned = evsel ? 
evsel->attr.pinned : 0; @@ -974,6 +976,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, /* use of precise requires exclude_guest */ if (!exclude_GH) eG = 1; + } else if (*str == 'P') { + precise_max = 1; } else if (*str == 'S') { sample_read = 1; } else if (*str == 'D') { @@ -1004,6 +1008,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->eG = eG; mod->eI = eI; mod->precise = precise; + mod->precise_max = precise_max; mod->exclude_GH = exclude_GH; mod->sample_read = sample_read; mod->pinned = pinned; @@ -1020,7 +1025,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppSDI") - 1)) + if (strlen(str) > (sizeof("ukhGHpppPSDI") - 1)) return -1; while (*p) { @@ -1059,6 +1064,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->attr.exclude_idle = mod.eI; evsel->exclude_GH = mod.exclude_GH; evsel->sample_read = mod.sample_read; + evsel->precise_max = mod.precise_max; if (perf_evsel__is_group_leader(evsel)) evsel->attr.pinned = mod.pinned; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index c29832bce496..be244573a02e 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -122,7 +122,7 @@ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpGHSDI]+ +modifier_event [ukhpPGHSDI]+ modifier_bp [rwx]{1,3} %% From ddd83c9717ef8204f17cc63d6dcb5053d472caee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Oct 2015 20:06:06 +0200 Subject: [PATCH 06/16] perf tests: Add parsing test for 'P' modifier We can test that the 'P' modifier gets properly parsed; the functionality test itself is beyond this suite. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Don Zickus Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444068369-20978-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 3a2ebe666192..0648b84a9171 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1259,6 +1259,17 @@ test__checkevent_breakpoint_len_rw_modifier(struct perf_evlist *evlist) return test__checkevent_breakpoint_rw(evlist); } +static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_SW_TASK_CLOCK == evsel->attr.config); + return 0; +} + static int count_tracepoints(void) { struct dirent *events_ent; @@ -1562,6 +1573,11 @@ static struct evlist_test test__events[] = { .check = test__checkevent_exclude_idle_modifier_1, .id = 46, }, + { + .name = "task-clock:P,cycles", + .check = test__checkevent_precise_max_modifier, + .id = 47, + }, }; static struct evlist_test test__events_pmu[] = { From 28e6db205b3ed3f1d86a00c69b3304190377da5f Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Mon, 5 Oct 2015 20:06:07 +0200 Subject: [PATCH 07/16] perf tools: Add support for sorting on the iaddr Sorting on 'symbol' gives too broad a resolution as it can cover a range of IP addresses. Use the iaddr instead to get proper sorting on IP addresses. Need to use the 'mem_sort' feature of perf record. New sort option is: symbol_iaddr, header label is 'Code Symbol'. $ perf mem report --stdio -F +symbol_iaddr # Overhead Samples Code Symbol Local Weight # ........ ............ ........................ 
............ # 54.08% 1 [k] nmi_handle 192 4.51% 1 [k] finish_task_switch 16 3.66% 1 [.] malloc 13 3.10% 1 [.] __strcoll_l 11 Signed-off-by: Don Zickus Cc: David Ahern Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444068369-20978-8-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 37 +++++++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 1 + 3 files changed, 39 insertions(+) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 8c20a8f6b214..a48a2078d288 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -49,6 +49,7 @@ enum hist_column { HISTC_MEM_LVL, HISTC_MEM_SNOOP, HISTC_MEM_DCACHELINE, + HISTC_MEM_IADDR_SYMBOL, HISTC_TRANSACTION, HISTC_CYCLES, HISTC_NR_COLS, /* Last entry */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6b9556d298c9..ee94b728fca4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -654,6 +654,35 @@ static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf, width); } +static int64_t +sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = 0, r = 0; + + if (left->mem_info) + l = left->mem_info->iaddr.addr; + if (right->mem_info) + r = right->mem_info->iaddr.addr; + + return (int64_t)(r - l); +} + +static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + uint64_t addr = 0; + struct map *map = NULL; + struct symbol *sym = NULL; + + if (he->mem_info) { + addr = he->mem_info->iaddr.addr; + map = he->mem_info->iaddr.map; + sym = he->mem_info->iaddr.sym; + } + return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size, + width); +} + static int64_t sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1077,6 +1106,13 @@ struct sort_entry sort_mem_daddr_sym = { .se_width_idx = HISTC_MEM_DADDR_SYMBOL, }; +struct 
sort_entry sort_mem_iaddr_sym = { + .se_header = "Code Symbol", + .se_cmp = sort__iaddr_cmp, + .se_snprintf = hist_entry__iaddr_snprintf, + .se_width_idx = HISTC_MEM_IADDR_SYMBOL, +}; + struct sort_entry sort_mem_daddr_dso = { .se_header = "Data Object", .se_cmp = sort__dso_daddr_cmp, @@ -1299,6 +1335,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym), + DIM(SORT_MEM_IADDR_SYMBOL, "symbol_iaddr", sort_mem_iaddr_sym), DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso), DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked), DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb), diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index c06b75746613..33b3d30e18d3 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -201,6 +201,7 @@ enum sort_type { SORT_MEM_LVL, SORT_MEM_SNOOP, SORT_MEM_DCACHELINE, + SORT_MEM_IADDR_SYMBOL, }; /* From b34b3bf0798633cc248b682f5b4f6509739ce234 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Oct 2015 20:06:08 +0200 Subject: [PATCH 08/16] perf tools: Setup proper width for symbol_iaddr field We need to properly initialize column width for symbol_iaddr field, so all symbols could fit in the column. 
Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Don Zickus Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444068369-20978-9-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index c346b331b892..4fd37d6708cb 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -132,6 +132,18 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); } + + if (h->mem_info->iaddr.sym) { + symlen = (int)h->mem_info->iaddr.sym->namelen + 4 + + unresolved_col_width + 2; + hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, + symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, + symlen); + } + if (h->mem_info->daddr.map) { symlen = dso__name_len(h->mem_info->daddr.map->dso); hists__new_col_len(hists, HISTC_MEM_DADDR_DSO, @@ -143,6 +155,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) } else { symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); + hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, symlen); hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); } From a1853e2c6f8ed488adcd84fb162c5b3f0b674d9b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Oct 2015 20:06:09 +0200 Subject: [PATCH 09/16] perf tools: Handle -h and -v options Adding handling for '-h' and '-v' options to invoke help and version command respectively. 
Current behaviour is: $ perf -v Unknown option: -v Usage: perf [--version] [--help] [OPTIONS] COMMAND [ARGS] $ perf -h Unknown option: -h Usage: perf [--version] [--help] [OPTIONS] COMMAND [ARGS] New behaviour: $ perf -h usage: perf [--version] [--help] [OPTIONS] COMMAND [ARGS] The most commonly used perf commands are: annotate Read perf.data (created by perf record) and display annotated code archive Create archive with object files with build-ids found in perf.data file bench General framework for benchmark suites ... $ perf -v perf version 4.3.rc3.gc99e32 Updated man page. Requested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444068369-20978-10-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.txt | 8 ++++++++ tools/perf/perf.c | 14 ++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 2b131776363e..864e37597252 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -27,6 +27,14 @@ OPTIONS Setup buildid cache directory. It has higher priority than buildid.dir config file option. +-v:: +--version:: + Display perf version. + +-h:: +--help:: + Run perf help command. + DESCRIPTION ----------- Performance counters for Linux are a new kernel-based subsystem diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 1fded922bcc8..543713422d14 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -161,6 +161,20 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version")) break; + /* + * Shortcut for '-h' and '-v' options to invoke help + * and version command. 
+ */ + if (!strcmp(cmd, "-h")) { + (*argv)[0] = "--help"; + break; + } + + if (!strcmp(cmd, "-v")) { + (*argv)[0] = "--version"; + break; + } + /* * Check remaining flags. */ From 31b6753f95320260b160935d0e9c0b29f096ab57 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 5 Oct 2015 15:40:19 +0100 Subject: [PATCH 10/16] perf tests: Add arch tests Tests that only make sense for some architectures currently live in the same place as the generic tests. Move out the x86-specific tests into tools/perf/arch/x86/tests and define an 'arch_tests' array, which is the list of tests that only apply to the build architecture. The main idea is to encourage developers to add arch tests to build out perf's test coverage, without dumping everything in tools/perf/tests. Signed-off-by: Matt Fleming Cc: Adrian Hunter Cc: Andi Kleen Cc: Fenghua Yu Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Vince Weaver Link: http://lkml.kernel.org/n/tip-p4uc1c15ssbj8xj7ku5slpa6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/Build | 2 +- tools/perf/arch/x86/include/arch-tests.h | 6 +++++ tools/perf/arch/x86/tests/Build | 6 +++-- tools/perf/arch/x86/tests/arch-tests.c | 10 +++++++++ tools/perf/tests/builtin-test.c | 28 +++++++++++++++++------- tools/perf/tests/tests.h | 5 +++++ 6 files changed, 46 insertions(+), 11 deletions(-) create mode 100644 tools/perf/arch/x86/include/arch-tests.h create mode 100644 tools/perf/arch/x86/tests/arch-tests.c diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build index 41bf61da476a..db52fa22d3a1 100644 --- a/tools/perf/arch/x86/Build +++ b/tools/perf/arch/x86/Build @@ -1,2 +1,2 @@ libperf-y += util/ -libperf-$(CONFIG_DWARF_UNWIND) += tests/ +libperf-y += tests/ diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h new file mode 100644 index 000000000000..4bd41d8e1ca4 --- /dev/null +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -0,0 +1,6 @@ +#ifndef 
ARCH_TESTS_H +#define ARCH_TESTS_H + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index b30eff9bcc83..d827ef384b33 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -1,2 +1,4 @@ -libperf-y += regs_load.o -libperf-y += dwarf-unwind.o +libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o +libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o + +libperf-y += arch-tests.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c new file mode 100644 index 000000000000..fca9eb9d39a2 --- /dev/null +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -0,0 +1,10 @@ +#include +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { + { + .func = NULL, + }, + +}; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index d9bf51dc8cf5..2b6c1bf13456 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -14,10 +14,13 @@ #include "parse-options.h" #include "symbol.h" -static struct test { - const char *desc; - int (*func)(void); -} tests[] = { +struct test __weak arch_tests[] = { + { + .func = NULL, + }, +}; + +static struct test generic_tests[] = { { .desc = "vmlinux symtab matches kallsyms", .func = test__vmlinux_matches_kallsyms, @@ -195,6 +198,11 @@ static struct test { }, }; +static struct test *tests[] = { + generic_tests, + arch_tests, +}; + static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[]) { int i; @@ -249,22 +257,25 @@ static int run_test(struct test *test) return err; } -#define for_each_test(t) for (t = &tests[0]; t->func; t++) +#define for_each_test(j, t) \ + for (j = 0; j < ARRAY_SIZE(tests); j++) \ + for (t = &tests[j][0]; t->func; t++) static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) { struct test *t; + unsigned int j; int i = 0; int width = 0; - for_each_test(t) { + for_each_test(j, 
t) { int len = strlen(t->desc); if (width < len) width = len; } - for_each_test(t) { + for_each_test(j, t) { int curr = i++, err; if (!perf_test__matches(t, curr, argc, argv)) @@ -300,10 +311,11 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) static int perf_test__list(int argc, const char **argv) { + unsigned int j; struct test *t; int i = 0; - for_each_test(t) { + for_each_test(j, t) { if (argc > 1 && !strstr(t->desc, argv[1])) continue; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 0b3549672c16..b1cb1c081e3c 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -24,6 +24,11 @@ enum { TEST_SKIP = -2, }; +struct test { + const char *desc; + int (*func)(void); +}; + /* Tests */ int test__vmlinux_matches_kallsyms(void); int test__openat_syscall_event(void); From d8b167f9d8af817073ee35cf904e2e527465dbc1 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 5 Oct 2015 15:40:20 +0100 Subject: [PATCH 11/16] perf tests: Move x86 tests into arch directory Move out the x86-specific tests into tools/perf/arch/x86/tests and define an 'arch_tests' array, which is the list of tests that only apply to the build architecture. We can also now begin to get rid of some of the #ifdef code that is present in the generic perf tests. 
Signed-off-by: Matt Fleming Cc: Adrian Hunter Cc: Andi Kleen Cc: Fenghua Yu Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Peter Zijlstra Cc: Vikas Shivappa Cc: Vince Weaver Link: http://lkml.kernel.org/n/tip-9s68h4ptg06ah0lgnjz55mqn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 12 ++++++++ tools/perf/arch/x86/tests/Build | 3 ++ tools/perf/arch/x86/tests/arch-tests.c | 20 +++++++++++++ tools/perf/arch/x86/tests/dwarf-unwind.c | 1 + .../{ => arch/x86}/tests/gen-insn-x86-dat.awk | 0 .../{ => arch/x86}/tests/gen-insn-x86-dat.sh | 0 .../{ => arch/x86}/tests/insn-x86-dat-32.c | 0 .../{ => arch/x86}/tests/insn-x86-dat-64.c | 0 .../{ => arch/x86}/tests/insn-x86-dat-src.c | 0 tools/perf/{ => arch/x86}/tests/insn-x86.c | 3 +- .../{ => arch/x86}/tests/perf-time-to-tsc.c | 4 ++- tools/perf/{ => arch/x86}/tests/rdpmc.c | 7 ++--- tools/perf/tests/Build | 6 ---- tools/perf/tests/builtin-test.c | 28 ------------------- tools/perf/tests/dwarf-unwind.c | 4 +++ tools/perf/tests/tests.h | 5 +--- 16 files changed, 48 insertions(+), 45 deletions(-) rename tools/perf/{ => arch/x86}/tests/gen-insn-x86-dat.awk (100%) rename tools/perf/{ => arch/x86}/tests/gen-insn-x86-dat.sh (100%) rename tools/perf/{ => arch/x86}/tests/insn-x86-dat-32.c (100%) rename tools/perf/{ => arch/x86}/tests/insn-x86-dat-64.c (100%) rename tools/perf/{ => arch/x86}/tests/insn-x86-dat-src.c (100%) rename tools/perf/{ => arch/x86}/tests/insn-x86.c (98%) rename tools/perf/{ => arch/x86}/tests/perf-time-to-tsc.c (98%) rename tools/perf/{ => arch/x86}/tests/rdpmc.c (97%) diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 4bd41d8e1ca4..5927cf224325 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -1,6 +1,18 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H +/* Tests */ +int test__rdpmc(void); +int test__perf_time_to_tsc(void); +int test__insn_x86(void); + +#ifdef 
HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread); +#endif + extern struct test arch_tests[]; #endif diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index d827ef384b33..8e2c5a38c3b9 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -2,3 +2,6 @@ libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o libperf-y += arch-tests.o +libperf-y += rdpmc.o +libperf-y += perf-time-to-tsc.o +libperf-$(CONFIG_AUXTRACE) += insn-x86.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index fca9eb9d39a2..d116c217af99 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -3,6 +3,26 @@ #include "arch-tests.h" struct test arch_tests[] = { + { + .desc = "x86 rdpmc test", + .func = test__rdpmc, + }, + { + .desc = "Test converting perf time to TSC", + .func = test__perf_time_to_tsc, + }, +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "Test dwarf unwind", + .func = test__dwarf_unwind, + }, +#endif +#ifdef HAVE_AUXTRACE_SUPPORT + { + .desc = "Test x86 instruction decoder - new instructions", + .func = test__insn_x86, + }, +#endif { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index d8bbf7ad1681..7f209ce827bf 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -5,6 +5,7 @@ #include "event.h" #include "debug.h" #include "tests/tests.h" +#include "arch-tests.h" #define STACK_SIZE 8192 diff --git a/tools/perf/tests/gen-insn-x86-dat.awk b/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk similarity index 100% rename from tools/perf/tests/gen-insn-x86-dat.awk rename to tools/perf/arch/x86/tests/gen-insn-x86-dat.awk diff --git a/tools/perf/tests/gen-insn-x86-dat.sh 
b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh similarity index 100% rename from tools/perf/tests/gen-insn-x86-dat.sh rename to tools/perf/arch/x86/tests/gen-insn-x86-dat.sh diff --git a/tools/perf/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c similarity index 100% rename from tools/perf/tests/insn-x86-dat-32.c rename to tools/perf/arch/x86/tests/insn-x86-dat-32.c diff --git a/tools/perf/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c similarity index 100% rename from tools/perf/tests/insn-x86-dat-64.c rename to tools/perf/arch/x86/tests/insn-x86-dat-64.c diff --git a/tools/perf/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c similarity index 100% rename from tools/perf/tests/insn-x86-dat-src.c rename to tools/perf/arch/x86/tests/insn-x86-dat-src.c diff --git a/tools/perf/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c similarity index 98% rename from tools/perf/tests/insn-x86.c rename to tools/perf/arch/x86/tests/insn-x86.c index 5c49eec81349..b6115dfd28f0 100644 --- a/tools/perf/tests/insn-x86.c +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -1,7 +1,8 @@ #include #include "debug.h" -#include "tests.h" +#include "tests/tests.h" +#include "arch-tests.h" #include "intel-pt-decoder/insn.h" #include "intel-pt-decoder/intel-pt-insn-decoder.h" diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c similarity index 98% rename from tools/perf/tests/perf-time-to-tsc.c rename to tools/perf/arch/x86/tests/perf-time-to-tsc.c index 5f49484f1abc..658cd200af74 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -9,7 +9,9 @@ #include "thread_map.h" #include "cpumap.h" #include "tsc.h" -#include "tests.h" +#include "tests/tests.h" + +#include "arch-tests.h" #define CHECK__(x) { \ while ((x) < 0) { \ diff --git a/tools/perf/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c similarity index 97% rename from 
tools/perf/tests/rdpmc.c rename to tools/perf/arch/x86/tests/rdpmc.c index d31f2c4d9f64..e7688214c7cf 100644 --- a/tools/perf/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -5,10 +5,9 @@ #include #include "perf.h" #include "debug.h" -#include "tests.h" +#include "tests/tests.h" #include "cloexec.h" - -#if defined(__x86_64__) || defined(__i386__) +#include "arch-tests.h" static u64 rdpmc(unsigned int counter) { @@ -173,5 +172,3 @@ int test__rdpmc(void) return 0; } - -#endif diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index c6f198ae65fb..50de2253cff6 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -8,7 +8,6 @@ perf-y += openat-syscall-all-cpus.o perf-y += openat-syscall-tp-fields.o perf-y += mmap-basic.o perf-y += perf-record.o -perf-y += rdpmc.o perf-y += evsel-roundtrip-name.o perf-y += evsel-tp-sched.o perf-y += fdarray.o @@ -35,11 +34,6 @@ perf-y += thread-map.o perf-y += llvm.o perf-y += topology.o -perf-$(CONFIG_X86) += perf-time-to-tsc.o -ifdef CONFIG_AUXTRACE -perf-$(CONFIG_X86) += insn-x86.o -endif - ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 2b6c1bf13456..66f72d3d6677 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -41,12 +41,6 @@ static struct test generic_tests[] = { .desc = "parse events tests", .func = test__parse_events, }, -#if defined(__x86_64__) || defined(__i386__) - { - .desc = "x86 rdpmc test", - .func = test__rdpmc, - }, -#endif { .desc = "Validate PERF_RECORD_* events & perf_sample fields", .func = test__PERF_RECORD, @@ -107,12 +101,6 @@ static struct test generic_tests[] = { .desc = "Test software clock events have valid period values", .func = test__sw_clock_freq, }, -#if defined(__x86_64__) || defined(__i386__) - { - .desc = "Test converting perf time to TSC", - .func = test__perf_time_to_tsc, - }, -#endif { .desc = 
"Test object code reading", .func = test__code_reading, @@ -129,14 +117,6 @@ static struct test generic_tests[] = { .desc = "Test parsing with no sample_id_all bit set", .func = test__parse_no_sample_id_all, }, -#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) -#ifdef HAVE_DWARF_UNWIND_SUPPORT - { - .desc = "Test dwarf unwind", - .func = test__dwarf_unwind, - }, -#endif -#endif { .desc = "Test filtering hist entries", .func = test__hists_filter, @@ -181,14 +161,6 @@ static struct test generic_tests[] = { .desc = "Test LLVM searching and compiling", .func = test__llvm, }, -#ifdef HAVE_AUXTRACE_SUPPORT -#if defined(__x86_64__) || defined(__i386__) - { - .desc = "Test x86 instruction decoder - new instructions", - .func = test__insn_x86, - }, -#endif -#endif { .desc = "Test topology in session", .func = test_session_topology, diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 40b36c462427..07221793a3ac 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -11,6 +11,10 @@ #include "thread.h" #include "callchain.h" +#if defined (__x86_64__) || defined (__i386__) +#include "arch-tests.h" +#endif + /* For bsearch. We try to unwind functions in shared object. 
*/ #include diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b1cb1c081e3c..c80486969f83 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -35,7 +35,6 @@ int test__openat_syscall_event(void); int test__openat_syscall_event_on_all_cpus(void); int test__basic_mmap(void); int test__PERF_RECORD(void); -int test__rdpmc(void); int test__perf_evsel__roundtrip_name_test(void); int test__perf_evsel__tp_sched_test(void); int test__syscall_openat_tp_fields(void); @@ -51,7 +50,6 @@ int test__bp_signal(void); int test__bp_signal_overflow(void); int test__task_exit(void); int test__sw_clock_freq(void); -int test__perf_time_to_tsc(void); int test__code_reading(void); int test__sample_parsing(void); int test__keep_tracking(void); @@ -68,10 +66,9 @@ int test__fdarray__add(void); int test__kmod_path__parse(void); int test__thread_map(void); int test__llvm(void); -int test__insn_x86(void); int test_session_topology(void); -#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) +#if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; struct perf_sample; From 035827e9f2bd71a280f4eb58c65811d377ab2217 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 5 Oct 2015 15:40:21 +0100 Subject: [PATCH 12/16] perf tests: Add Intel CQM test Peter reports that it's possible to trigger a WARN_ON_ONCE() in the Intel CQM code by combining a hardware event and an Intel CQM (software) event into a group. Unfortunately, the perf tools are not able to create this bundle and we need to manually construct a test case. For posterity, record Peter's proof of concept test case in tools/perf so that it presents a model for how we can perform architecture specific tests, or "arch tests", in perf in the future. 
The particular issue triggered in the test case is that when the counter for the hardware event overflows and triggers a PMI we'll read both the hardware event and the software event counters. Unfortunately, for CQM that involves performing an IPI to read the CQM event counters on all sockets, which in NMI context triggers the WARN_ON_ONCE(). Reported-by: Peter Zijlstra Signed-off-by: Matt Fleming Cc: Adrian Hunter Cc: Andi Kleen Cc: Fenghua Yu Cc: Jiri Olsa Cc: Kanaka Juvva Cc: Vikas Shivappa Cc: Vince Weaver Link: http://lkml.kernel.org/r/1437490509-15373-1-git-send-email-matt@codeblueprint.co.uk Link: http://lkml.kernel.org/n/tip-3p4ra0u8vzm7m289a1m799kf@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 + tools/perf/arch/x86/tests/Build | 1 + tools/perf/arch/x86/tests/arch-tests.c | 4 + tools/perf/arch/x86/tests/intel-cqm.c | 124 +++++++++++++++++++++++ 4 files changed, 130 insertions(+) create mode 100644 tools/perf/arch/x86/tests/intel-cqm.c diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 5927cf224325..7ed00f4b0908 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -5,6 +5,7 @@ int test__rdpmc(void); int test__perf_time_to_tsc(void); int test__insn_x86(void); +int test__intel_cqm_count_nmi_context(void); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 8e2c5a38c3b9..cbb7e978166b 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -5,3 +5,4 @@ libperf-y += arch-tests.o libperf-y += rdpmc.o libperf-y += perf-time-to-tsc.o libperf-$(CONFIG_AUXTRACE) += insn-x86.o +libperf-y += intel-cqm.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index d116c217af99..2218cb64f840 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ 
b/tools/perf/arch/x86/tests/arch-tests.c @@ -23,6 +23,10 @@ struct test arch_tests[] = { .func = test__insn_x86, }, #endif + { + .desc = "Test intel cqm nmi context read", + .func = test__intel_cqm_count_nmi_context, + }, { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c new file mode 100644 index 000000000000..d28c1b6a3b54 --- /dev/null +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -0,0 +1,124 @@ +#include "tests/tests.h" +#include "perf.h" +#include "cloexec.h" +#include "debug.h" +#include "evlist.h" +#include "evsel.h" +#include "arch-tests.h" + +#include +#include + +static pid_t spawn(void) +{ + pid_t pid; + + pid = fork(); + if (pid) + return pid; + + while(1); + sleep(5); + return 0; +} + +/* + * Create an event group that contains both a sampled hardware + * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then + * wait for the hardware perf counter to overflow and generate a PMI, + * which triggers an event read for both of the events in the group. + * + * Since reading Intel CQM event counters requires sending SMP IPIs, the + * CQM pmu needs to handle the above situation gracefully, and return + * the last read counter value to avoid triggering a WARN_ON_ONCE() in + * smp_call_function_many() caused by sending IPIs from NMI context. 
+ */ +int test__intel_cqm_count_nmi_context(void) +{ + struct perf_evlist *evlist = NULL; + struct perf_evsel *evsel = NULL; + struct perf_event_attr pe; + int i, fd[2], flag, ret; + size_t mmap_len; + void *event; + pid_t pid; + int err = TEST_FAIL; + + flag = perf_event_open_cloexec_flag(); + + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("perf_evlist__new failed\n"); + return TEST_FAIL; + } + + ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL); + if (ret) { + pr_debug("parse_events failed\n"); + err = TEST_SKIP; + goto out; + } + + evsel = perf_evlist__first(evlist); + if (!evsel) { + pr_debug("perf_evlist__first failed\n"); + goto out; + } + + memset(&pe, 0, sizeof(pe)); + pe.size = sizeof(pe); + + pe.type = PERF_TYPE_HARDWARE; + pe.config = PERF_COUNT_HW_CPU_CYCLES; + pe.read_format = PERF_FORMAT_GROUP; + + pe.sample_period = 128; + pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ; + + pid = spawn(); + + fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag); + if (fd[0] < 0) { + pr_debug("failed to open event\n"); + goto out; + } + + memset(&pe, 0, sizeof(pe)); + pe.size = sizeof(pe); + + pe.type = evsel->attr.type; + pe.config = evsel->attr.config; + + fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag); + if (fd[1] < 0) { + pr_debug("failed to open event\n"); + goto out; + } + + /* + * Pick a power-of-two number of pages + 1 for the meta-data + * page (struct perf_event_mmap_page). See tools/perf/design.txt. 
+ */ + mmap_len = page_size * 65; + + event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0); + if (event == (void *)(-1)) { + pr_debug("failed to mmap %d\n", errno); + goto out; + } + + sleep(1); + + err = TEST_OK; + + munmap(event, mmap_len); + + for (i = 0; i < 2; i++) + close(fd[i]); + + kill(pid, SIGKILL); + wait(NULL); +out: + perf_evlist__delete(evlist); + return err; +} From def02db0d662b0edd83f80e3c18f660fc414decb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 5 Oct 2015 17:05:35 -0300 Subject: [PATCH 13/16] perf callchain: Switch default to 'graph,0.5,caller' Which is the most common default found in other similar tools. Requested-by: Ingo Molnar Cc: Adrian Hunter Cc: Borislav Petkov Cc: Chandler Carruth Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Cc: Wang Nan Link: https://www.youtube.com/watch?v=nXaxk27zwlk Link: http://lkml.kernel.org/n/tip-v8lq36aispvdwgxdmt9p9jd9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/builtin-report.c | 4 ++-- tools/perf/util/util.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index ce499035e6d8..e4fdeeb51123 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -192,7 +192,7 @@ OPTIONS when available. Usually more convenient to use --branch-history for this. - Default: fractal,0.5,callee,function. 
+ Default: graph,0.5,caller --children:: Accumulate callchain of children to parent entry so that then can diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b5623639f67d..3b23b25d1589 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -633,7 +633,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) bool has_br_stack = false; int branch_mode = -1; bool branch_call_mode = false; - char callchain_default_opt[] = "fractal,0.5,callee"; + char callchain_default_opt[] = "graph,0.5,caller"; const char * const report_usage[] = { "perf report []", NULL @@ -701,7 +701,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Only display entries with parent-match"), OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. 
" - "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), + "Default: graph,0.5,caller", &report_parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &report.max_stack, diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index ce465b259e52..c1bf9ff210b0 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -17,9 +17,9 @@ #include "callchain.h" struct callchain_param callchain_param = { - .mode = CHAIN_GRAPH_REL, + .mode = CHAIN_GRAPH_ABS, .min_percent = 0.5, - .order = ORDER_CALLEE, + .order = ORDER_CALLER, .key = CCKEY_FUNCTION }; From faae6f690eecb82b6d9d9f2112f5b51ac37d4acb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Aug 2015 17:14:40 -0300 Subject: [PATCH 14/16] perf ui browser: Optional horizontal scrolling key binding If the classes derived from ui_browser want to do some sort of horizontal scrolling, they have just to set ui_browser->columns to the number of columns available. Those columns can be the number of characters on the screen, if what is desired is to scroll character by character, or the number of columns in a spreadsheet like table. This is what the hist_browser will do, skipping ui_browser->horiz_scroll columns when rendering each of its lines. 
Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-q6a22bpmpgcr1awgzrmd4jrs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browser.c | 14 ++++++++++++++ tools/perf/ui/browser.h | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index c6c7e5189214..e9703c0829f1 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -393,6 +393,7 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs) if (browser->use_navkeypressed && !browser->navkeypressed) { if (key == K_DOWN || key == K_UP || + (browser->columns && (key == K_LEFT || key == K_RIGHT)) || key == K_PGDN || key == K_PGUP || key == K_HOME || key == K_END || key == ' ') { @@ -421,6 +422,18 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs) browser->seek(browser, -1, SEEK_CUR); } break; + case K_RIGHT: + if (!browser->columns) + goto out; + if (browser->horiz_scroll < browser->columns - 1) + ++browser->horiz_scroll; + break; + case K_LEFT: + if (!browser->columns) + goto out; + if (browser->horiz_scroll != 0) + --browser->horiz_scroll; + break; case K_PGDN: case ' ': if (browser->top_idx + browser->rows > browser->nr_entries - 1) @@ -459,6 +472,7 @@ int ui_browser__run(struct ui_browser *browser, int delay_secs) browser->seek(browser, -offset, SEEK_END); break; default: + out: return key; } } diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index f3cef564de02..01781de59532 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -14,7 +14,7 @@ struct ui_browser { u64 index, top_idx; void *top, *entries; - u16 y, x, width, height, rows; + u16 y, x, width, height, rows, columns, horiz_scroll; int current_color; void *priv; const char *title; From c6c3c02dea4034431110923ffd8e296ebfbdbe1b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: 
Tue, 11 Aug 2015 17:22:43 -0300 Subject: [PATCH 15/16] perf hists browser: Implement horizontal scrolling Do it using the recently introduced ui_browser scrolling mode, setting ui_browser.columns to the number of sort columns and then, when rendering each line, skipping as many initial columns as the user pressed the right arrow. As the user presses the left arrow, the ui_browser code will remove the scrolling counter and the left scrolling takes place. The right arrow key was an alias for ENTER, so people used to press it may get a bit annoyed at first, sorry! Ditto for ESC and the left key. Callchains can be left as is or we can, when rendering the Symbol column, store at what position on the screen it is and then use ui_browser__gotorc() to print it from there, i.e. the callchain would move around with the symbol. Leaving it as is, i.e. at a fixed position, close to the left, saves precious screen real estate for it, so I'm inclined to leave it as is now. Cc: Adrian Hunter Cc: Borislav Petkov Cc: Chandler Carruth Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ccqq9sabgfge5dwbqjwh71ij@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a4e9b370c037..9b7346a881cf 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -784,11 +784,12 @@ static int hist_browser__show_entry(struct hist_browser *browser, .size = sizeof(s), .ptr = &arg, }; + int column = 0; hist_browser__gotorc(browser, row, 0); perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll) continue; if (current_entry && browser->b.navkeypressed) { @@ -861,14 +862,16 @@ static int advance_hpp_check(struct perf_hpp *hpp,
int inc) return hpp->size <= 0; } -static int hists__scnprintf_headers(char *buf, size_t size, struct hists *hists) +static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, size_t size) { + struct hists *hists = browser->hists; struct perf_hpp dummy_hpp = { .buf = buf, .size = size, }; struct perf_hpp_fmt *fmt; size_t ret = 0; + int column = 0; if (symbol_conf.use_callchain) { ret = scnprintf(buf, size, " "); @@ -877,7 +880,7 @@ static int hists__scnprintf_headers(char *buf, size_t size, struct hists *hists) } perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll) continue; ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); @@ -896,7 +899,7 @@ static void hist_browser__show_headers(struct hist_browser *browser) { char headers[1024]; - hists__scnprintf_headers(headers, sizeof(headers), browser->hists); + hists_browser__scnprintf_headers(browser, headers, sizeof(headers)); ui_browser__gotorc(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); @@ -1806,8 +1809,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, memset(options, 0, sizeof(options)); memset(actions, 0, sizeof(actions)); - perf_hpp__for_each_format(fmt) + perf_hpp__for_each_format(fmt) { perf_hpp__reset_width(fmt, hists); + /* + * This is done just once, and activates the horizontal scrolling + * code in the ui_browser code, it would be better to have a the + * counter in the perf_hpp code, but I couldn't find doing it here + * works, FIXME by setting this in hist_browser__new, for now, be + * clever 8-) + */ + ++browser->b.columns; + } if (symbol_conf.col_width_list_str) perf_hpp__set_user_width(symbol_conf.col_width_list_str); From 27bf90bf0690f55c3679bcc4c325823cf1cfd19d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Oct 2015 21:31:17 +0200 Subject: 
[PATCH 16/16] perf tools: Fail properly in case pattern matching fails to find tracepoint Currently we don't fail properly when pattern matching fails to find any tracepoint. Current behaviour: $ perf record -e 'sched:krava*' sleep 1 WARNING: event parser found nothinginvalid or unsupported event: 'sched:krava*' Run 'perf list' for a list of valid events usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] With this patch: $ perf record -e 'sched:krava*' sleep 1 event syntax error: 'sched:krava*' \___ unknown tracepoint Error: File /sys/kernel/debug/tracing/events/sched/krava* not found. Hint: Perhaps this kernel misses some CONFIG_ setting to enable this feature?. Run 'perf list' for a list of valid events usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] Reported-by: Daniel Bristot de Oliveira Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444073477-3181-1-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 3ed8bf175163..991bbd469bea 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -449,7 +449,7 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, char evt_path[MAXPATHLEN]; struct dirent *evt_ent; DIR *evt_dir; - int ret = 0; + int ret = 0, found = 0; snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name); evt_dir = opendir(evt_path); @@ -468,10 +468,17 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, if (!strglobmatch(evt_ent->d_name, evt_name)) continue; + found++; + ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, err, head_config); } + if (!found) { + tracepoint_error(err, ENOENT, sys_name, evt_name); + ret = -1; + } + closedir(evt_dir); return ret; }