diff --git a/tools/build/Build.include b/tools/build/Build.include index 1dcb95e76f70..c4ae12a5d0a5 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -89,7 +89,9 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ # - per target C flags # - per object C flags # - BUILD_STR macro to allow '-D"$(variable)"' constructs -c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) +c_flags_1 = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) +c_flags_2 = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(c_flags_1)) +c_flags = $(filter-out $(CFLAGS_REMOVE_$(obj)), $(c_flags_2)) cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj)) ### diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt index a47bffbae159..a22587475dbe 100644 --- a/tools/build/Documentation/Build.txt +++ b/tools/build/Documentation/Build.txt @@ -135,8 +135,10 @@ CFLAGS It's possible to alter the standard object C flags in the following way: - CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object - CFLAGS_gtk += '...' - alters CFLAGS for gtk build object + CFLAGS_perf.o += '...' - adds CFLAGS for perf.o object + CFLAGS_gtk += '...' - adds CFLAGS for gtk build object + CFLAGS_REMOVE_perf.o += '...' - removes CFLAGS for perf.o object + CFLAGS_REMOVE_gtk += '...' - removes CFLAGS for gtk build object This C flags changes has the scope of the Build makefile they are defined in. diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index ac9c477a2a48..8f668bce8996 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -47,7 +47,8 @@ FILES= \ test-bpf.bin \ test-get_cpuid.bin \ test-sdt.bin \ - test-cxx.bin + test-cxx.bin \ + test-jvmti.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -225,6 +226,9 @@ $(OUTPUT)test-sdt.bin: $(OUTPUT)test-cxx.bin: $(BUILDXX) -std=gnu++11 +$(OUTPUT)test-jvmti.bin: + $(BUILD) + -include $(OUTPUT)*.d ############################### diff --git a/tools/build/feature/test-jvmti.c b/tools/build/feature/test-jvmti.c new file mode 100644 index 000000000000..1c665f09b9d6 --- /dev/null +++ b/tools/build/feature/test-jvmti.c @@ -0,0 +1,13 @@ +#include +#include + +int main(void) +{ + JavaVM jvm __attribute__((unused)); + jvmtiEventCallbacks cb __attribute__((unused)); + jvmtiCapabilities caps __attribute__((unused)); + jvmtiJlocationFormat format __attribute__((unused)); + jvmtiEnv jvmti __attribute__((unused)); + + return 0; +} diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index c6c8318e38a2..b0b3007d3c9c 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -550,6 +550,18 @@ Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users have memory limits imposed upon them. That affects what buffer sizes they can have as outlined above. +The v4.2 kernel introduced support for a context switch metadata event, +PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes +are scheduled out and in, just not by whom, which is left for the +PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, +which in turn requires CAP_SYS_ADMIN. + +Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context +switches") commit, that introduces these metadata events for further info. + +When working with kernels < v4.2, the following considerations must be taken, +as the sched:sched_switch tracepoints will be used to receive such information: + Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are not permitted to use tracepoints which means there is insufficient side-band information to decode Intel PT in per-cpu mode, and potentially workload-only @@ -564,8 +576,11 @@ sched_switch tracepoint ----------------------- The sched_switch tracepoint is used to provide side-band data for Intel PT -decoding. sched_switch events are automatically added. e.g. the second event -shown below +decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't +available. + +The sched_switch events are automatically added. e.g. the second event shown +below: $ perf record -vv -e intel_pt//u uname ------------------------------------------------------------ diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index cb081ac59fd1..9365b75fd04f 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -8,6 +8,8 @@ perf-config - Get and set variables in a configuration file. SYNOPSIS -------- [verse] +'perf config' [] [section.name[=value] ...] +or 'perf config' [] -l | --list DESCRIPTION @@ -118,6 +120,39 @@ Given a $HOME/.perfconfig like this: children = true group = true +You can hide source code of annotate feature setting the config to false with + + % perf config annotate.hide_src_code=true + +If you want to add or modify several config items, you can do like + + % perf config ui.show-headers=false kmem.default=slab + +To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do + + % perf config --user report sort-order=srcline + +To change colors of selected line to other foreground and background colors +in system config file (i.e. `$(sysconf)/perfconfig`), do + + % perf config --system colors.selected=yellow,green + +To query the record mode of call graph, do + + % perf config call-graph.record-mode + +If you want to know multiple config key/value pairs, you can do like + + % perf config report.queue-size call-graph.order report.children + +To query the config value of sort order of call graph in user config file (i.e. `~/.perfconfig`), do + + % perf config --user call-graph.sort-order + +To query the config value of buildid directory in system config file (i.e. `$(sysconf)/perfconfig`), do + + % perf config --system buildid.dir + Variables ~~~~~~~~~ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index cffdd9cf3ebf..8a493d46fab9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -758,6 +758,31 @@ ifndef NO_AUXTRACE endif endif +ifndef NO_JVMTI + ifneq (,$(wildcard /usr/sbin/update-java-alternatives)) + JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') + else + ifneq (,$(wildcard /usr/sbin/alternatives)) + JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + endif + endif + ifndef JDIR + $(warning No alternatives command found, you need to set JDIR= to point to the root of your Java directory) + NO_JVMTI := 1 + endif +endif + +ifndef NO_JVMTI + FEATURE_CHECK_CFLAGS-jvmti := -I$(JDIR)/include -I$(JDIR)/include/linux + $(call feature_check,jvmti) + ifeq ($(feature-jvmti), 1) + $(call detected_var,JDIR) + else + $(warning No openjdk development package found, please install JDK package) + NO_JVMTI := 1 + endif +endif + # Among the variables below, these: # perfexecdir # template_dir @@ -850,6 +875,7 @@ ifeq ($(VF),1) $(call print_var,sysconfdir) $(call print_var,LIBUNWIND_DIR) $(call print_var,LIBDW_DIR) + $(call print_var,JDIR) ifeq ($(dwarf-post-unwind),1) $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7de14f470f3c..3cb1df43ad3e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -86,6 +86,8 @@ include ../scripts/utilities.mak # # Define FEATURES_DUMP to provide features detection dump file # and bypass the feature detection +# +# Define NO_JVMTI if you do not want jvmti agent built # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -283,6 +285,12 @@ ifndef NO_PERF_READ_VDSOX32 PROGRAMS += $(OUTPUT)perf-read-vdsox32 endif +LIBJVMTI = libperf-jvmti.so + +ifndef NO_JVMTI +PROGRAMS += $(OUTPUT)$(LIBJVMTI) +endif + # what 'all' will build and 'install' will install, in perfexecdir ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) @@ -551,6 +559,16 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c $(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c endif +ifndef NO_JVMTI +LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o + +$(LIBJVMTI_IN): FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti + +$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN) + $(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $< -lelf -lrt +endif + $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBPERF_IN := $(OUTPUT)libperf-in.o @@ -687,6 +705,10 @@ endif ifndef NO_PERF_READ_VDSOX32 $(call QUIET_INSTALL, perf-read-vdsox32) \ $(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)'; +endif +ifndef NO_JVMTI + $(call QUIET_INSTALL, $(LIBJVMTI)) \ + $(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)'; endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' @@ -754,7 +776,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected - $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents + $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index e4207a23b52c..8c0d93b7c2f0 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -17,7 +17,7 @@ static bool use_system_config, use_user_config; static const char * const config_usage[] = { - "perf config [] [options]", + "perf config [] [options] [section.name[=value] ...]", NULL }; @@ -33,6 +33,73 @@ static struct option config_options[] = { OPT_END() }; +static int set_config(struct perf_config_set *set, const char *file_name, + const char *var, const char *value) +{ + struct perf_config_section *section = NULL; + struct perf_config_item *item = NULL; + const char *first_line = "# this file is auto-generated."; + FILE *fp; + + if (set == NULL) + return -1; + + fp = fopen(file_name, "w"); + if (!fp) + return -1; + + perf_config_set__collect(set, file_name, var, value); + fprintf(fp, "%s\n", first_line); + + /* overwrite configvariables */ + perf_config_items__for_each_entry(&set->sections, section) { + if (!use_system_config && section->from_system_config) + continue; + fprintf(fp, "[%s]\n", section->name); + + perf_config_items__for_each_entry(§ion->items, item) { + if (!use_system_config && section->from_system_config) + continue; + if (item->value) + fprintf(fp, "\t%s = %s\n", + item->name, item->value); + } + } + fclose(fp); + + return 0; +} + +static int show_spec_config(struct perf_config_set *set, const char *var) +{ + struct perf_config_section *section; + struct perf_config_item *item; + + if (set == NULL) + return -1; + + perf_config_items__for_each_entry(&set->sections, section) { + if (prefixcmp(var, section->name) != 0) + continue; + + perf_config_items__for_each_entry(§ion->items, item) { + const char *name = var + strlen(section->name) + 1; + + if (strcmp(name, item->name) == 0) { + char *value = item->value; + + if (value) { + printf("%s=%s\n", var, value); + return 0; + } + } + + } + } + + return 0; +} + static int show_config(struct perf_config_set *set) { struct perf_config_section *section; @@ -52,9 +119,44 @@ static int show_config(struct perf_config_set *set) return 0; } +static int parse_config_arg(char *arg, char **var, char **value) +{ + const char *last_dot = strchr(arg, '.'); + + /* + * Since "var" actually contains the section name and the real + * config variable name separated by a dot, we have to know where the dot is. + */ + if (last_dot == NULL || last_dot == arg) { + pr_err("The config variable does not contain a section name: %s\n", arg); + return -1; + } + if (!last_dot[1]) { + pr_err("The config variable does not contain a variable name: %s\n", arg); + return -1; + } + + *value = strchr(arg, '='); + if (*value == NULL) + *var = arg; + else if (!strcmp(*value, "=")) { + pr_err("The config variable does not contain a value: %s\n", arg); + return -1; + } else { + *value = *value + 1; /* excluding a first character '=' */ + *var = strsep(&arg, "="); + if (*var[0] == '\0') { + pr_err("invalid config variable: %s\n", arg); + return -1; + } + } + + return 0; +} + int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) { - int ret = 0; + int i, ret = 0; struct perf_config_set *set; char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); @@ -100,7 +202,36 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) } break; default: - usage_with_options(config_usage, config_options); + if (argc) { + for (i = 0; argv[i]; i++) { + char *var, *value; + char *arg = strdup(argv[i]); + + if (!arg) { + pr_err("%s: strdup failed\n", __func__); + ret = -1; + break; + } + + if (parse_config_arg(arg, &var, &value) < 0) { + free(arg); + ret = -1; + break; + } + + if (value == NULL) + ret = show_spec_config(set, var); + else { + const char *config_filename = config_exclusive_filename; + + if (!config_exclusive_filename) + config_filename = user_config; + ret = set_config(set, config_filename, var, value); + } + free(arg); + } + } else + usage_with_options(config_usage, config_options); } perf_config_set__delete(set); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8064de8ceedc..3dfbfffe2ecd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -911,6 +911,9 @@ repeat: if (itrace_synth_opts.last_branch) has_br_stack = true; + if (has_br_stack && branch_call_mode) + symbol_conf.show_branchflag_count = true; + /* * Branch mode is a tristate: * -1 means default, so decide based on the file having branch data. diff --git a/tools/perf/jvmti/Build b/tools/perf/jvmti/Build new file mode 100644 index 000000000000..eaeb8cb5379b --- /dev/null +++ b/tools/perf/jvmti/Build @@ -0,0 +1,8 @@ +jvmti-y += libjvmti.o +jvmti-y += jvmti_agent.o + +CFLAGS_jvmti = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux +CFLAGS_REMOVE_jvmti = -Wmissing-declarations +CFLAGS_REMOVE_jvmti += -Wstrict-prototypes +CFLAGS_REMOVE_jvmti += -Wextra +CFLAGS_REMOVE_jvmti += -Wwrite-strings diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile deleted file mode 100644 index df14e6b67b63..000000000000 --- a/tools/perf/jvmti/Makefile +++ /dev/null @@ -1,89 +0,0 @@ -ARCH=$(shell uname -m) - -ifeq ($(ARCH), x86_64) -JARCH=amd64 -endif -ifeq ($(ARCH), armv7l) -JARCH=armhf -endif -ifeq ($(ARCH), armv6l) -JARCH=armhf -endif -ifeq ($(ARCH), aarch64) -JARCH=aarch64 -endif -ifeq ($(ARCH), ppc64) -JARCH=powerpc -endif -ifeq ($(ARCH), ppc64le) -JARCH=powerpc -endif - -DESTDIR=/usr/local - -VERSION=1 -REVISION=0 -AGE=0 - -LN=ln -sf -RM=rm - -SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE) -VLIBJVMTI=libjvmti.so.$(VERSION) -SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI) -SOLIBEXT=so - -# The following works at least on fedora 23, you may need the next -# line for other distros. -ifneq (,$(wildcard /usr/sbin/update-java-alternatives)) -JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') -else - ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') - endif -endif -ifndef JDIR -$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory) -else - ifeq (,$(wildcard $(JDIR)/include/jvmti.h)) - $(error the openjdk development package appears to me missing, install and try again) - endif -endif -$(info Using Java from $(JDIR)) -# -lrt required in 32-bit mode for clock_gettime() -LIBS=-lelf -lrt -INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux - -TARGETS=$(SLIBJVMTI) - -SRCS=libjvmti.c jvmti_agent.c -OBJS=$(SRCS:.c=.o) -SOBJS=$(OBJS:.o=.lo) -OPT=-O2 -g -Werror -Wall - -CFLAGS=$(INCDIR) $(OPT) - -all: $(TARGETS) - -.c.o: - $(CC) $(CFLAGS) -c $*.c -.c.lo: - $(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo - -$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h - -$(SLIBJVMTI): $(SOBJS) - $(CC) $(CFLAGS) $(SLDFLAGS) -o $@ $(SOBJS) $(LIBS) - $(LN) $@ libjvmti.$(SOLIBEXT) - -clean: - $(RM) -f *.o *.so.* *.so *.lo - -install: - -mkdir -p $(DESTDIR)/lib - install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/ - (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI)) - (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT)) - ldconfig - -.SUFFIXES: .c .S .o .lo diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 143f4d549769..08ed7f12cc37 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -106,7 +106,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 -make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 +make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 # $(run) contains all available tests run := make_pure diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 84f5dd2fb59c..66676cb8effe 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -738,6 +738,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, struct callchain_print_arg *arg) { char bf[1024], *alloc_str; + char buf[64], *alloc_str2; const char *str; if (arg->row_offset != 0) { @@ -746,12 +747,26 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, } alloc_str = NULL; + alloc_str2 = NULL; + str = callchain_list__sym_name(chain, bf, sizeof(bf), browser->show_dso); - if (need_percent) { - char buf[64]; + if (symbol_conf.show_branchflag_count) { + if (need_percent) + callchain_list_counts__printf_value(node, chain, NULL, + buf, sizeof(buf)); + else + callchain_list_counts__printf_value(NULL, chain, NULL, + buf, sizeof(buf)); + if (asprintf(&alloc_str2, "%s%s", str, buf) < 0) + str = "Not enough memory!"; + else + str = alloc_str2; + } + + if (need_percent) { callchain_node__scnprintf_value(node, buf, sizeof(buf), total); @@ -764,6 +779,7 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, print(browser, chain, str, offset, row, arg); free(alloc_str); + free(alloc_str2); return 1; } diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 89d8441f9890..668f4aecf2e6 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -41,7 +41,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, { int i; size_t ret = 0; - char bf[1024]; + char bf[1024], *alloc_str = NULL; + char buf[64]; + const char *str; ret += callchain__fprintf_left_margin(fp, left_margin); for (i = 0; i < depth; i++) { @@ -56,8 +58,26 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, } else ret += fprintf(fp, "%s", " "); } - fputs(callchain_list__sym_name(chain, bf, sizeof(bf), false), fp); + + str = callchain_list__sym_name(chain, bf, sizeof(bf), false); + + if (symbol_conf.show_branchflag_count) { + if (!period) + callchain_list_counts__printf_value(node, chain, NULL, + buf, sizeof(buf)); + else + callchain_list_counts__printf_value(NULL, chain, NULL, + buf, sizeof(buf)); + + if (asprintf(&alloc_str, "%s%s", str, buf) < 0) + str = "Not enough memory!"; + else + str = alloc_str; + } + + fputs(str, fp); fputc('\n', fp); + free(alloc_str); return ret; } @@ -219,8 +239,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, } else ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "%s\n", callchain_list__sym_name(chain, bf, sizeof(bf), - false)); + ret += fprintf(fp, "%s", + callchain_list__sym_name(chain, bf, + sizeof(bf), + false)); + + if (symbol_conf.show_branchflag_count) + ret += callchain_list_counts__printf_value( + NULL, chain, fp, NULL, 0); + ret += fprintf(fp, "\n"); if (++entries_printed == callchain_param.print_limit) break; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 07fd30bc2f81..823befd8209a 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -193,7 +193,6 @@ int perf_callchain_config(const char *var, const char *value) if (!strcmp(var, "record-mode")) return parse_callchain_record_opt(value, &callchain_param); -#ifdef HAVE_DWARF_UNWIND_SUPPORT if (!strcmp(var, "dump-size")) { unsigned long size = 0; int ret; @@ -203,7 +202,6 @@ int perf_callchain_config(const char *var, const char *value) return ret; } -#endif if (!strcmp(var, "print-type")) return parse_callchain_mode(value); if (!strcmp(var, "order")) @@ -440,6 +438,21 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; call->ms.map = cursor_node->map; + + if (cursor_node->branch) { + call->branch_count = 1; + + if (cursor_node->branch_flags.predicted) + call->predicted_count = 1; + + if (cursor_node->branch_flags.abort) + call->abort_count = 1; + + call->cycles_count = cursor_node->branch_flags.cycles; + call->iter_count = cursor_node->nr_loop_iter; + call->samples_count = cursor_node->samples; + } + list_add_tail(&call->list, &node->val); callchain_cursor_advance(cursor); @@ -499,8 +512,23 @@ static enum match_result match_chain(struct callchain_cursor_node *node, right = node->ip; } - if (left == right) + if (left == right) { + if (node->branch) { + cnode->branch_count++; + + if (node->branch_flags.predicted) + cnode->predicted_count++; + + if (node->branch_flags.abort) + cnode->abort_count++; + + cnode->cycles_count += node->branch_flags.cycles; + cnode->iter_count += node->nr_loop_iter; + cnode->samples_count += node->samples; + } + return MATCH_EQ; + } return left > right ? MATCH_GT : MATCH_LT; } @@ -730,7 +758,8 @@ merge_chain_branch(struct callchain_cursor *cursor, list_for_each_entry_safe(list, next_list, &src->val, list) { callchain_cursor_append(cursor, list->ip, - list->ms.map, list->ms.sym); + list->ms.map, list->ms.sym, + false, NULL, 0, 0); list_del(&list->list); free(list); } @@ -767,7 +796,9 @@ int callchain_merge(struct callchain_cursor *cursor, } int callchain_cursor_append(struct callchain_cursor *cursor, - u64 ip, struct map *map, struct symbol *sym) + u64 ip, struct map *map, struct symbol *sym, + bool branch, struct branch_flags *flags, + int nr_loop_iter, int samples) { struct callchain_cursor_node *node = *cursor->last; @@ -782,6 +813,13 @@ int callchain_cursor_append(struct callchain_cursor *cursor, node->ip = ip; node->map = map; node->sym = sym; + node->branch = branch; + node->nr_loop_iter = nr_loop_iter; + node->samples = samples; + + if (flags) + memcpy(&node->branch_flags, flags, + sizeof(struct branch_flags)); cursor->nr++; @@ -939,6 +977,163 @@ int callchain_node__fprintf_value(struct callchain_node *node, return 0; } +static void callchain_counts_value(struct callchain_node *node, + u64 *branch_count, u64 *predicted_count, + u64 *abort_count, u64 *cycles_count) +{ + struct callchain_list *clist; + + list_for_each_entry(clist, &node->val, list) { + if (branch_count) + *branch_count += clist->branch_count; + + if (predicted_count) + *predicted_count += clist->predicted_count; + + if (abort_count) + *abort_count += clist->abort_count; + + if (cycles_count) + *cycles_count += clist->cycles_count; + } +} + +static int callchain_node_branch_counts_cumul(struct callchain_node *node, + u64 *branch_count, + u64 *predicted_count, + u64 *abort_count, + u64 *cycles_count) +{ + struct callchain_node *child; + struct rb_node *n; + + n = rb_first(&node->rb_root_in); + while (n) { + child = rb_entry(n, struct callchain_node, rb_node_in); + n = rb_next(n); + + callchain_node_branch_counts_cumul(child, branch_count, + predicted_count, + abort_count, + cycles_count); + + callchain_counts_value(child, branch_count, + predicted_count, abort_count, + cycles_count); + } + + return 0; +} + +int callchain_branch_counts(struct callchain_root *root, + u64 *branch_count, u64 *predicted_count, + u64 *abort_count, u64 *cycles_count) +{ + if (branch_count) + *branch_count = 0; + + if (predicted_count) + *predicted_count = 0; + + if (abort_count) + *abort_count = 0; + + if (cycles_count) + *cycles_count = 0; + + return callchain_node_branch_counts_cumul(&root->node, + branch_count, + predicted_count, + abort_count, + cycles_count); +} + +static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, + u64 branch_count, u64 predicted_count, + u64 abort_count, u64 cycles_count, + u64 iter_count, u64 samples_count) +{ + double predicted_percent = 0.0; + const char *null_str = ""; + char iter_str[32]; + char *str; + u64 cycles = 0; + + if (branch_count == 0) { + if (fp) + return fprintf(fp, " (calltrace)"); + + return scnprintf(bf, bfsize, " (calltrace)"); + } + + if (iter_count && samples_count) { + scnprintf(iter_str, sizeof(iter_str), + ", iterations:%" PRId64 "", + iter_count / samples_count); + str = iter_str; + } else + str = (char *)null_str; + + predicted_percent = predicted_count * 100.0 / branch_count; + cycles = cycles_count / branch_count; + + if ((predicted_percent >= 100.0) && (abort_count == 0)) { + if (fp) + return fprintf(fp, " (cycles:%" PRId64 "%s)", + cycles, str); + + return scnprintf(bf, bfsize, " (cycles:%" PRId64 "%s)", + cycles, str); + } + + if ((predicted_percent < 100.0) && (abort_count == 0)) { + if (fp) + return fprintf(fp, + " (predicted:%.1f%%, cycles:%" PRId64 "%s)", + predicted_percent, cycles, str); + + return scnprintf(bf, bfsize, + " (predicted:%.1f%%, cycles:%" PRId64 "%s)", + predicted_percent, cycles, str); + } + + if (fp) + return fprintf(fp, + " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", + predicted_percent, abort_count, cycles, str); + + return scnprintf(bf, bfsize, + " (predicted:%.1f%%, abort:%" PRId64 ", cycles:%" PRId64 "%s)", + predicted_percent, abort_count, cycles, str); +} + +int callchain_list_counts__printf_value(struct callchain_node *node, + struct callchain_list *clist, + FILE *fp, char *bf, int bfsize) +{ + u64 branch_count, predicted_count; + u64 abort_count, cycles_count; + u64 iter_count = 0, samples_count = 0; + + branch_count = clist->branch_count; + predicted_count = clist->predicted_count; + abort_count = clist->abort_count; + cycles_count = clist->cycles_count; + + if (node) { + struct callchain_list *call; + + list_for_each_entry(call, &node->val, list) { + iter_count += call->iter_count; + samples_count += call->samples_count; + } + } + + return callchain_counts_printf(fp, bf, bfsize, branch_count, + predicted_count, abort_count, + cycles_count, iter_count, samples_count); +} + static void free_callchain_node(struct callchain_node *node) { struct callchain_list *list, *tmp; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 13e75549c440..d9c70dccf06a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,11 +11,7 @@ #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" -#ifdef HAVE_DWARF_UNWIND_SUPPORT # define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n" -#else -# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|lbr)\n" -#endif #define RECORD_SIZE_HELP \ HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording ()\n" \ @@ -115,6 +111,12 @@ struct callchain_list { bool unfolded; bool has_children; }; + u64 branch_count; + u64 predicted_count; + u64 abort_count; + u64 cycles_count; + u64 iter_count; + u64 samples_count; char *srcline; struct list_head list; }; @@ -129,6 +131,10 @@ struct callchain_cursor_node { u64 ip; struct map *map; struct symbol *sym; + bool branch; + struct branch_flags branch_flags; + int nr_loop_iter; + int samples; struct callchain_cursor_node *next; }; @@ -183,7 +189,9 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) } int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, - struct map *map, struct symbol *sym); + struct map *map, struct symbol *sym, + bool branch, struct branch_flags *flags, + int nr_loop_iter, int samples); /* Close a cursor writing session. Initialize for the reader */ static inline void callchain_cursor_commit(struct callchain_cursor *cursor) @@ -261,8 +269,16 @@ char *callchain_node__scnprintf_value(struct callchain_node *node, int callchain_node__fprintf_value(struct callchain_node *node, FILE *fp, u64 total); +int callchain_list_counts__printf_value(struct callchain_node *node, + struct callchain_list *clist, + FILE *fp, char *bf, int bfsize); + void free_callchain(struct callchain_root *root); void decay_callchain(struct callchain_root *root); int callchain_node__make_parent_list(struct callchain_node *node); +int callchain_branch_counts(struct callchain_root *root, + u64 *branch_count, u64 *predicted_count, + u64 *abort_count, u64 *cycles_count); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 18dae745034f..3d906dbbef74 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -594,6 +594,19 @@ static int collect_config(const char *var, const char *value, goto out_free; } + /* perf_config_set can contain both user and system config items. + * So we should know where each value is from. + * The classification would be needed when a particular config file + * is overwrited by setting feature i.e. set_config(). + */ + if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) { + section->from_system_config = true; + item->from_system_config = true; + } else { + section->from_system_config = false; + item->from_system_config = false; + } + ret = set_value(item, value); return ret; @@ -602,6 +615,13 @@ out_free: return -1; } +int perf_config_set__collect(struct perf_config_set *set, const char *file_name, + const char *var, const char *value) +{ + config_file_name = file_name; + return collect_config(var, value, set); +} + static int perf_config_set__init(struct perf_config_set *set) { int ret = -1; diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 6f813d46045e..1a59a6b43f8b 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -7,12 +7,14 @@ struct perf_config_item { char *name; char *value; + bool from_system_config; struct list_head node; }; struct perf_config_section { char *name; struct list_head items; + bool from_system_config; struct list_head node; }; @@ -33,6 +35,8 @@ const char *perf_etc_perfconfig(void); struct perf_config_set *perf_config_set__new(void); void perf_config_set__delete(struct perf_config_set *set); +int perf_config_set__collect(struct perf_config_set *set, const char *file_name, + const char *var, const char *value); void perf_config__init(void); void perf_config__exit(void); void perf_config__refresh(void); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index df85b9efd80f..9b33bef54581 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1616,7 +1616,11 @@ static int add_callchain_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, u8 *cpumode, - u64 ip) + u64 ip, + bool branch, + struct branch_flags *flags, + int nr_loop_iter, + int samples) { struct addr_location al; @@ -1668,7 +1672,8 @@ static int add_callchain_ip(struct thread *thread, if (symbol_conf.hide_unresolved && al.sym == NULL) return 0; - return callchain_cursor_append(cursor, al.addr, al.map, al.sym); + return callchain_cursor_append(cursor, al.addr, al.map, al.sym, + branch, flags, nr_loop_iter, samples); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -1757,7 +1762,9 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* LBR only affects the user callchain */ if (i != chain_nr) { struct branch_stack *lbr_stack = sample->branch_stack; - int lbr_nr = lbr_stack->nr, j; + int lbr_nr = lbr_stack->nr, j, k; + bool branch; + struct branch_flags *flags; /* * LBR callstack can only get user call chain. * The mix_chain_nr is kernel call chain @@ -1772,23 +1779,41 @@ static int resolve_lbr_callchain_sample(struct thread *thread, for (j = 0; j < mix_chain_nr; j++) { int err; + branch = false; + flags = NULL; + if (callchain_param.order == ORDER_CALLEE) { if (j < i + 1) ip = chain->ips[j]; - else if (j > i + 1) - ip = lbr_stack->entries[j - i - 2].from; - else + else if (j > i + 1) { + k = j - i - 2; + ip = lbr_stack->entries[k].from; + branch = true; + flags = &lbr_stack->entries[k].flags; + } else { ip = lbr_stack->entries[0].to; + branch = true; + flags = &lbr_stack->entries[0].flags; + } } else { - if (j < lbr_nr) - ip = lbr_stack->entries[lbr_nr - j - 1].from; + if (j < lbr_nr) { + k = lbr_nr - j - 1; + ip = lbr_stack->entries[k].from; + branch = true; + flags = &lbr_stack->entries[k].flags; + } else if (j > lbr_nr) ip = chain->ips[i + 1 - (j - lbr_nr)]; - else + else { ip = lbr_stack->entries[0].to; + branch = true; + flags = &lbr_stack->entries[0].flags; + } } - err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + branch, flags, 0, 0); if (err) return (err < 0) ? err : 0; } @@ -1813,6 +1838,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, int i, j, err, nr_entries; int skip_idx = -1; int first_call = 0; + int nr_loop_iter; if (perf_evsel__has_branch_callstack(evsel)) { err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, @@ -1868,14 +1894,37 @@ static int thread__resolve_callchain_sample(struct thread *thread, be[i] = branch->entries[branch->nr - i - 1]; } + nr_loop_iter = nr; nr = remove_loops(be, nr); + /* + * Get the number of iterations. + * It's only approximation, but good enough in practice. + */ + if (nr_loop_iter > nr) + nr_loop_iter = nr_loop_iter - nr + 1; + else + nr_loop_iter = 0; + for (i = 0; i < nr; i++) { - err = add_callchain_ip(thread, cursor, parent, root_al, - NULL, be[i].to); + if (i == nr - 1) + err = add_callchain_ip(thread, cursor, parent, + root_al, + NULL, be[i].to, + true, &be[i].flags, + nr_loop_iter, 1); + else + err = add_callchain_ip(thread, cursor, parent, + root_al, + NULL, be[i].to, + true, &be[i].flags, + 0, 0); + if (!err) err = add_callchain_ip(thread, cursor, parent, root_al, - NULL, be[i].from); + NULL, be[i].from, + true, &be[i].flags, + 0, 0); if (err == -EINVAL) break; if (err) @@ -1903,7 +1952,9 @@ check_calls: if (ip < PERF_CONTEXT_MAX) ++nr_entries; - err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + false, NULL, 0, 0); if (err) return (err < 0) ? err : 0; @@ -1919,7 +1970,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; return callchain_cursor_append(cursor, entry->ip, - entry->map, entry->sym); + entry->map, entry->sym, + false, NULL, 0, 0); } static int thread__resolve_callchain_unwind(struct thread *thread, diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index d964844eb314..2d0a905c879a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -100,6 +100,7 @@ struct symbol_conf { show_total_period, use_callchain, cumulate_callchain, + show_branchflag_count, exclude_other, show_cpu_utilization, initialized,