From b075c1d81e7d0e96758877ff9ded30ab87df2b77 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 1 Jul 2021 13:39:15 -0300 Subject: [PATCH 01/16] tools headers cpufeatures: Sync with the kernel sources To pick the changes from: eec2113eabd92b7b ("x86/fpu/amx: Define AMX state components and have it used for boot-time checks") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Borislav Petkov Cc: Chang S. Bae Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d0ce5cfd3ac1..d5b5f2ab87a0 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -277,6 +277,7 @@ #define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -298,6 +299,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ From 346e91998cba46b64e8ef5f89813f8918c2731e6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:39:02 -0300 Subject: [PATCH 02/16] tools headers UAPI: Sync linux/kvm.h with the kernel sources To pick the changes in: b56639318bb2be66 ("KVM: SEV: Add support for SEV intra host migration") e615e355894e6197 ("KVM: x86: On emulation failure, convey the exit reason, etc. to userspace") a9d496d8e08ca1eb ("KVM: x86: Clarify the kvm_run.emulation_failure structure layout") c68dc1b577eabd56 ("KVM: x86: Report host tsc and realtime values in KVM_GET_CLOCK") dea8ee31a0392775 ("RISC-V: KVM: Add SBI v0.1 support") That just rebuilds perf, as these patches don't add any new KVM ioctl to be harvested for the the 'perf trace' ioctl syscall argument beautifiers. This is also by now used by tools/testing/selftests/kvm/, a simple test build succeeded. This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Anup Patel Cc: Atish Patra Cc: David Edmondson Cc: Oliver Upton Cc: Paolo Bonzini Cc: Peter Gonda Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index a067410ebea5..1daa45268de2 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -269,6 +269,7 @@ struct kvm_xen_exit { #define KVM_EXIT_AP_RESET_HOLD 32 #define KVM_EXIT_X86_BUS_LOCK 33 #define KVM_EXIT_XEN 34 +#define KVM_EXIT_RISCV_SBI 35 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -397,13 +398,23 @@ struct kvm_run { * "ndata" is correct, that new fields are enumerated in "flags", * and that each flag enumerates fields that are 64-bit aligned * and sized (so that ndata+internal.data[] is valid/accurate). + * + * Space beyond the defined fields may be used to store arbitrary + * debug information relating to the emulation failure. It is + * accounted for in "ndata" but the format is unspecified and is + * not represented in "flags". Any such information is *not* ABI! */ struct { __u32 suberror; __u32 ndata; __u64 flags; - __u8 insn_size; - __u8 insn_bytes[15]; + union { + struct { + __u8 insn_size; + __u8 insn_bytes[15]; + }; + }; + /* Arbitrary debug data may follow. */ } emulation_failure; /* KVM_EXIT_OSI */ struct { @@ -469,6 +480,13 @@ struct kvm_run { } msr; /* KVM_EXIT_XEN */ struct kvm_xen_exit xen; + /* KVM_EXIT_RISCV_SBI */ + struct { + unsigned long extension_id; + unsigned long function_id; + unsigned long args[6]; + unsigned long ret[2]; + } riscv_sbi; /* Fix the size of the union. */ char padding[256]; }; @@ -1112,6 +1130,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_BINARY_STATS_FD 203 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #ifdef KVM_CAP_IRQ_ROUTING @@ -1223,11 +1242,16 @@ struct kvm_irqfd { /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ #define KVM_CLOCK_TSC_STABLE 2 +#define KVM_CLOCK_REALTIME (1 << 2) +#define KVM_CLOCK_HOST_TSC (1 << 3) struct kvm_clock_data { __u64 clock; __u32 flags; - __u32 pad[9]; + __u32 pad0; + __u64 realtime; + __u64 host_tsc; + __u32 pad[4]; }; /* For KVM_CAP_SW_TLB */ From ccb05590c4325ce50b1c0deedd54d5f24f4f1652 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Nov 2021 13:02:52 -0300 Subject: [PATCH 03/16] perf tests wp: Remove unused functions on s390 Fixing these build problems: tests/wp.c:24:12: error: 'wp_read' defined but not used [-Werror=unused-function] static int wp_read(int fd, long long *count, int size) ^ tests/wp.c:35:13: error: 'get__perf_event_attr' defined but not used [-Werror=unused-function] static void get__perf_event_attr(struct perf_event_attr *attr, int wp_type, ^ CC /tmp/build/perf/util/print_binary.o Fixes: e47c6ecaae1df54a ("perf test: Convert watch point tests to test cases.") Cc: Alexander Shishkin Cc: Brendan Higgins Cc: Daniel Latypov Cc: David Gow Cc: Ian Rogers Cc: Ingo Molnar Cc: Jin Yao Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sohaib Mohamed Cc: Stephane Eranian Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/wp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c index 820d942b30c3..9d4c45184e71 100644 --- a/tools/perf/tests/wp.c +++ b/tools/perf/tests/wp.c @@ -21,6 +21,7 @@ do { \ volatile u64 data1; volatile u8 data2[3]; +#ifndef __s390x__ static int wp_read(int fd, long long *count, int size) { int ret = read(fd, count, size); @@ -48,7 +49,6 @@ static void get__perf_event_attr(struct perf_event_attr *attr, int wp_type, attr->exclude_hv = 1; } -#ifndef __s390x__ static int __event(int wp_type, void *wp_addr, unsigned long wp_len) { int fd; From 70f9c9b2df1dd12cf40862b2b31c7bf89e311066 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Nov 2021 14:44:17 -0300 Subject: [PATCH 04/16] perf tools: Set COMPAT_NEED_REALLOCARRAY for CONFIG_AUXTRACE=1 As it is being used in tools/perf/arch/arm64/util/arm-spe.c and the COMPAT_NEED_REALLOCARRAY was only being set when CORESIGHT=1 is set. Fixes: 56c31cdff7c2a640 ("perf arm-spe: Implement find_snapshot callback") Cc: Alexander Shishkin Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Link: https://lore.kernel.org/all/YZT63mIc7iY01er3@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 07e65a061fd3..afd144725a0b 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1010,6 +1010,9 @@ ifndef NO_AUXTRACE ifndef NO_AUXTRACE $(call detected,CONFIG_AUXTRACE) CFLAGS += -DHAVE_AUXTRACE_SUPPORT + ifeq ($(feature-reallocarray), 0) + CFLAGS += -DCOMPAT_NEED_REALLOCARRAY + endif endif endif From 784e8adda4cdb3e2510742023729851b6c08803c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 5 Nov 2021 15:56:15 -0700 Subject: [PATCH 05/16] perf sort: Fix the 'weight' sort key behavior Currently, the 'weight' field in the perf sample has latency information for some instructions like in memory accesses. And perf tool has 'weight' and 'local_weight' sort keys to display the info. But it's somewhat confusing what it shows exactly. In my understanding, 'local_weight' shows a weight in a single sample, and (global) 'weight' shows a sum of the weights in the hist_entry. For example: $ perf mem record -t load dd if=/dev/zero of=/dev/null bs=4k count=1M $ perf report --stdio -n -s +local_weight ... # # Overhead Samples Command Shared Object Symbol Local Weight # ........ ....... ....... ................ ......................... ............ # 21.23% 313 dd [kernel.vmlinux] [k] lockref_get_not_zero 32 12.43% 183 dd [kernel.vmlinux] [k] lockref_get_not_zero 35 11.97% 159 dd [kernel.vmlinux] [k] lockref_get_not_zero 36 10.40% 141 dd [kernel.vmlinux] [k] lockref_put_return 32 7.63% 113 dd [kernel.vmlinux] [k] lockref_get_not_zero 33 6.37% 92 dd [kernel.vmlinux] [k] lockref_get_not_zero 34 6.15% 90 dd [kernel.vmlinux] [k] lockref_put_return 33 ... So let's look at the 'lockref_get_not_zero' symbols. The top entry shows that 313 samples were captured with 'local_weight' 32, so the total weight should be 313 x 32 = 10016. But it's not the case: $ perf report --stdio -n -s +local_weight,weight -S lockref_get_not_zero ... # # Overhead Samples Command Shared Object Local Weight Weight # ........ ....... ....... ................ ............ ...... # 1.36% 4 dd [kernel.vmlinux] 36 144 0.47% 4 dd [kernel.vmlinux] 37 148 0.42% 4 dd [kernel.vmlinux] 32 128 0.40% 4 dd [kernel.vmlinux] 34 136 0.35% 4 dd [kernel.vmlinux] 36 144 0.34% 4 dd [kernel.vmlinux] 35 140 0.30% 4 dd [kernel.vmlinux] 36 144 0.30% 4 dd [kernel.vmlinux] 34 136 0.30% 4 dd [kernel.vmlinux] 32 128 0.30% 4 dd [kernel.vmlinux] 32 128 ... With the 'weight' sort key, it's divided to 4 samples even with the same info ('comm', 'dso', 'sym' and 'local_weight'). I don't think this is what we want. I found this because of the way it aggregates the 'weight' value. Since it's not a period, we should not add them in the he->stat. Otherwise, two 32 'weight' entries will create a 64 'weight' entry. After that, new 32 'weight' samples don't have a matching entry so it'd create a new entry and make it a 64 'weight' entry again and again. Later, they will be merged into 128 'weight' entries during the hists__collapse_resort() with 4 samples, multiple times like above. Let's keep the weight and display it differently. For 'local_weight', it can show the weight as is, and for (global) 'weight' it can display the number multiplied by the number of samples. With this change, I can see the expected numbers. $ perf report --stdio -n -s +local_weight,weight -S lockref_get_not_zero ... # # Overhead Samples Command Shared Object Local Weight Weight # ........ ....... ....... ................ ............ ..... # 21.23% 313 dd [kernel.vmlinux] 32 10016 12.43% 183 dd [kernel.vmlinux] 35 6405 11.97% 159 dd [kernel.vmlinux] 36 5724 7.63% 113 dd [kernel.vmlinux] 33 3729 6.37% 92 dd [kernel.vmlinux] 34 3128 4.17% 59 dd [kernel.vmlinux] 37 2183 0.08% 1 dd [kernel.vmlinux] 269 269 0.08% 1 dd [kernel.vmlinux] 38 38 Reviewed-by: Athira Jajeev Signed-off-by: Namhyung Kim Tested-by: Athira Jajeev Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211105225617.151364-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 14 +++++--------- tools/perf/util/sort.c | 24 +++++++----------------- tools/perf/util/sort.h | 2 +- 3 files changed, 13 insertions(+), 27 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 65fe65ba03c2..4e9bd7b589b1 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -290,11 +290,9 @@ static long hist_time(unsigned long htime) } static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight, u64 ins_lat, u64 p_stage_cyc) + u64 ins_lat, u64 p_stage_cyc) { - he_stat->period += period; - he_stat->weight += weight; he_stat->nr_events += 1; he_stat->ins_lat += ins_lat; he_stat->p_stage_cyc += p_stage_cyc; @@ -308,9 +306,8 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_guest_sys += src->period_guest_sys; dest->period_guest_us += src->period_guest_us; dest->nr_events += src->nr_events; - dest->weight += src->weight; dest->ins_lat += src->ins_lat; - dest->p_stage_cyc += src->p_stage_cyc; + dest->p_stage_cyc += src->p_stage_cyc; } static void he_stat__decay(struct he_stat *he_stat) @@ -598,7 +595,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; - u64 weight = entry->stat.weight; u64 ins_lat = entry->stat.ins_lat; u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; @@ -619,11 +615,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(&he->stat, period, ins_lat, p_stage_cyc); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc); + he_stat__add_period(he->stat_acc, period, ins_lat, p_stage_cyc); /* * This mem info was allocated from sample__resolve_mem @@ -733,7 +729,6 @@ __hists__add_entry(struct hists *hists, .stat = { .nr_events = 1, .period = sample->period, - .weight = sample->weight, .ins_lat = sample->ins_lat, .p_stage_cyc = sample->p_stage_cyc, }, @@ -748,6 +743,7 @@ __hists__add_entry(struct hists *hists, .raw_size = sample->raw_size, .ops = ops, .time = hist_time(sample->time), + .weight = sample->weight, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 568a88c001c6..903f34fff27e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1325,45 +1325,35 @@ struct sort_entry sort_mispredict = { .se_width_idx = HISTC_MISPREDICT, }; -static u64 he_weight(struct hist_entry *he) -{ - return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0; -} - static int64_t -sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right) +sort__weight_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_weight(left) - he_weight(right); + return left->weight - right->weight; } static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he)); + return repsep_snprintf(bf, size, "%-*llu", width, he->weight); } struct sort_entry sort_local_weight = { .se_header = "Local Weight", - .se_cmp = sort__local_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__local_weight_snprintf, .se_width_idx = HISTC_LOCAL_WEIGHT, }; -static int64_t -sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.weight - right->stat.weight; -} - static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight); + return repsep_snprintf(bf, size, "%-*llu", width, + he->weight * he->stat.nr_events); } struct sort_entry sort_global_weight = { .se_header = "Weight", - .se_cmp = sort__global_weight_cmp, + .se_cmp = sort__weight_cmp, .se_snprintf = hist_entry__global_weight_snprintf, .se_width_idx = HISTC_GLOBAL_WEIGHT, }; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index b67c469aba79..e18b79916f63 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -49,7 +49,6 @@ struct he_stat { u64 period_us; u64 period_guest_sys; u64 period_guest_us; - u64 weight; u64 ins_lat; u64 p_stage_cyc; u32 nr_events; @@ -109,6 +108,7 @@ struct hist_entry { s32 socket; s32 cpu; u64 code_page_size; + u64 weight; u8 cpumode; u8 depth; From 4d03c75363eeca861c843319a0e6f4426234ed6c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 5 Nov 2021 15:56:16 -0700 Subject: [PATCH 06/16] perf sort: Fix the 'ins_lat' sort key behavior Handle 'ins_lat' (for instruction latency) and 'local_ins_lat' sort keys with the same rationale as for the 'weight' and 'local_weight', see the previous fix in this series for a full explanation. But I couldn't test it actually, so only build tested. Reviewed-by: Athira Jajeev Signed-off-by: Namhyung Kim Tested-by: Athira Jajeev Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211105225617.151364-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 11 ++++------- tools/perf/util/sort.c | 24 +++++++----------------- tools/perf/util/sort.h | 2 +- 3 files changed, 12 insertions(+), 25 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 4e9bd7b589b1..54fe97dd191c 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -290,11 +290,10 @@ static long hist_time(unsigned long htime) } static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 ins_lat, u64 p_stage_cyc) + u64 p_stage_cyc) { he_stat->period += period; he_stat->nr_events += 1; - he_stat->ins_lat += ins_lat; he_stat->p_stage_cyc += p_stage_cyc; } @@ -306,7 +305,6 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_guest_sys += src->period_guest_sys; dest->period_guest_us += src->period_guest_us; dest->nr_events += src->nr_events; - dest->ins_lat += src->ins_lat; dest->p_stage_cyc += src->p_stage_cyc; } @@ -595,7 +593,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; - u64 ins_lat = entry->stat.ins_lat; u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; @@ -615,11 +612,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, ins_lat, p_stage_cyc); + he_stat__add_period(&he->stat, period, p_stage_cyc); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, ins_lat, p_stage_cyc); + he_stat__add_period(he->stat_acc, period, p_stage_cyc); /* * This mem info was allocated from sample__resolve_mem @@ -729,7 +726,6 @@ __hists__add_entry(struct hists *hists, .stat = { .nr_events = 1, .period = sample->period, - .ins_lat = sample->ins_lat, .p_stage_cyc = sample->p_stage_cyc, }, .parent = sym_parent, @@ -744,6 +740,7 @@ __hists__add_entry(struct hists *hists, .ops = ops, .time = hist_time(sample->time), .weight = sample->weight, + .ins_lat = sample->ins_lat, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 903f34fff27e..adc0584695d6 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1358,45 +1358,35 @@ struct sort_entry sort_global_weight = { .se_width_idx = HISTC_GLOBAL_WEIGHT, }; -static u64 he_ins_lat(struct hist_entry *he) -{ - return he->stat.nr_events ? he->stat.ins_lat / he->stat.nr_events : 0; -} - static int64_t -sort__local_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) +sort__ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) { - return he_ins_lat(left) - he_ins_lat(right); + return left->ins_lat - right->ins_lat; } static int hist_entry__local_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he_ins_lat(he)); + return repsep_snprintf(bf, size, "%-*u", width, he->ins_lat); } struct sort_entry sort_local_ins_lat = { .se_header = "Local INSTR Latency", - .se_cmp = sort__local_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__local_ins_lat_snprintf, .se_width_idx = HISTC_LOCAL_INS_LAT, }; -static int64_t -sort__global_ins_lat_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return left->stat.ins_lat - right->stat.ins_lat; -} - static int hist_entry__global_ins_lat_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.ins_lat); + return repsep_snprintf(bf, size, "%-*u", width, + he->ins_lat * he->stat.nr_events); } struct sort_entry sort_global_ins_lat = { .se_header = "INSTR Latency", - .se_cmp = sort__global_ins_lat_cmp, + .se_cmp = sort__ins_lat_cmp, .se_snprintf = hist_entry__global_ins_lat_snprintf, .se_width_idx = HISTC_GLOBAL_INS_LAT, }; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e18b79916f63..22ae7c6ae398 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -49,7 +49,6 @@ struct he_stat { u64 period_us; u64 period_guest_sys; u64 period_guest_us; - u64 ins_lat; u64 p_stage_cyc; u32 nr_events; }; @@ -109,6 +108,7 @@ struct hist_entry { s32 cpu; u64 code_page_size; u64 weight; + u64 ins_lat; u8 cpumode; u8 depth; From db4b284029099224f387d75198e5995df1cb8aef Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 5 Nov 2021 15:56:17 -0700 Subject: [PATCH 07/16] perf sort: Fix the 'p_stage_cyc' sort key behavior andle 'p_stage_cyc' (for pipeline stage cycles) sort key with the same rationale as for the 'weight' and 'local_weight', see the fix in this series for a full explanation. Not sure it also needs the local and global variants. But I couldn't test it actually because I don't have the machine. Reviewed-by: Athira Jajeev Signed-off-by: Namhyung Kim Tested-by: Athira Jajeev Cc: Andi Kleen Cc: Athira Jajeev Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20211105225617.151364-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 12 ++++-------- tools/perf/util/sort.c | 4 ++-- tools/perf/util/sort.h | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 54fe97dd191c..b776465e04ef 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -289,12 +289,10 @@ static long hist_time(unsigned long htime) return htime; } -static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 p_stage_cyc) +static void he_stat__add_period(struct he_stat *he_stat, u64 period) { he_stat->period += period; he_stat->nr_events += 1; - he_stat->p_stage_cyc += p_stage_cyc; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -305,7 +303,6 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_guest_sys += src->period_guest_sys; dest->period_guest_us += src->period_guest_us; dest->nr_events += src->nr_events; - dest->p_stage_cyc += src->p_stage_cyc; } static void he_stat__decay(struct he_stat *he_stat) @@ -593,7 +590,6 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; - u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -612,11 +608,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, p_stage_cyc); + he_stat__add_period(&he->stat, period); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, p_stage_cyc); + he_stat__add_period(he->stat_acc, period); /* * This mem info was allocated from sample__resolve_mem @@ -726,7 +722,6 @@ __hists__add_entry(struct hists *hists, .stat = { .nr_events = 1, .period = sample->period, - .p_stage_cyc = sample->p_stage_cyc, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, @@ -741,6 +736,7 @@ __hists__add_entry(struct hists *hists, .time = hist_time(sample->time), .weight = sample->weight, .ins_lat = sample->ins_lat, + .p_stage_cyc = sample->p_stage_cyc, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index adc0584695d6..a111065b484e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1394,13 +1394,13 @@ struct sort_entry sort_global_ins_lat = { static int64_t sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { - return left->stat.p_stage_cyc - right->stat.p_stage_cyc; + return left->p_stage_cyc - right->p_stage_cyc; } static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc); + return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); } struct sort_entry sort_p_stage_cyc = { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 22ae7c6ae398..7b7145501933 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -49,7 +49,6 @@ struct he_stat { u64 period_us; u64 period_guest_sys; u64 period_guest_us; - u64 p_stage_cyc; u32 nr_events; }; @@ -109,6 +108,7 @@ struct hist_entry { u64 code_page_size; u64 weight; u64 ins_lat; + u64 p_stage_cyc; u8 cpumode; u8 depth; From 162b944598344fd72800d453885979f06ca263f3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 10 Sep 2021 11:46:54 -0300 Subject: [PATCH 08/16] tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources To pick the changes in: 828ca89628bfcb1b ("KVM: x86: Expose TSC offset controls to userspace") That just rebuilds kvm-stat.c on x86, no change in functionality. This silences these perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h' diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Cc: Oliver Upton Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 2ef1f6513c68..5a776a08f78c 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -504,4 +504,8 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ +#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + #endif /* _ASM_X86_KVM_H */ From cb5a63feae2d963cac7b687e6598d620bed13507 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 17 Nov 2021 09:26:09 +0100 Subject: [PATCH 09/16] perf test sample-parsing: Fix branch_stack entry endianness check Commit 10269a2ca2b08cbd ("perf test sample-parsing: Add endian test for struct branch_flags") broke the test case 27 (Sample parsing) on s390 on linux-next tree: # perf test -Fv 27 27: Sample parsing --- start --- parsing failed for sample_type 0x800 ---- end ---- Sample parsing: FAILED! # The cause of the failure is a wrong #define BS_EXPECTED_BE statement in above commit. Correct this define and the test case runs fine. Output After: # perf test -Fv 27 27: Sample parsing : --- start --- ---- end ---- Sample parsing: Ok # Fixes: 10269a2ca2b08c ("perf test sample-parsing: Add endian test for struct branch_flags") Signed-off-by: Thomas Richter Tested-by: Madhavan Srinivasan Acked-by: Madhavan Srinivasan CC: Sven Schnelle Cc: Heiko Carstens Cc: Jiri Olsa Cc: Sumanth Korikkar Cc: Vasily Gorbik Link: https://lore.kernel.org/r/54077e81-503e-3405-6cb0-6541eb5532cc@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sample-parsing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index b669d22f2b13..07f2411b0ad4 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -36,7 +36,7 @@ * These are based on the input value (213) specified * in branch_stack variable. */ -#define BS_EXPECTED_BE 0xa00d000000000000 +#define BS_EXPECTED_BE 0xa000d00000000000 #define BS_EXPECTED_LE 0xd5000000 #define FLAG(s) s->branch_stack->entries[i].flags From 92723ea0f11d92496687db8c9725248e9d1e5e1d Mon Sep 17 00:00:00 2001 From: Sohaib Mohamed Date: Wed, 10 Nov 2021 04:20:11 +0200 Subject: [PATCH 10/16] perf bench: Fix two memory leaks detected with ASan ASan reports memory leaks while running: $ perf bench sched all Fixes: e27454cc6352c422 ("perf bench: Add sched-messaging.c: Benchmark for scheduler and IPC mechanisms based on hackbench") Signed-off-by: Sohaib Mohamed Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Hitoshi Mitake Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Russel Cc: Peter Zijlstra Cc: Pierre Gondois Link: http://lore.kernel.org/lkml/20211110022012.16620-1-sohaib.amhmd@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-messaging.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 488f6e6ba1a5..fa0ff4ce2b74 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -223,6 +223,8 @@ static unsigned int group(pthread_t *pth, snd_ctx->out_fds[i] = fds[1]; if (!thread_mode) close(fds[0]); + + free(ctx); } /* Now we have all the fds, fork the senders */ @@ -239,6 +241,8 @@ static unsigned int group(pthread_t *pth, for (i = 0; i < num_fds; i++) close(snd_ctx->out_fds[i]); + free(snd_ctx); + /* Return number of children to reap */ return num_fds * 2; } From 9e1a8d9f683260d50e0a14176d3f7c46a93b2700 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 5 Nov 2021 10:41:30 +0000 Subject: [PATCH 11/16] perf inject: Fix ARM SPE handling 'perf inject' is currently not working for Arm SPE. When you try to run 'perf inject' and 'perf report' with a perf.data file that contains SPE traces, the tool reports a "Bad address" error: # ./perf record -e arm_spe_0/ts_enable=1,store_filter=1,branch_filter=1,load_filter=1/ -a -- sleep 1 # ./perf inject -i perf.data -o perf.inject.data --itrace # ./perf report -i perf.inject.data --stdio 0x42c00 [0x8]: failed to process type: 9 [Bad address] Error: failed to process sample As far as I know, the issue was first spotted in [1], but 'perf inject' was not yet injecting the samples. This patch does something similar to what cs_etm does for injecting the samples [2], but for SPE. [1] https://patchwork.kernel.org/project/linux-arm-kernel/cover/20210412091006.468557-1-leo.yan@linaro.org/#24117339 [2] https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/tree/tools/perf/util/cs-etm.c?h=perf/core&id=133fe2e617e48ca0948983329f43877064ffda3e#n1196 Reviewed-by: James Clark Signed-off-by: German Gomez Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211105104130.28186-2-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 4748bcfe61de..fccac06b573a 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -51,6 +51,7 @@ struct arm_spe { u8 timeless_decoding; u8 data_queued; + u64 sample_type; u8 sample_flc; u8 sample_llc; u8 sample_tlb; @@ -287,6 +288,12 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + static inline int arm_spe_deliver_synth_event(struct arm_spe *spe, struct arm_spe_queue *speq __maybe_unused, @@ -295,6 +302,12 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, { int ret; + if (spe->synth_opts.inject) { + ret = arm_spe__inject_event(event, sample, spe->sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(spe->session, event, sample); if (ret) pr_err("ARM SPE: failed to deliver event, error %d\n", ret); @@ -986,6 +999,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) else attr.sample_type |= PERF_SAMPLE_TIME; + spe->sample_type = attr.sample_type; + attr.exclude_user = evsel->core.attr.exclude_user; attr.exclude_kernel = evsel->core.attr.exclude_kernel; attr.exclude_hv = evsel->core.attr.exclude_hv; From e8c04ea0fef5731dbcaabac86d65254c227aedf4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 17 Nov 2021 14:29:31 -0300 Subject: [PATCH 12/16] tools build: Fix removal of feature-sync-compare-and-swap feature detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch removing the feature-sync-compare-and-swap feature detection didn't remove the call to main_test_sync_compare_and_swap(), making the 'test-all' case fail an all the feature tests to be performed individually: $ cat /tmp/build/perf/feature/test-all.make.output In file included from test-all.c:18: test-libpython-version.c:5:10: error: #error 5 | #error | ^~~~~ test-all.c: In function ‘main’: test-all.c:203:9: error: implicit declaration of function ‘main_test_sync_compare_and_swap’ [-Werror=implicit-function-declaration] 203 | main_test_sync_compare_and_swap(argc, argv); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors $ Fix it, now to figure out what is that test-libpython-version.c problem... Fixes: 60fa754b2a5a4e0c ("tools: Remove feature-sync-compare-and-swap feature detection") Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/YZU9Fe0sgkHSXeC2@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-all.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 920439527291..0b243ce842be 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -200,7 +200,6 @@ int main(int argc, char *argv[]) main_test_timerfd(); main_test_stackprotector_all(); main_test_libdw_dwarf_unwind(); - main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_pthread_barrier(); From 8b8dcc3720d57d88faa74e0da2fb419da953ddd0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 8 Sep 2021 16:09:08 -0300 Subject: [PATCH 13/16] tools headers UAPI: Sync MIPS syscall table file changed by new futex_waitv syscall To pick the changes in these csets: b3ff2881ba18b852 ("MIPS: syscalls: Wire up futex_waitv syscall") That add support for this new syscall in tools such as 'perf trace'. For instance, this is now possible (adapted from the x86_64 test output): # perf trace -e futex_waitv ^C# # perf trace -v -e futex_waitv event qualifier tracepoint filter: (common_pid != 807333 && common_pid != 3564) && (id == 449) ^C# # perf trace -v -e futex* --max-events 10 event qualifier tracepoint filter: (common_pid != 812168 && common_pid != 3564) && (id == 202 || id == 449) mmap size 528384B ? ( ): Timer/219310 ... [continued]: futex()) = -1 ETIMEDOUT (Connection timed out) 0.012 ( 0.002 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.024 ( 0.060 ms): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) = 0 0.086 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.088 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d424, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... 0.075 ( 0.005 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d420, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.169 ( 0.004 ms): Web Content/219299 futex(uaddr: 0x7fd0b152d424, op: WAKE|PRIVATE_FLAG, val: 1) = 1 0.088 ( 0.089 ms): Timer/219310 ... [continued]: futex()) = 0 0.179 ( 0.001 ms): Timer/219310 futex(uaddr: 0x7fd0b152d3c8, op: WAKE|PRIVATE_FLAG, val: 1) = 0 0.181 ( ): Timer/219310 futex(uaddr: 0x7fd0b152d420, op: WAIT_BITSET|PRIVATE_FLAG, utime: 0x7fd0b1657840, val3: MATCH_ANY) ... # That is the filter expression attached to the raw_syscalls:sys_{enter,exit} tracepoints. $ grep futex_waitv tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl 449 n64 futex_waitv sys_futex_waitv $ This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest version at 'arch/mips/kernel/syscalls/syscall_n64.tbl' diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl Cc: Thomas Bogendoerfer Cc: Wang Haojun Link: https://lore.kernel.org/lkml/YZZRxuIyvSGLZhM4@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1ca7bc337932..e2c481fcede6 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -363,3 +363,4 @@ 446 n64 landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 n64 process_mrelease sys_process_mrelease +449 n64 futex_waitv sys_futex_waitv From 0ca1f534a776cc7d42f2c33da4732b74ec2790cd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 17 Nov 2021 23:12:47 -0800 Subject: [PATCH 14/16] perf hist: Fix memory leak of a perf_hpp_fmt perf_hpp__column_unregister() removes an entry from a list but doesn't free the memory causing a memory leak spotted by leak sanitizer. Add the free while at the same time reducing the scope of the function to static. Signed-off-by: Ian Rogers Reviewed-by: Kajol Jain Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211118071247.2140392-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/hist.c | 28 ++++++++++++++-------------- tools/perf/util/hist.h | 1 - 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index c1f24d004852..5075ecead5f3 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -535,6 +535,18 @@ struct perf_hpp_list perf_hpp_list = { #undef __HPP_SORT_ACC_FN #undef __HPP_SORT_RAW_FN +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + /* + * At this point fmt should be completely + * unhooked, if not it's a bug. + */ + BUG_ON(!list_empty(&fmt->list)); + BUG_ON(!list_empty(&fmt->sort_list)); + + if (fmt->free) + fmt->free(fmt); +} void perf_hpp__init(void) { @@ -598,9 +610,10 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, list_add(&format->sort_list, &list->sorts); } -void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +static void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del_init(&format->list); + fmt_free(format); } void perf_hpp__cancel_cumulate(void) @@ -672,19 +685,6 @@ next: } -static void fmt_free(struct perf_hpp_fmt *fmt) -{ - /* - * At this point fmt should be completely - * unhooked, if not it's a bug. - */ - BUG_ON(!list_empty(&fmt->list)); - BUG_ON(!list_empty(&fmt->sort_list)); - - if (fmt->free) - fmt->free(fmt); -} - void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5343b62476e6..621f35ae1efa 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -369,7 +369,6 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__cancel_cumulate(void); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); From d9fc706108c15f8bc2d4ccccf8e50f74830fabd9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 17 Nov 2021 23:38:04 -0800 Subject: [PATCH 15/16] perf report: Fix memory leaks around perf_tip() perf_tip() may allocate memory or use a literal, this means memory wasn't freed if allocated. Change the API so that literals aren't used. At the same time add missing frees for system_path. These issues were spotted using leak sanitizer. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211118073804.2149974-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 15 +++++++++------ tools/perf/util/util.c | 14 +++++++------- tools/perf/util/util.h | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8167ebfe776a..8ae400429870 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -619,14 +619,17 @@ static int report__browse_hists(struct report *rep) int ret; struct perf_session *session = rep->session; struct evlist *evlist = session->evlist; - const char *help = perf_tip(system_path(TIPDIR)); + char *help = NULL, *path = NULL; - if (help == NULL) { + path = system_path(TIPDIR); + if (perf_tip(&help, path) || help == NULL) { /* fallback for people who don't install perf ;-) */ - help = perf_tip(DOCDIR); - if (help == NULL) - help = "Cannot load tips.txt file, please install perf!"; + free(path); + path = system_path(DOCDIR); + if (perf_tip(&help, path) || help == NULL) + help = strdup("Cannot load tips.txt file, please install perf!"); } + free(path); switch (use_browser) { case 1: @@ -651,7 +654,7 @@ static int report__browse_hists(struct report *rep) ret = evlist__tty_browse_hists(evlist, rep, help); break; } - + free(help); return ret; } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 37a9492edb3e..df3c4671be72 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -379,32 +379,32 @@ fetch_kernel_version(unsigned int *puint, char *str, return 0; } -const char *perf_tip(const char *dirpath) +int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; struct str_node *node; - char *tip = NULL; struct strlist_config conf = { .dirname = dirpath, .file_only = true, }; + int ret = 0; + *strp = NULL; tips = strlist__new("tips.txt", &conf); if (tips == NULL) - return errno == ENOENT ? NULL : - "Tip: check path of tips.txt or get more memory! ;-p"; + return -errno; if (strlist__nr_entries(tips) == 0) goto out; node = strlist__entry(tips, random() % strlist__nr_entries(tips)); - if (asprintf(&tip, "Tip: %s", node->s) < 0) - tip = (char *)"Tip: get more memory! ;-)"; + if (asprintf(strp, "Tip: %s", node->s) < 0) + ret = -ENOMEM; out: strlist__delete(tips); - return tip; + return ret; } char *perf_exe(char *buf, int len) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ad737052e597..9f0d36ba77f2 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -39,7 +39,7 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_FMT "%d.%d.%d" #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) -const char *perf_tip(const char *dirpath); +int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); From b194c9cd09dd98af76beaa32a041af674260d730 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 18 Nov 2021 00:47:49 -0800 Subject: [PATCH 16/16] perf evsel: Fix memory leaks relating to unit unit may have a strdup pointer or be to a literal, consequently memory assocciated with it isn't freed. Change it so the unit is always strdup and so the memory can be safely freed. Fix related issue in perf_event__process_event_update() for name and own_cpus. Leaks were spotted by leak sanitizer. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211118084749.2191447-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/event_update.c | 5 ++--- tools/perf/util/evsel.c | 18 +++++++++--------- tools/perf/util/header.c | 8 +++++--- tools/perf/util/parse-events.c | 9 ++++++--- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index fbb68deba59f..d01532d40acb 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -88,7 +88,6 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes struct evsel *evsel; struct event_name tmp; struct evlist *evlist = evlist__new_default(); - char *unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to get evlist", evlist); @@ -99,7 +98,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes perf_evlist__id_add(&evlist->core, &evsel->core, 0, 0, 123); - evsel->unit = unit; + free((char *)evsel->unit); + evsel->unit = strdup("KRAVA"); TEST_ASSERT_VAL("failed to synthesize attr update unit", !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit)); @@ -119,7 +119,6 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes TEST_ASSERT_VAL("failed to synthesize attr update cpus", !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus)); - free(unit); evlist__delete(evlist); return 0; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a59fb2ecb84e..ac0127be0459 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -241,7 +241,7 @@ void evsel__init(struct evsel *evsel, { perf_evsel__init(&evsel->core, attr, idx); evsel->tracking = !idx; - evsel->unit = ""; + evsel->unit = strdup(""); evsel->scale = 1.0; evsel->max_events = ULONG_MAX; evsel->evlist = NULL; @@ -276,13 +276,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) } if (evsel__is_clock(evsel)) { - /* - * The evsel->unit points to static alias->unit - * so it's ok to use static string in here. - */ - static const char *unit = "msec"; - - evsel->unit = unit; + free((char *)evsel->unit); + evsel->unit = strdup("msec"); evsel->scale = 1e-6; } @@ -420,7 +415,11 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->max_events = orig->max_events; evsel->tool_event = orig->tool_event; - evsel->unit = orig->unit; + free((char *)evsel->unit); + evsel->unit = strdup(orig->unit); + if (evsel->unit == NULL) + goto out_err; + evsel->scale = orig->scale; evsel->snapshot = orig->snapshot; evsel->per_pkg = orig->per_pkg; @@ -1441,6 +1440,7 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); + zfree(&evsel->unit); zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index fda8d14c891f..79cce216727e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4257,9 +4257,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, switch (ev->type) { case PERF_EVENT_UPDATE__UNIT: + free((char *)evsel->unit); evsel->unit = strdup(ev->data); break; case PERF_EVENT_UPDATE__NAME: + free(evsel->name); evsel->name = strdup(ev->data); break; case PERF_EVENT_UPDATE__SCALE: @@ -4268,11 +4270,11 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, break; case PERF_EVENT_UPDATE__CPUS: ev_cpus = (struct perf_record_event_update_cpus *)ev->data; - map = cpu_map__new_data(&ev_cpus->cpus); - if (map) + if (map) { + perf_cpu_map__put(evsel->core.own_cpus); evsel->core.own_cpus = map; - else + } else pr_err("failed to get event_update cpus\n"); default: break; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5bfb6f892489..ba74fdf74af9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -402,8 +402,10 @@ static int add_event_tool(struct list_head *list, int *idx, if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME) - evsel->unit = "ns"; + if (tool_event == PERF_TOOL_DURATION_TIME) { + free((char *)evsel->unit); + evsel->unit = strdup("ns"); + } return 0; } @@ -1630,7 +1632,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (parse_state->fake_pmu) return 0; - evsel->unit = info.unit; + free((char *)evsel->unit); + evsel->unit = strdup(info.unit); evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot;