From b430d243673760ada58d3c8678ba9f11f72083cb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 17 Jan 2023 23:24:09 -0800 Subject: [PATCH 001/114] perf script flamegraph: Avoid d3-flame-graph package dependency Currently flame graph generation requires a d3-flame-graph template to be installed. Unfortunately this is hard to come by for things like Debian [1]. If the template isn't installed then ask if it should be downloaded from jsdelivr CDN. The downloaded HTML file is validated against an md5sum. If the download fails, generate a minimal flame graph with the javascript coming from links to jsdelivr CDN. v3. Adds a warning message and quits before download in live mode. v2. Change the warning to a prompt about downloading and add the --allow-download command line flag. Add an md5sum check for the downloaded HTML. [1] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=996839 Reviewed-by: Andreas Gerstmayr Signed-off-by: Ian Rogers Cc: 996839@bugs.debian.org Cc: Alexander Shishkin Cc: Brendan Gregg Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Martin Spier Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230118072409.147786-1-irogers@google.com # v3 discussion Link: https://lore.kernel.org/r/20230112220024.32709-1-irogers@google.com # v2 discussion Link: https://lore.kernel.org/r/CAP-5=fXi_9zdhTAoYApiFQoLURAvpEatFzU3uL23o3zs=z25ZQ@mail.gmail.com # v1 discussion Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/flamegraph.py | 107 +++++++++++++++++++----- 1 file changed, 85 insertions(+), 22 deletions(-) diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py index b6af1dd5f816..cf7ce8229a6c 100755 --- a/tools/perf/scripts/python/flamegraph.py +++ b/tools/perf/scripts/python/flamegraph.py @@ -19,12 +19,34 @@ # pylint: disable=missing-function-docstring from __future__ import print_function -import sys -import os -import io import argparse +import hashlib +import io import json 
+import os import subprocess +import sys +import urllib.request + +minimal_html = """ + + + +
+ + + + +""" # pylint: disable=too-few-public-methods class Node: @@ -50,16 +72,6 @@ class FlameGraphCLI: self.args = args self.stack = Node("all", "root") - if self.args.format == "html" and \ - not os.path.isfile(self.args.template): - print("Flame Graph template {} does not exist. Please install " - "the js-d3-flame-graph (RPM) or libjs-d3-flame-graph (deb) " - "package, specify an existing flame graph template " - "(--template PATH) or another output format " - "(--format FORMAT).".format(self.args.template), - file=sys.stderr) - sys.exit(1) - @staticmethod def get_libtype_from_dso(dso): """ @@ -128,16 +140,63 @@ class FlameGraphCLI: } options_json = json.dumps(options) + template_md5sum = None + if self.args.format == "html": + if os.path.isfile(self.args.template): + template = f"file://{self.args.template}" + else: + if not self.args.allow_download: + print(f"""Warning: Flame Graph template '{self.args.template}' +does not exist. To avoid this please install a package such as the +js-d3-flame-graph or libjs-d3-flame-graph, specify an existing flame +graph template (--template PATH) or use another output format (--format +FORMAT).""", + file=sys.stderr) + if self.args.input == "-": + print("""Not attempting to download Flame Graph template as script command line +input is disabled due to using live mode. If you want to download the +template retry without live mode. For example, use 'perf record -a -g +-F 99 sleep 60' and 'perf script report flamegraph'. Alternatively, +download the template from: +https://cdn.jsdelivr.net/npm/d3-flame-graph@4.1.3/dist/templates/d3-flamegraph-base.html +and place it at: +/usr/share/d3-flame-graph/d3-flamegraph-base.html""", + file=sys.stderr) + quit() + s = None + while s != "y" and s != "n": + s = input("Do you wish to download a template from cdn.jsdelivr.net? 
(this warning can be suppressed with --allow-download) [yn] ").lower() + if s == "n": + quit() + template = "https://cdn.jsdelivr.net/npm/d3-flame-graph@4.1.3/dist/templates/d3-flamegraph-base.html" + template_md5sum = "143e0d06ba69b8370b9848dcd6ae3f36" + try: - with io.open(self.args.template, encoding="utf-8") as template: - output_str = ( - template.read() - .replace("/** @options_json **/", options_json) - .replace("/** @flamegraph_json **/", stacks_json) - ) - except IOError as err: - print("Error reading template file: {}".format(err), file=sys.stderr) - sys.exit(1) + with urllib.request.urlopen(template) as template: + output_str = "".join([ + l.decode("utf-8") for l in template.readlines() + ]) + except Exception as err: + print(f"Error reading template {template}: {err}\n" + "a minimal flame graph will be generated", file=sys.stderr) + output_str = minimal_html + template_md5sum = None + + if template_md5sum: + download_md5sum = hashlib.md5(output_str.encode("utf-8")).hexdigest() + if download_md5sum != template_md5sum: + s = None + while s != "y" and s != "n": + s = input(f"""Unexpected template md5sum. 
+{download_md5sum} != {template_md5sum}, for: +{output_str} +continue?[yn] """).lower() + if s == "n": + quit() + + output_str = output_str.replace("/** @options_json **/", options_json) + output_str = output_str.replace("/** @flamegraph_json **/", stacks_json) + output_fn = self.args.output or "flamegraph.html" else: output_str = stacks_json @@ -172,6 +231,10 @@ if __name__ == "__main__": choices=["blue-green", "orange"]) parser.add_argument("-i", "--input", help=argparse.SUPPRESS) + parser.add_argument("--allow-download", + default=False, + action="store_true", + help="allow unprompted downloading of HTML template") cli_args = parser.parse_args() cli = FlameGraphCLI(cli_args) From acef233b7ca749fda153a06bbd2d9feb2bb16857 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Jan 2023 15:29:25 +0800 Subject: [PATCH 002/114] perf pmu: Add #slots literal support for arm64 The slots in each architecture may be different, so add a #slots literal to obtain the slots of different architectures, so that #slots can be used in metrics. Currently, #slots is only supported on arm64; other architectures return NAN. On arm64, the value of slots comes from the register PMMIR_EL1.SLOT, which can be read from /sys/bus/event_source/devices/armv8_pmuv3_*/caps/slots. PMMIR_EL1.SLOT might read as zero if the PMU version is lower than ID_AA64DFR0_EL1_PMUVer_V3P4 or the STALL_SLOT event is not implemented. 
Reviewed-by: John Garry Signed-off-by: Jing Zhang Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Will Deacon Cc: Xing Zhengjun Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1673940573-90503-2-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/pmu.c | 35 ++++++++++++++++++++++++++++++-- tools/perf/util/expr.c | 5 +++++ tools/perf/util/pmu.c | 6 ++++++ tools/perf/util/pmu.h | 1 + 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 477e513972a4..9e674cac5a73 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -3,8 +3,10 @@ #include #include "../../../util/cpumap.h" #include "../../../util/pmu.h" +#include +#include -const struct pmu_events_table *pmu_events_table__find(void) +static struct perf_pmu *pmu__find_core_pmu(void) { struct perf_pmu *pmu = NULL; @@ -19,8 +21,37 @@ const struct pmu_events_table *pmu_events_table__find(void) if (pmu->cpus->nr != cpu__max_cpu().cpu) return NULL; - return perf_pmu__find_table(pmu); + return pmu; } return NULL; } + +const struct pmu_events_table *pmu_events_table__find(void) +{ + struct perf_pmu *pmu = pmu__find_core_pmu(); + + if (pmu) + return perf_pmu__find_table(pmu); + + return NULL; +} + +double perf_pmu__cpu_slots_per_cycle(void) +{ + char path[PATH_MAX]; + unsigned long long slots = 0; + struct perf_pmu *pmu = pmu__find_core_pmu(); + + if (pmu) { + scnprintf(path, PATH_MAX, + EVENT_SOURCE_DEVICE_PATH "%s/caps/slots", pmu->name); + /* + * The value of slots is not greater than 32 bits, but sysfs__read_int + * can't read value with 0x prefix, so use sysfs__read_ull instead. + */ + sysfs__read_ull(path, &slots); + } + + return slots ? 
(double)slots : NAN; +} diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 00dcde35e0d3..c1da20b868db 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -19,6 +19,7 @@ #include #include #include +#include "pmu.h" #ifdef PARSER_DEBUG extern int expr_debug; @@ -448,6 +449,10 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx result = topology->core_cpus_lists; goto out; } + if (!strcmp("#slots", literal)) { + result = perf_pmu__cpu_slots_per_cycle(); + goto out; + } pr_err("Unrecognized literal '%s'", literal); out: diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 2bdeb89352e7..cbb4fbf124bd 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "debug.h" #include "evsel.h" #include "pmu.h" @@ -1993,3 +1994,8 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, *ucpus_ptr = unmatched_cpus; return 0; } + +double __weak perf_pmu__cpu_slots_per_cycle(void) +{ + return NAN; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 69ca0004f94f..fd414ba1d776 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -259,4 +259,5 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, char *pmu_find_real_name(const char *name); char *pmu_find_alias_name(const char *name); +double perf_pmu__cpu_slots_per_cycle(void); #endif /* __PMU_H */ From 5b51e47a3f1d7619b424b4b89b5d19569a462b09 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Jan 2023 15:29:26 +0800 Subject: [PATCH 003/114] perf jevent: Add general metrics support Add general metrics support, so that some general metrics applicable to multiple architectures can be defined in the public JSON file like general events, and then add general metrics through "arch_std_event" in JSON file of different architecture. 
Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Will Deacon Cc: Xing Zhengjun Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1673940573-90503-3-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 4c398e0eeb2f..0416b7442171 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -358,6 +358,8 @@ def preprocess_arch_std_files(archpath: str) -> None: for event in read_json_events(item.path, topic=''): if event.name: _arch_std_events[event.name.lower()] = event + if event.metric_name: + _arch_std_events[event.metric_name.lower()] = event def print_events_table_prefix(tblname: str) -> None: From a9ff64e5a0421914c6b23e4505d9384b8c745b5a Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Jan 2023 15:29:27 +0800 Subject: [PATCH 004/114] perf vendor events arm64: Add common topdown L1 metrics The metrics of topdown L1 are from ARM sbsa7.0 platform design doc[0], D37-38, which are standard. So put them in the common file sbsa.json of arm64, so that other cores besides n2/v2 can also be reused. 
[0] https://documentation-service.arm.com/static/60250c7395978b529036da86?token= Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Will Deacon Cc: Xing Zhengjun Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1673940573-90503-4-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/arm64/sbsa.json | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/sbsa.json diff --git a/tools/perf/pmu-events/arch/arm64/sbsa.json b/tools/perf/pmu-events/arch/arm64/sbsa.json new file mode 100644 index 000000000000..f678c37ea9c3 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/sbsa.json @@ -0,0 +1,30 @@ +[ + { + "MetricExpr": "stall_slot_frontend / (#slots * cpu_cycles)", + "BriefDescription": "Frontend bound L1 topdown metric", + "MetricGroup": "TopdownL1", + "MetricName": "frontend_bound", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "(1 - op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))", + "BriefDescription": "Bad speculation L1 topdown metric", + "MetricGroup": "TopdownL1", + "MetricName": "bad_speculation", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "(op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))", + "BriefDescription": "Retiring L1 topdown metric", + "MetricGroup": "TopdownL1", + "MetricName": "retiring", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "stall_slot_backend / (#slots * cpu_cycles)", + "BriefDescription": "Backend Bound L1 topdown metric", + "MetricGroup": "TopdownL1", + "MetricName": "backend_bound", + "ScaleUnit": "100%" + } +] From c1c685cee6a12005fdc05ffdd584e30840da5b0c Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Jan 2023 15:29:28 +0800 Subject: 
[PATCH 005/114] perf vendor events arm64: Add topdown L1 metrics for neoverse-n2-v2 Add general topdown L1 metrics for neoverse-n2-v2. Because stall_slot and stall_slot_frontend are miscounted on neoverse-n2, cpu_cycles must be subtracted from both to get the real values, so override the "MetricExpr" for neoverse-n2, whose slot count is 5. See the ARM neoverse-n2 errata notice [0], D117. Since neoverse-n2/neoverse-v2 does not yet support topdown L2, metric groups such as Cache, TLB, Branch, InstructionMix and PEutilization will be added in the following patches to allow further analysis of performance bottlenecks. See the ARM PMU guides [1][2]. [0] https://documentation-service.arm.com/static/636a66a64e6cf12278ad89cb?token= [1] https://documentation-service.arm.com/static/628f8fa3dfaf015c2b76eae8?token= [2] https://documentation-service.arm.com/static/62cfe21e31ea212bb6627393?token= Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Will Deacon Cc: Xing Zhengjun Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1673940573-90503-5-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/arm/neoverse-n2-v2/metrics.json | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json new file mode 100644 index 000000000000..4e7417f0274f --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json @@ -0,0 +1,17 @@ +[ + { + "ArchStdEvent": "FRONTEND_BOUND", + "MetricExpr": "((stall_slot_frontend) if (#slots - 5) else 
(stall_slot_frontend - cpu_cycles)) / (#slots * cpu_cycles)" + }, + { + "ArchStdEvent": "BAD_SPECULATION", + "MetricExpr": "(1 - op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))" + }, + { + "ArchStdEvent": "RETIRING", + "MetricExpr": "(op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))" + }, + { + "ArchStdEvent": "BACKEND_BOUND" + } +] From 6a60dd2e876913be55e17e53ee57e1fe09448238 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Jan 2023 15:29:29 +0800 Subject: [PATCH 006/114] perf vendor events arm64: Add TLB metrics for neoverse-n2-v2 Add TLB related metrics. Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Will Deacon Cc: Xing Zhengjun Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1673940573-90503-6-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/arm/neoverse-n2-v2/metrics.json | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json index 4e7417f0274f..60bbd8f8f60d 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json @@ -13,5 +13,54 @@ }, { "ArchStdEvent": "BACKEND_BOUND" + }, + { + "MetricExpr": "L1D_TLB_REFILL / L1D_TLB", + "BriefDescription": "The rate of L1D TLB refill to the overall L1D TLB lookups", + "MetricGroup": "TLB", + "MetricName": "l1d_tlb_miss_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "L1I_TLB_REFILL / L1I_TLB", + "BriefDescription": "The rate of L1I TLB refill to the 
overall L1I TLB lookups", + "MetricGroup": "TLB", + "MetricName": "l1i_tlb_miss_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "L2D_TLB_REFILL / L2D_TLB", + "BriefDescription": "The rate of L2D TLB refill to the overall L2D TLB lookups", + "MetricGroup": "TLB", + "MetricName": "l2_tlb_miss_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "DTLB_WALK / INST_RETIRED * 1000", + "BriefDescription": "The rate of TLB Walks per kilo instructions for data accesses", + "MetricGroup": "TLB", + "MetricName": "dtlb_mpki", + "ScaleUnit": "1MPKI" + }, + { + "MetricExpr": "DTLB_WALK / L1D_TLB", + "BriefDescription": "The rate of DTLB Walks to the overall L1D TLB lookups", + "MetricGroup": "TLB", + "MetricName": "dtlb_walk_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "ITLB_WALK / INST_RETIRED * 1000", + "BriefDescription": "The rate of TLB Walks per kilo instructions for instruction accesses", + "MetricGroup": "TLB", + "MetricName": "itlb_mpki", + "ScaleUnit": "1MPKI" + }, + { + "MetricExpr": "ITLB_WALK / L1I_TLB", + "BriefDescription": "The rate of ITLB Walks to the overall L1I TLB lookups", + "MetricGroup": "TLB", + "MetricName": "itlb_walk_rate", + "ScaleUnit": "100%" } ] From 8556d367a7f9c44860422bca5fb677f07aca3960 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Jan 2023 15:29:30 +0800 Subject: [PATCH 007/114] perf vendor events arm64: Add cache metrics for neoverse-n2-v2 Add cache related metrics. 
Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Will Deacon Cc: Xing Zhengjun Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1673940573-90503-7-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/arm/neoverse-n2-v2/metrics.json | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json index 60bbd8f8f60d..08c6aaa7ff09 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json @@ -62,5 +62,82 @@ "MetricGroup": "TLB", "MetricName": "itlb_walk_rate", "ScaleUnit": "100%" + }, + { + "MetricExpr": "L1I_CACHE_REFILL / INST_RETIRED * 1000", + "BriefDescription": "The rate of L1 I-Cache misses per kilo instructions", + "MetricGroup": "Cache", + "MetricName": "l1i_cache_mpki", + "ScaleUnit": "1MPKI" + }, + { + "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE", + "BriefDescription": "The rate of L1 I-Cache misses to the overall L1 I-Cache", + "MetricGroup": "Cache", + "MetricName": "l1i_cache_miss_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "L1D_CACHE_REFILL / INST_RETIRED * 1000", + "BriefDescription": "The rate of L1 D-Cache misses per kilo instructions", + "MetricGroup": "Cache", + "MetricName": "l1d_cache_mpki", + "ScaleUnit": "1MPKI" + }, + { + "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE", + "BriefDescription": "The rate of L1 D-Cache misses to the overall L1 D-Cache", + "MetricGroup": "Cache", + "MetricName": "l1d_cache_miss_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "L2D_CACHE_REFILL / INST_RETIRED * 1000", + "BriefDescription": "The 
rate of L2 D-Cache misses per kilo instructions", + "MetricGroup": "Cache", + "MetricName": "l2d_cache_mpki", + "ScaleUnit": "1MPKI" + }, + { + "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE", + "BriefDescription": "The rate of L2 D-Cache misses to the overall L2 D-Cache", + "MetricGroup": "Cache", + "MetricName": "l2d_cache_miss_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "L3D_CACHE_REFILL / INST_RETIRED * 1000", + "BriefDescription": "The rate of L3 D-Cache misses per kilo instructions", + "MetricGroup": "Cache", + "MetricName": "l3d_cache_mpki", + "ScaleUnit": "1MPKI" + }, + { + "MetricExpr": "L3D_CACHE_REFILL / L3D_CACHE", + "BriefDescription": "The rate of L3 D-Cache misses to the overall L3 D-Cache", + "MetricGroup": "Cache", + "MetricName": "l3d_cache_miss_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "LL_CACHE_MISS_RD / INST_RETIRED * 1000", + "BriefDescription": "The rate of LL Cache read misses per kilo instructions", + "MetricGroup": "Cache", + "MetricName": "ll_cache_read_mpki", + "ScaleUnit": "1MPKI" + }, + { + "MetricExpr": "LL_CACHE_MISS_RD / LL_CACHE_RD", + "BriefDescription": "The rate of LL Cache read misses to the overall LL Cache read", + "MetricGroup": "Cache", + "MetricName": "ll_cache_read_miss_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "(LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD", + "BriefDescription": "The rate of LL Cache read hit to the overall LL Cache read", + "MetricGroup": "Cache", + "MetricName": "ll_cache_read_hit_rate", + "ScaleUnit": "100%" } ] From a1adade799bd9348a9bb290b3df2ca5068cd57bc Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Jan 2023 15:29:31 +0800 Subject: [PATCH 008/114] perf vendor events arm64: Add branch metrics for neoverse-n2-v2 Add branch related metrics. 
Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Will Deacon Cc: Xing Zhengjun Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1673940573-90503-8-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/arm/neoverse-n2-v2/metrics.json | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json index 08c6aaa7ff09..afcdb174e1c1 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json @@ -139,5 +139,26 @@ "MetricGroup": "Cache", "MetricName": "ll_cache_read_hit_rate", "ScaleUnit": "100%" + }, + { + "MetricExpr": "BR_MIS_PRED_RETIRED / INST_RETIRED * 1000", + "BriefDescription": "The rate of branches mis-predicted per kilo instructions", + "MetricGroup": "Branch", + "MetricName": "branch_mpki", + "ScaleUnit": "1MPKI" + }, + { + "MetricExpr": "BR_RETIRED / INST_RETIRED * 1000", + "BriefDescription": "The rate of branches retired per kilo instructions", + "MetricGroup": "Branch", + "MetricName": "branch_pki", + "ScaleUnit": "1PKI" + }, + { + "MetricExpr": "BR_MIS_PRED_RETIRED / BR_RETIRED", + "BriefDescription": "The rate of branches mis-predited to the overall branches", + "MetricGroup": "Branch", + "MetricName": "branch_miss_pred_rate", + "ScaleUnit": "100%" } ] From 4befa5cf8469790bd73f3795f2b02e3baf26022c Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Jan 2023 15:29:32 +0800 Subject: [PATCH 009/114] perf vendor events arm64: Add PE utilization metrics for neoverse-n2-v2 Add PE utilization related metrics. 
In the cpu_utilization metric, on neoverse-n2, whose slot count is 5, cpu_cycles must be subtracted from stall_slot to get the real value, according to the neoverse-n2 errata [0]. [0] https://documentation-service.arm.com/static/636a66a64e6cf12278ad89cb?token= Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Will Deacon Cc: Xing Zhengjun Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1673940573-90503-9-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/arm/neoverse-n2-v2/metrics.json | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json index afcdb174e1c1..3d6ac0ca8474 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json @@ -160,5 +160,51 @@ "MetricGroup": "Branch", "MetricName": "branch_miss_pred_rate", "ScaleUnit": "100%" + }, + { + "MetricExpr": "instructions / CPU_CYCLES", + "BriefDescription": "The average number of instructions executed for each cycle.", + "MetricGroup": "PEutilization", + "MetricName": "ipc" + }, + { + "MetricExpr": "ipc / 5", + "BriefDescription": "IPC percentage of peak. 
The peak of IPC is 5.", + "MetricGroup": "PEutilization", + "MetricName": "ipc_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "INST_RETIRED / CPU_CYCLES", + "BriefDescription": "Architecturally executed Instructions Per Cycle (IPC)", + "MetricGroup": "PEutilization", + "MetricName": "retired_ipc" + }, + { + "MetricExpr": "INST_SPEC / CPU_CYCLES", + "BriefDescription": "Speculatively executed Instructions Per Cycle (IPC)", + "MetricGroup": "PEutilization", + "MetricName": "spec_ipc" + }, + { + "MetricExpr": "OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)", + "MetricGroup": "PEutilization", + "MetricName": "retired_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "1 - OP_RETIRED / OP_SPEC", + "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)", + "MetricGroup": "PEutilization", + "MetricName": "wasted_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "OP_RETIRED / OP_SPEC * (1 - (STALL_SLOT if (#slots - 5) else (STALL_SLOT - CPU_CYCLES)) / (#slots * CPU_CYCLES))", + "BriefDescription": "The truly effective ratio of micro-operations executed by the CPU, which means that misprediction and stall are not included", + "MetricGroup": "PEutilization", + "MetricName": "cpu_utilization", + "ScaleUnit": "100%" } ] From 485c5bc590899cb640d32e43a471bbc6e134cfc7 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 17 Jan 2023 15:29:33 +0800 Subject: [PATCH 010/114] perf vendor events arm64: Add instruction mix metrics for neoverse-n2-v2 Add instruction mix related metrics. 
Reviewed-by: John Garry Signed-off-by: Jing Zhang Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shuai Xue Cc: Will Deacon Cc: Xing Zhengjun Cc: Zhuo Song Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1673940573-90503-10-git-send-email-renyu.zj@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arm64/arm/neoverse-n2-v2/metrics.json | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json index 3d6ac0ca8474..8ad15b726dca 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json @@ -206,5 +206,68 @@ "MetricGroup": "PEutilization", "MetricName": "cpu_utilization", "ScaleUnit": "100%" + }, + { + "MetricExpr": "LD_SPEC / INST_SPEC", + "BriefDescription": "The rate of load instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "InstructionMix", + "MetricName": "load_spec_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "ST_SPEC / INST_SPEC", + "BriefDescription": "The rate of store instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "InstructionMix", + "MetricName": "store_spec_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "DP_SPEC / INST_SPEC", + "BriefDescription": "The rate of integer data-processing instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "InstructionMix", + "MetricName": "data_process_spec_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "ASE_SPEC / INST_SPEC", + "BriefDescription": "The rate of advanced SIMD instructions speculatively executed to overall instructions 
speclatively executed", + "MetricGroup": "InstructionMix", + "MetricName": "advanced_simd_spec_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "VFP_SPEC / INST_SPEC", + "BriefDescription": "The rate of floating point instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "InstructionMix", + "MetricName": "float_point_spec_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "CRYPTO_SPEC / INST_SPEC", + "BriefDescription": "The rate of crypto instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "InstructionMix", + "MetricName": "crypto_spec_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "BR_IMMED_SPEC / INST_SPEC", + "BriefDescription": "The rate of branch immediate instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "InstructionMix", + "MetricName": "branch_immed_spec_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "BR_RETURN_SPEC / INST_SPEC", + "BriefDescription": "The rate of procedure return instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "InstructionMix", + "MetricName": "branch_return_spec_rate", + "ScaleUnit": "100%" + }, + { + "MetricExpr": "BR_INDIRECT_SPEC / INST_SPEC", + "BriefDescription": "The rate of indirect branch instructions speculatively executed to overall instructions speclatively executed", + "MetricGroup": "InstructionMix", + "MetricName": "branch_indirect_spec_rate", + "ScaleUnit": "100%" } ] From 3524f89edaf0159330a3199a42fb97f1aa1d13d9 Mon Sep 17 00:00:00 2001 From: qinyu Date: Mon, 16 Jan 2023 09:21:43 +0800 Subject: [PATCH 011/114] perf docs: Fix a typo in 'perf probe' man page: l20th -> 120th Fix a minor typo in 'perf probe' doc. 
Fixes: 631c9def804b2c92 ("perf probe: Support --line option to show probable source-code lines") Signed-off-by: qinyu Acked-by: Ian Rogers Cc: Feilong Lin Cc: Hewenliang Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230116012143.432435-1-qinyu32@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-probe.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 7f8e8ba3a787..5c43a6edc0e5 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -222,7 +222,7 @@ probe syntax, 'SRC' means the source file path, 'ALN' is start line number, and 'ALN2' is end line number in the file. It is also possible to specify how many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good for searching a specific function when several functions share same name. -So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. +So, "source.c:100-120" shows lines between 100th to 120th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function. LAZY MATCHING ------------- From 03953a697bdd0728b2f20309133b8664896ffd4a Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 18 Jan 2023 09:56:32 -0800 Subject: [PATCH 012/114] perf vendor events intel: Add Emerald Rapids The event list of the Emerald Rapids is the same as the Sapphire Rapids. Add the CPU model ID of Emerald Rapids into the mapfile.csv and point it to the event list of Sapphire Rapids. 
Signed-off-by: Kan Liang Acked-by: Ian Rogers Cc: Andi Kleen Cc: Artem Bityutskiy Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230118175632.3165217-1-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 711a4ef05fdf..5facdac6fe8e 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -21,7 +21,7 @@ GenuineIntel-6-A[AC],v1.00,meteorlake,core GenuineIntel-6-1[AEF],v3,nehalemep,core GenuineIntel-6-2E,v3,nehalemex,core GenuineIntel-6-2A,v17,sandybridge,core -GenuineIntel-6-8F,v1.09,sapphirerapids,core +GenuineIntel-6-(8F|CF),v1.09,sapphirerapids,core GenuineIntel-6-(37|4A|4C|4D|5A),v14,silvermont,core GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v53,skylake,core GenuineIntel-6-55-[01234],v1.28,skylakex,core From 9f19aab47ced012eddef1e2bc96007efc7713b61 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 5 Jan 2023 00:26:09 -0800 Subject: [PATCH 013/114] perf llvm: Fix inadvertent file creation The LLVM template is first echo-ed into command_out and then command_out is executed. The echo surrounds the template with double quotes, however, the template itself may contain quotes. This is generally innocuous but in tools/perf/tests/bpf-script-test-prologue.c we see: ... SEC("func=null_lseek file->f_mode offset orig") ... where the first double quote ends the double quote of the echo, then the > redirects output into a file called f_mode. To avoid this inadvertent behavior, replace redirects and similar characters with ASCII control codes before the echo, then substitute the original characters back in the echo's output.
Fixes: 5eab5a7ee032acaa ("perf llvm: Display eBPF compiling command in debug output") Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Andrii Nakryiko Cc: bpf@vger.kernel.org Cc: Ingo Molnar Cc: Jiri Olsa Cc: llvm@lists.linux.dev Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Tom Rix Link: https://lore.kernel.org/r/20230105082609.344538-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/llvm-utils.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 650ffe336f3a..4e8e243a6e4b 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -531,14 +531,37 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, pr_debug("llvm compiling command template: %s\n", template); + /* + * Below, substitute control characters for values that can cause the + * echo to misbehave, then substitute the values back. 
+ */ err = -ENOMEM; - if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0) + if (asprintf(&command_echo, "echo -n \a%s\a", template) < 0) goto errout; +#define SWAP_CHAR(a, b) do { if (*p == a) *p = b; } while (0) + for (char *p = command_echo; *p; p++) { + SWAP_CHAR('<', '\001'); + SWAP_CHAR('>', '\002'); + SWAP_CHAR('"', '\003'); + SWAP_CHAR('\'', '\004'); + SWAP_CHAR('|', '\005'); + SWAP_CHAR('&', '\006'); + SWAP_CHAR('\a', '"'); + } err = read_from_pipe(command_echo, (void **) &command_out, NULL); if (err) goto errout; + for (char *p = command_out; *p; p++) { + SWAP_CHAR('\001', '<'); + SWAP_CHAR('\002', '>'); + SWAP_CHAR('\003', '"'); + SWAP_CHAR('\004', '\''); + SWAP_CHAR('\005', '|'); + SWAP_CHAR('\006', '&'); + } +#undef SWAP_CHAR pr_debug("llvm compiling command : %s\n", command_out); err = read_from_pipe(template, &obj_buf, &obj_buf_sz); From 1784eeaeb3de49a10dcae23e3882d879c5d342ba Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 10 Jan 2023 23:06:39 -0800 Subject: [PATCH 014/114] perf tools: Remove HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE Switch HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE to be a version number test on libtraceevent being >= to version 1.5.0. This also corrects a greater-than test to be greater-than-or-equal. 
Fixes: b9a49f8cb02f0859 ("perf tools: Check if libtracevent has TEP_FIELD_IS_RELATIVE") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Jajeev Cc: Eelco Chaudron Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Sean Christopherson Cc: Stephane Eranian Cc: Yang Jihong Link: https://lore.kernel.org/lkml/20221205225940.3079667-3-irogers@google.com/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 4 ---- tools/perf/builtin-trace.c | 2 +- tools/perf/util/data-convert-bt.c | 2 +- tools/perf/util/evsel.c | 2 +- tools/perf/util/python.c | 2 +- tools/perf/util/scripting-engines/trace-event-perl.c | 2 +- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/sort.c | 3 ++- tools/perf/util/trace-event.h | 3 +++ 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 5b8784675903..3519a0139026 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1208,10 +1208,6 @@ ifneq ($(NO_LIBTRACEEVENT),1) LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3)) CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP) $(call detected,CONFIG_LIBTRACEEVENT) - LIBTRACEEVENT_VERSION_WITH_TEP_FIELD_IS_RELATIVE := $(shell expr 1 \* 255 \* 255 + 5 \* 255 + 0) # 1.5.0 - ifeq ($(shell test $(LIBTRACEEVENT_VERSION_CPP) -gt $(LIBTRACEEVENT_VERSION_WITH_TEP_FIELD_IS_RELATIVE); echo $$?),0) - CFLAGS += -DHAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE - endif else dummy := $(warning Warning: libtraceevent is missing limiting functionality, please install libtraceevent-dev/libtraceevent-devel) endif diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d21fe0f32a6d..46944eaba2e5 100644 --- 
a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2731,7 +2731,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, offset = format_field__intval(field, sample, evsel->needs_swap); syscall_arg.len = offset >> 16; offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE +#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) if (field->flags & TEP_FIELD_IS_RELATIVE) offset += field->offset + field->size; #endif diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index b842273458b8..98454f7a820c 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -322,7 +322,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, offset = tmp_val; len = offset >> 16; offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE +#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) if (flags & TEP_FIELD_IS_RELATIVE) offset += fmtf->offset + fmtf->size; #endif diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 999dd1700502..296292fa2c04 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2784,7 +2784,7 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE +#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) if (field->flags & TEP_FIELD_IS_RELATIVE) offset += field->offset + field->size; #endif diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 212031b97910..25a276710dfb 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -442,7 +442,7 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) offset = val; len = offset >> 16; offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE +#if LIBTRACEEVENT_VERSION >= 
MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) if (field->flags & TEP_FIELD_IS_RELATIVE) offset += field->offset + field->size; #endif diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index c097b7934fd4..5bcec514f697 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -393,7 +393,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE +#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) if (field->flags & TEP_FIELD_IS_RELATIVE) offset += field->offset + field->size; #endif diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index e930f5f1f36d..759ed6eafa3c 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -994,7 +994,7 @@ static void python_process_tracepoint(struct perf_sample *sample, offset = val; len = offset >> 16; offset &= 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE +#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) if (field->flags & TEP_FIELD_IS_RELATIVE) offset += field->offset + field->size; #endif diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 37662cdec5ee..3673912d9e6b 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -28,6 +28,7 @@ #include "time-utils.h" #include "cgroup.h" #include "machine.h" +#include "trace-event.h" #include #include @@ -2667,7 +2668,7 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, tep_read_number_field(field, a->raw_data, &dyn); offset = dyn & 0xffff; size = (dyn >> 16) & 0xffff; -#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE +#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 
0) if (field->flags & TEP_FIELD_IS_RELATIVE) offset += field->offset + field->size; #endif diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 9b3cd79cca12..2e1b8641591c 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -21,6 +21,9 @@ struct trace_event { struct tep_plugin_list *plugin_list; }; +/* Computes a version number comparable with LIBTRACEEVENT_VERSION from Makefile.config. */ +#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c)) + typedef char *(tep_func_resolver_t)(void *priv, unsigned long long *addrp, char **modp); From 1634bad32074e00e0ec29e0aef53210ed20f0ec5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 10 Jan 2023 23:06:40 -0800 Subject: [PATCH 015/114] perf trace: Reduce #ifdefs for TEP_FIELD_IS_RELATIVE Add a helper function that applies the mask to test, or returns false if libtraceevent is too old or not present. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Jajeev Cc: Eelco Chaudron Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Sean Christopherson Cc: Stephane Eranian Cc: Yang Jihong Link: https://lore.kernel.org/r/20230111070641.1728726-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 +--- tools/perf/util/data-convert-bt.c | 4 +--- tools/perf/util/evsel.c | 4 +--- tools/perf/util/python.c | 4 +--- .../util/scripting-engines/trace-event-perl.c | 4 +--- .../util/scripting-engines/trace-event-python.c | 4 +--- tools/perf/util/sort.c | 4 +--- tools/perf/util/trace-event.h | 16 ++++++++++++++++ 8 files changed, 23 insertions(+), 21 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 46944eaba2e5..610fb60b1c0d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2731,10 +2731,8 @@ static size_t 
trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, offset = format_field__intval(field, sample, evsel->needs_swap); syscall_arg.len = offset >> 16; offset &= 0xffff; -#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) - if (field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif } val = (uintptr_t)(sample->raw_data + offset); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 98454f7a820c..2b732bccabad 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -322,10 +322,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, offset = tmp_val; len = offset >> 16; offset &= 0xffff; -#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) - if (flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(flags)) offset += fmtf->offset + fmtf->size; -#endif } if (flags & TEP_FIELD_IS_ARRAY) { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 296292fa2c04..8550638587e5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2784,10 +2784,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; -#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) - if (field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif } return sample->raw_data + offset; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 25a276710dfb..d948455e5ed4 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -442,10 +442,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) offset = val; len = offset >> 16; offset &= 0xffff; -#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) - if 
(field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 5bcec514f697..83fd2fd0ba16 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -393,10 +393,8 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; -#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) - if (field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif } else offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 759ed6eafa3c..2c2697c5d025 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -994,10 +994,8 @@ static void python_process_tracepoint(struct perf_sample *sample, offset = val; len = offset >> 16; offset &= 0xffff; -#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) - if (field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 3673912d9e6b..d7d0f997873a 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2668,10 +2668,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, tep_read_number_field(field, a->raw_data, &dyn); offset = dyn & 0xffff; size = 
(dyn >> 16) & 0xffff; -#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) - if (field->flags & TEP_FIELD_IS_RELATIVE) + if (tep_field_is_relative(field->flags)) offset += field->offset + field->size; -#endif /* record max width for output */ if (size > hde->dynamic_len) hde->dynamic_len = size; diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 2e1b8641591c..a0cff184b1cd 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -140,4 +140,20 @@ int common_lock_depth(struct scripting_context *context); #define SAMPLE_FLAGS_BUF_SIZE 64 int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz); +#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) +#include + +static inline bool tep_field_is_relative(unsigned long flags) +{ + return (flags & TEP_FIELD_IS_RELATIVE) != 0; +} +#else +#include + +static inline bool tep_field_is_relative(unsigned long flags __maybe_unused) +{ + return false; +} +#endif + #endif /* _PERF_UTIL_TRACE_EVENT_H */ From 316769f75718f16e4e7d6a55d39053fe8a1d8b1c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 10 Jan 2023 23:06:41 -0800 Subject: [PATCH 016/114] perf debug: Increase libtraceevent logging when verbose libtraceevent has added more levels of debug printout and with changes like: https://lore.kernel.org/linux-trace-devel/20210507095022.1079364-3-tz.stoyanov@gmail.com previously generated output like "registering plugin" is no longer displayed. This change makes it so that if perf's verbose debug output is enabled then the debug and info libtraceevent messages can be displayed. This change was previously posted: https://lore.kernel.org/linux-perf-users/20210923001024.550263-4-irogers@google.com/ and reverted: https://lore.kernel.org/linux-perf-users/20220109153446.160593-1-acme@kernel.org/ The previous failure was due to -Itools/lib being on the include path and libtraceevent in tools/lib being version 1.1.0. 
This meant that when LIBTRACEEVENT_VERSION was 1.3.0 the #if succeeded, but the header file for libtraceevent (taken from tools/lib rather than the intended /usr/include) was for version 1.1.0 and function definitions were missing. Since the previous issue, the -Itools/lib include path has been removed: https://lore.kernel.org/lkml/20221109184914.1357295-1-irogers@google.com/ In addition, libtraceevent 1.1.0 has been removed from tools/lib: https://lore.kernel.org/lkml/20221130062935.2219247-1-irogers@google.com/ Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Athira Jajeev Cc: Eelco Chaudron Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Sean Christopherson Cc: Stephane Eranian Cc: Yang Jihong Link: https://lore.kernel.org/r/20230111070641.1728726-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 190e818a0717..88378c4c5dd9 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -19,12 +19,19 @@ #include "debug.h" #include "print_binary.h" #include "target.h" +#include "trace-event.h" #include "ui/helpline.h" #include "ui/ui.h" #include "util/parse-sublevel-options.h" #include +#ifdef HAVE_LIBTRACEEVENT +#include +#else +#define LIBTRACEEVENT_VERSION 0 +#endif + int verbose; int debug_peo_args; bool dump_trace = false, quiet = false; @@ -228,6 +235,14 @@ int perf_debug_option(const char *str) /* Allow only verbose value in range (0, 10), otherwise set 0. */ verbose = (verbose < 0) || (verbose > 10) ?
0 : verbose; +#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 3, 0) + if (verbose == 1) + tep_set_loglevel(TEP_LOG_INFO); + else if (verbose == 2) + tep_set_loglevel(TEP_LOG_DEBUG); + else if (verbose >= 3) + tep_set_loglevel(TEP_LOG_ALL); +#endif return 0; } From 1962ab6f6e0b39e4216206205bda14aff87705f3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 14 Jan 2023 13:52:51 -0800 Subject: [PATCH 017/114] perf test workload thloop: Make count increments atomic The count variable is incremented by multiple threads, doing so without an atomic operation causes thread sanitizer warnings. Switch to using relaxed atomics. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230114215251.271678-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/workloads/thloop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c index 29193b75717e..af05269c2eb8 100644 --- a/tools/perf/tests/workloads/thloop.c +++ b/tools/perf/tests/workloads/thloop.c @@ -20,7 +20,7 @@ static void sighandler(int sig __maybe_unused) noinline void test_loop(void) { while (!done) - count++; + __atomic_fetch_add(&count, 1, __ATOMIC_RELAXED); } static void *thfunc(void *arg) From 1b69346e7a0bd18a9c3b00bdd6219d4d8ad73466 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:47 +0200 Subject: [PATCH 018/114] perf test: Add Symbols test Add a test to check function symbols do not overlap and are not zero length. The main motivation for the test is to make it easier to review changes to PLT symbol synthesis i.e. changes to dso__synthesize_plt_symbols(). By default the test uses the perf executable as a test DSO, but a specific DSO can be specified via a new perf test option "--dso". 
The test is useful in the following ways: - Any DSO can be tested, even ones that do not run on the current architecture. For example, using cross-compiled DSOs to see how well perf handles different architectures. - With verbose > 1 (e.g. -vv), all the symbols are printed, which makes it easier to see issues. - perf removes duplicate symbols and expands zero-length symbols to reach the next symbol, however that is done before adding synthesized symbols, so the test is checking those also. Example: $ perf test -v Symbols 74: Symbols : --- start --- test child forked, pid 154918 Testing /home/user/bin/perf Overlapping symbols: 7d000-7f3a0 g _init 7d030-7d040 g __printf_chk@plt test child finished with -1 ---- end ---- Symbols: FAILED! Note the test fails because perf expands the _init symbol over the PLT because there are no PLT symbols at that point, but then dso__synthesize_plt_symbols() creates them. Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-test.txt | 3 + tools/perf/tests/Build | 1 + tools/perf/tests/builtin-test.c | 3 + tools/perf/tests/symbols.c | 150 +++++++++++++++++++++++++ tools/perf/tests/tests.h | 3 + 5 files changed, 160 insertions(+) create mode 100644 tools/perf/tests/symbols.c diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index b329c65d7f40..951a2f262872 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt @@ -34,3 +34,6 @@ OPTIONS -F:: --dont-fork:: Do not fork child for each test, run all tests within single process. + +--dso:: + Specify a DSO for the "Symbols" test. 
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 90fd1eb317bb..fb9ac5dc4079 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -68,6 +68,7 @@ perf-y += perf-time-to-tsc.o perf-y += dlfilter-test.o perf-y += sigtrap.o perf-y += event_groups.o +perf-y += symbols.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index cfa61493c750..35cc3807cc9e 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -31,6 +31,7 @@ #include "builtin-test-list.h" static bool dont_fork; +const char *dso_to_test; struct test_suite *__weak arch_tests[] = { NULL, @@ -117,6 +118,7 @@ static struct test_suite *generic_tests[] = { &suite__dlfilter, &suite__sigtrap, &suite__event_groups, + &suite__symbols, NULL, }; @@ -521,6 +523,7 @@ int cmd_test(int argc, const char **argv) OPT_BOOLEAN('F', "dont-fork", &dont_fork, "Do not fork for testcase"), OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), + OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"), OPT_END() }; const char * const test_subcommands[] = { "list", NULL }; diff --git a/tools/perf/tests/symbols.c b/tools/perf/tests/symbols.c new file mode 100644 index 000000000000..057b16df6416 --- /dev/null +++ b/tools/perf/tests/symbols.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "debug.h" +#include "dso.h" +#include "machine.h" +#include "thread.h" +#include "symbol.h" +#include "map.h" +#include "util.h" +#include "tests.h" + +struct test_info { + struct machine *machine; + struct thread *thread; +}; + +static int init_test_info(struct test_info *ti) +{ + ti->machine = machine__new_host(); + if (!ti->machine) { + pr_debug("machine__new_host() failed!\n"); + return TEST_FAIL; + } + + /* Create a dummy thread */ + ti->thread = machine__findnew_thread(ti->machine, 100, 
100); + if (!ti->thread) { + pr_debug("machine__findnew_thread() failed!\n"); + return TEST_FAIL; + } + + return TEST_OK; +} + +static void exit_test_info(struct test_info *ti) +{ + thread__put(ti->thread); + machine__delete(ti->machine); +} + +static void get_test_dso_filename(char *filename, size_t max_sz) +{ + if (dso_to_test) + strlcpy(filename, dso_to_test, max_sz); + else + perf_exe(filename, max_sz); +} + +static int create_map(struct test_info *ti, char *filename, struct map **map_p) +{ + /* Create a dummy map at 0x100000 */ + *map_p = map__new(ti->machine, 0x100000, 0xffffffff, 0, NULL, + PROT_EXEC, 0, NULL, filename, ti->thread); + if (!*map_p) { + pr_debug("Failed to create map!"); + return TEST_FAIL; + } + + return TEST_OK; +} + +static int test_dso(struct dso *dso) +{ + struct symbol *last_sym = NULL; + struct rb_node *nd; + int ret = TEST_OK; + + /* dso__fprintf() prints all the symbols */ + if (verbose > 1) + dso__fprintf(dso, stderr); + + for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) { + struct symbol *sym = rb_entry(nd, struct symbol, rb_node); + + if (sym->type != STT_FUNC && sym->type != STT_GNU_IFUNC) + continue; + + /* Check for overlapping function symbols */ + if (last_sym && sym->start < last_sym->end) { + pr_debug("Overlapping symbols:\n"); + symbol__fprintf(last_sym, stderr); + symbol__fprintf(sym, stderr); + ret = TEST_FAIL; + } + /* Check for zero-length function symbol */ + if (sym->start == sym->end) { + pr_debug("Zero-length symbol:\n"); + symbol__fprintf(sym, stderr); + ret = TEST_FAIL; + } + last_sym = sym; + } + + return ret; +} + +static int test_file(struct test_info *ti, char *filename) +{ + struct map *map = NULL; + int ret, nr; + + pr_debug("Testing %s\n", filename); + + ret = create_map(ti, filename, &map); + if (ret != TEST_OK) + return ret; + + nr = dso__load(map->dso, map); + if (nr < 0) { + pr_debug("dso__load() failed!\n"); + ret = TEST_FAIL; + goto out_put; + } + + if (nr == 0) { + pr_debug("DSO has no 
symbols!\n"); + ret = TEST_SKIP; + goto out_put; + } + + ret = test_dso(map->dso); +out_put: + map__put(map); + + return ret; +} + +static int test__symbols(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + char filename[PATH_MAX]; + struct test_info ti; + int ret; + + ret = init_test_info(&ti); + if (ret != TEST_OK) + return ret; + + get_test_dso_filename(filename, sizeof(filename)); + + ret = test_file(&ti, filename); + + exit_test_info(&ti); + + return ret; +} + +DEFINE_SUITE("Symbols", symbols); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index fb4b5ad4dd0f..9a0f3904e53d 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -148,6 +148,7 @@ DECLARE_SUITE(perf_time_to_tsc); DECLARE_SUITE(dlfilter); DECLARE_SUITE(sigtrap); DECLARE_SUITE(event_groups); +DECLARE_SUITE(symbols); /* * PowerPC and S390 do not support creation of instruction breakpoints using the @@ -208,4 +209,6 @@ DECLARE_WORKLOAD(sqrtloop); DECLARE_WORKLOAD(brstack); DECLARE_WORKLOAD(datasym); +extern const char *dso_to_test; + #endif /* TESTS_H */ From c2d066c090c9fec5de99ae051e97e0f448cbc229 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:48 +0200 Subject: [PATCH 019/114] perf symbols: Factor out get_plt_sizes() Factor out get_plt_sizes() to make the code more readable and further changes to dso__synthesize_plt_symbols() easier to follow. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 54 +++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 96767d1b3f1c..4605680a22a3 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -323,6 +323,33 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) return demangled; } +static void get_plt_sizes(GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, + u64 *plt_header_size, u64 *plt_entry_size) +{ + switch (ehdr->e_machine) { + case EM_ARM: + *plt_header_size = 20; + *plt_entry_size = 12; + return; + case EM_AARCH64: + *plt_header_size = 32; + *plt_entry_size = 16; + return; + case EM_SPARC: + *plt_header_size = 48; + *plt_entry_size = 12; + return; + case EM_SPARCV9: + *plt_header_size = 128; + *plt_entry_size = 32; + return; + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ + *plt_header_size = shdr_plt->sh_entsize; + *plt_entry_size = shdr_plt->sh_entsize; + return; + } +} + #define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ idx < nr_entries; \ @@ -411,32 +438,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; - switch (ehdr.e_machine) { - case EM_ARM: - plt_header_size = 20; - plt_entry_size = 12; - break; - - case EM_AARCH64: - plt_header_size = 32; - plt_entry_size = 16; - break; - - case EM_SPARC: - plt_header_size = 48; - plt_entry_size = 12; - break; - - case EM_SPARCV9: - plt_header_size = 128; - plt_entry_size = 32; - break; - - default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa 
need to be checked */ - plt_header_size = shdr_plt.sh_entsize; - plt_entry_size = shdr_plt.sh_entsize; - break; - } + get_plt_sizes(&ehdr, &shdr_plt, &plt_header_size, &plt_entry_size); plt_offset += plt_header_size; if (shdr_rel_plt.sh_type == SHT_RELA) { From b08b20c3098832a4ffc22222ab742179b1f8514b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:49 +0200 Subject: [PATCH 020/114] perf symbols: Check plt_entry_size is not zero The code expects non-zero plt_entry_size. Check it and add a debug message to print if it is zero. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 4605680a22a3..c6a4e6c73990 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -323,30 +323,33 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) return demangled; } -static void get_plt_sizes(GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, +static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, u64 *plt_header_size, u64 *plt_entry_size) { switch (ehdr->e_machine) { case EM_ARM: *plt_header_size = 20; *plt_entry_size = 12; - return; + return true; case EM_AARCH64: *plt_header_size = 32; *plt_entry_size = 16; - return; + return true; case EM_SPARC: *plt_header_size = 48; *plt_entry_size = 12; - return; + return true; case EM_SPARCV9: *plt_header_size = 128; *plt_entry_size = 32; - return; + return true; default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ *plt_header_size = shdr_plt->sh_entsize; *plt_entry_size = shdr_plt->sh_entsize; - return; + if (*plt_entry_size) + return true; + pr_debug("Missing PLT entry size for %s\n", 
dso->long_name); + return false; } } @@ -438,7 +441,8 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; - get_plt_sizes(&ehdr, &shdr_plt, &plt_header_size, &plt_entry_size); + if (!get_plt_sizes(dso, &ehdr, &shdr_plt, &plt_header_size, &plt_entry_size)) + return 0; plt_offset += plt_header_size; if (shdr_rel_plt.sh_type == SHT_RELA) { From a2db72c5dac4c9bc1e577a82602639619264409f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:50 +0200 Subject: [PATCH 021/114] perf symbols: Add dso__find_symbol_nocache() Symbols should not be cached when there are more symbols still to add. Add dso__find_symbol_nocache() to facilitate that. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 5 +++++ tools/perf/util/symbol.h | 1 + 2 files changed, 6 insertions(+) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a3a165ae933a..a024f06f75d8 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -556,6 +556,11 @@ struct symbol *dso__find_symbol(struct dso *dso, u64 addr) return dso->last_find_result.symbol; } +struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr) +{ + return symbols__find(&dso->symbols, addr); +} + struct symbol *dso__first_symbol(struct dso *dso) { return symbols__first(&dso->symbols); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f735108c4d4e..2fdeb22bd02f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -148,6 +148,7 @@ void dso__delete_symbol(struct dso *dso, struct symbol *sym); struct symbol *dso__find_symbol(struct dso *dso, u64 addr); +struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr); struct symbol 
*dso__find_symbol_by_name(struct dso *dso, const char *name); struct symbol *symbol__next_by_name(struct symbol *sym); From 477d5e35b42b6ca6e1db229977c1ca33456babd7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:51 +0200 Subject: [PATCH 022/114] perf symbols: Slightly simplify 'err' usage in dso__synthesize_plt_symbols() Return zero directly instead of needless 'goto out_elf_end' that does the same thing. That allows 'err' to be initialized to -1 instead of having to change its value later. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index c6a4e6c73990..990a2c6037bb 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -384,7 +384,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) GElf_Ehdr ehdr; char sympltname[1024]; Elf *elf; - int nr = 0, symidx, err = 0; + int nr = 0, symidx, err = -1; if (!ss->dynsym) return 0; @@ -397,7 +397,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) dynsym_idx = ss->dynsym_idx; if (scn_dynsym == NULL) - goto out_elf_end; + return 0; scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, ".rela.plt", NULL); @@ -405,11 +405,9 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, ".rel.plt", NULL); if (scn_plt_rel == NULL) - goto out_elf_end; + return 0; } - err = -1; - if (shdr_rel_plt.sh_link != dynsym_idx) goto out_elf_end; From 5fec9b171cd80616e52e463ae5ae155483078004 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:52 +0200 Subject: [PATCH 023/114] perf symbols: Do not check ss->dynsym twice ss->dynsym is 
checked to be not NULL twice. Remove the first check because, in fact, there can be a plt with no dynsym, which is something that will be dealt with later. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 990a2c6037bb..87b82507c205 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -386,9 +386,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) Elf *elf; int nr = 0, symidx, err = -1; - if (!ss->dynsym) - return 0; - elf = ss->elf; ehdr = ss->ehdr; From 698a0d1a1a6c095477b07b9fc2e8713ee642ed53 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:53 +0200 Subject: [PATCH 024/114] perf symbols: Add symbol for .plt header perf expands the _init symbol over .plt because there are no PLT symbols at that point, but then dso__synthesize_plt_symbols() creates them. Fix by truncating the previous symbol and inserting a symbol for .plt header. Example: Before: $ perf test --dso `which uname` -v Symbols 74: Symbols : --- start --- test child forked, pid 191028 Problems creating module maps, continuing anyway... Testing /usr/bin/uname Overlapping symbols: 2000-25f0 g _init 2040-2050 g free@plt test child finished with -1 ---- end ---- Symbols: FAILED! $ perf test --dso `which uname` -vv Symbols 2>/tmp/cmp1.txt After: $ perf test --dso `which uname` -v Symbols 74: Symbols : --- start --- test child forked, pid 194291 Testing /usr/bin/uname test child finished with 0 ---- end ---- Symbols: Ok $ perf test --dso `which uname` -vv Symbols 2>/tmp/cmp2.txt $ diff /tmp/cmp1.txt /tmp/cmp2.txt 4,5c4 < test child forked, pid 191031 < Problems creating module maps, continuing anyway... 
--- > test child forked, pid 194296 9c8,9 < 2000-25f0 g _init --- > 2000-2030 g _init > 2030-2040 g .plt 100,103c100 < Overlapping symbols: < 2000-25f0 g _init < 2040-2050 g free@plt < test child finished with -1 --- > test child finished with 0 105c102 < Symbols: FAILED! --- > Symbols: Ok $ Signed-off-by: Adrian Hunter Tested-by: Arnaldo Carvalho de Melo Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 87b82507c205..a8b7c3860b2d 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -389,6 +389,27 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) elf = ss->elf; ehdr = ss->ehdr; + if (!elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL)) + return 0; + + /* + * A symbol from a previous section (e.g. .init) can have been expanded + * by symbols__fixup_end() to overlap .plt. Truncate it before adding + * a symbol for .plt header. 
+ */ + f = dso__find_symbol_nocache(dso, shdr_plt.sh_offset); + if (f && f->start < shdr_plt.sh_offset && f->end > shdr_plt.sh_offset) + f->end = shdr_plt.sh_offset; + + if (!get_plt_sizes(dso, &ehdr, &shdr_plt, &plt_header_size, &plt_entry_size)) + return 0; + + /* Add a symbol for .plt header */ + f = symbol__new(shdr_plt.sh_offset, plt_header_size, STB_GLOBAL, STT_FUNC, ".plt"); + if (!f) + goto out_elf_end; + symbols__insert(&dso->symbols, f); + scn_dynsym = ss->dynsym; shdr_dynsym = ss->dynshdr; dynsym_idx = ss->dynsym_idx; @@ -408,9 +429,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) if (shdr_rel_plt.sh_link != dynsym_idx) goto out_elf_end; - if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) - goto out_elf_end; - /* * Fetch the relocation section to find the idxes to the GOT * and the symbols in the .dynsym they refer to. @@ -436,8 +454,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; - if (!get_plt_sizes(dso, &ehdr, &shdr_plt, &plt_header_size, &plt_entry_size)) - return 0; plt_offset += plt_header_size; if (shdr_rel_plt.sh_type == SHT_RELA) { From 45204677d427b7d0ed11930bd5be4a42893d1c93 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:54 +0200 Subject: [PATCH 025/114] perf symbols: Allow for .plt entries with no symbol Create a sensible name for .plt entries with no symbol. 
Example: Before: $ perf test --dso /usr/lib/x86_64-linux-gnu/libc.so.6 -vv Symbols 2>/tmp/cmp1.txt After: $ perf test --dso /usr/lib/x86_64-linux-gnu/libc.so.6 -vv Symbols 2>/tmp/cmp2.txt $ diff /tmp/cmp1.txt /tmp/cmp2.txt 4c4 < test child forked, pid 53043 --- > test child forked, pid 54372 23,62c23,62 < 280f0-28100 g @plt < 28100-28110 g @plt < 28110-28120 g @plt < 28120-28130 g @plt < 28130-28140 g @plt < 28140-28150 g @plt < 28150-28160 g @plt < 28160-28170 g @plt < 28170-28180 g @plt < 28180-28190 g @plt < 28190-281a0 g @plt < 281a0-281b0 g @plt < 281b0-281c0 g @plt < 281c0-281d0 g @plt < 281d0-281e0 g @plt < 281e0-281f0 g @plt < 281f0-28200 g @plt < 28200-28210 g @plt < 28210-28220 g @plt < 28220-28230 g @plt < 28230-28240 g @plt < 28240-28250 g @plt < 28250-28260 g @plt < 28260-28270 g @plt < 28270-28280 g @plt < 28280-28290 g @plt < 28290-282a0 g @plt < 282a0-282b0 g @plt < 282b0-282c0 g @plt < 282c0-282d0 g @plt < 282d0-282e0 g @plt < 282e0-282f0 g @plt < 282f0-28300 g @plt < 28300-28310 g @plt < 28310-28320 g @plt < 28320-28330 g @plt < 28330-28340 g @plt < 28340-28350 g @plt < 28350-28360 g @plt < 28360-28370 g @plt --- > 280f0-28100 g offset_0x280f0@plt > 28100-28110 g offset_0x28100@plt > 28110-28120 g offset_0x28110@plt > 28120-28130 g offset_0x28120@plt > 28130-28140 g offset_0x28130@plt > 28140-28150 g offset_0x28140@plt > 28150-28160 g offset_0x28150@plt > 28160-28170 g offset_0x28160@plt > 28170-28180 g offset_0x28170@plt > 28180-28190 g offset_0x28180@plt > 28190-281a0 g offset_0x28190@plt > 281a0-281b0 g offset_0x281a0@plt > 281b0-281c0 g offset_0x281b0@plt > 281c0-281d0 g offset_0x281c0@plt > 281d0-281e0 g offset_0x281d0@plt > 281e0-281f0 g offset_0x281e0@plt > 281f0-28200 g offset_0x281f0@plt > 28200-28210 g offset_0x28200@plt > 28210-28220 g offset_0x28210@plt > 28220-28230 g offset_0x28220@plt > 28230-28240 g offset_0x28230@plt > 28240-28250 g offset_0x28240@plt > 28250-28260 g offset_0x28250@plt > 28260-28270 g offset_0x28260@plt > 
28270-28280 g offset_0x28270@plt > 28280-28290 g offset_0x28280@plt > 28290-282a0 g offset_0x28290@plt > 282a0-282b0 g offset_0x282a0@plt > 282b0-282c0 g offset_0x282b0@plt > 282c0-282d0 g offset_0x282c0@plt > 282d0-282e0 g offset_0x282d0@plt > 282e0-282f0 g offset_0x282e0@plt > 282f0-28300 g offset_0x282f0@plt > 28300-28310 g offset_0x28300@plt > 28310-28320 g offset_0x28310@plt > 28320-28330 g offset_0x28320@plt > 28330-28340 g offset_0x28330@plt > 28340-28350 g offset_0x28340@plt > 28350-28360 g offset_0x28350@plt > 28360-28370 g offset_0x28360@plt Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a8b7c3860b2d..6e4a22acefba 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -470,8 +470,11 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) demangled = demangle_sym(dso, 0, elf_name); if (demangled != NULL) elf_name = demangled; - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_name); + if (*elf_name) + snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name); + else + snprintf(sympltname, sizeof(sympltname), + "offset_%#" PRIx64 "@plt", plt_offset); free(demangled); f = symbol__new(plt_offset, plt_entry_size, @@ -496,8 +499,11 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) demangled = demangle_sym(dso, 0, elf_name); if (demangled != NULL) elf_name = demangled; - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_name); + if (*elf_name) + snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name); + else + snprintf(sympltname, sizeof(sympltname), + "offset_%#" PRIx64 "@plt", plt_offset); free(demangled); f = symbol__new(plt_offset, plt_entry_size, 
From 375a44818429a4df3fc18a0ac0ecfad9953f1ac8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:55 +0200 Subject: [PATCH 026/114] perf symbols: Combine handling for SHT_RELA and SHT_REL SHT_REL and SHT_RELA are handled the same way. Simplify by combining the handling. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-10-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 75 +++++++++++++----------------------- 1 file changed, 27 insertions(+), 48 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6e4a22acefba..e274f646ac32 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -323,6 +323,23 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) return demangled; } +struct rel_info { + bool is_rela; + Elf_Data *reldata; + GElf_Rela rela; + GElf_Rel rel; +}; + +static u32 get_rel_symidx(struct rel_info *ri, u32 idx) +{ + if (ri->is_rela) { + gelf_getrela(ri->reldata, idx, &ri->rela); + return GELF_R_SYM(ri->rela.r_info); + } + gelf_getrel(ri->reldata, idx, &ri->rel); + return GELF_R_SYM(ri->rel.r_info); +} + static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, u64 *plt_header_size, u64 *plt_entry_size) { @@ -353,16 +370,6 @@ static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, } } -#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) - -#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) - /* * We need to check if we have a .dynsym, so that we can handle the 
* .plt, synthesizing its symbols, that aren't on the symtabs (be it @@ -378,13 +385,14 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) GElf_Shdr shdr_plt; struct symbol *f; GElf_Shdr shdr_rel_plt, shdr_dynsym; - Elf_Data *reldata, *syms, *symstrs; + Elf_Data *syms, *symstrs; Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; size_t dynsym_idx; GElf_Ehdr ehdr; char sympltname[1024]; Elf *elf; - int nr = 0, symidx, err = -1; + int nr = 0, err = -1; + struct rel_info ri = { .is_rela = false }; elf = ss->elf; ehdr = ss->ehdr; @@ -433,8 +441,8 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) * Fetch the relocation section to find the idxes to the GOT * and the symbols in the .dynsym they refer to. */ - reldata = elf_getdata(scn_plt_rel, NULL); - if (reldata == NULL) + ri.reldata = elf_getdata(scn_plt_rel, NULL); + if (!ri.reldata) goto out_elf_end; syms = elf_getdata(scn_dynsym, NULL); @@ -456,44 +464,15 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) plt_offset = shdr_plt.sh_offset; plt_offset += plt_header_size; - if (shdr_rel_plt.sh_type == SHT_RELA) { - GElf_Rela pos_mem, *pos; + ri.is_rela = shdr_rel_plt.sh_type == SHT_RELA; - elf_section__for_each_rela(reldata, pos, pos_mem, idx, - nr_rel_entries) { + if (shdr_rel_plt.sh_type == SHT_RELA || + shdr_rel_plt.sh_type == SHT_REL) { + for (idx = 0; idx < nr_rel_entries; idx++) { const char *elf_name = NULL; char *demangled = NULL; - symidx = GELF_R_SYM(pos->r_info); - gelf_getsym(syms, symidx, &sym); - elf_name = elf_sym__name(&sym, symstrs); - demangled = demangle_sym(dso, 0, elf_name); - if (demangled != NULL) - elf_name = demangled; - if (*elf_name) - snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name); - else - snprintf(sympltname, sizeof(sympltname), - "offset_%#" PRIx64 "@plt", plt_offset); - free(demangled); - - f = symbol__new(plt_offset, plt_entry_size, - STB_GLOBAL, STT_FUNC, sympltname); - if (!f) - goto out_elf_end; - - plt_offset += 
plt_entry_size; - symbols__insert(&dso->symbols, f); - ++nr; - } - } else if (shdr_rel_plt.sh_type == SHT_REL) { - GElf_Rel pos_mem, *pos; - elf_section__for_each_rel(reldata, pos, pos_mem, idx, - nr_rel_entries) { - const char *elf_name = NULL; - char *demangled = NULL; - symidx = GELF_R_SYM(pos->r_info); - gelf_getsym(syms, symidx, &sym); + gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym); elf_name = elf_sym__name(&sym, symstrs); demangled = demangle_sym(dso, 0, elf_name); From df8aeaefea026a25f443b8982186127291c6f149 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 Jan 2023 14:34:56 +0200 Subject: [PATCH 027/114] perf symbols: Check SHT_RELA and SHT_REL type earlier Make the code more readable by checking for SHT_RELA and SHT_REL type earlier. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/20230120123456.12449-11-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 48 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index e274f646ac32..aa62735aea7b 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -434,6 +434,10 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) return 0; } + if (shdr_rel_plt.sh_type != SHT_RELA && + shdr_rel_plt.sh_type != SHT_REL) + return 0; + if (shdr_rel_plt.sh_link != dynsym_idx) goto out_elf_end; @@ -466,34 +470,30 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) ri.is_rela = shdr_rel_plt.sh_type == SHT_RELA; - if (shdr_rel_plt.sh_type == SHT_RELA || - shdr_rel_plt.sh_type == SHT_REL) { - for (idx = 0; idx < nr_rel_entries; idx++) { - const char *elf_name = NULL; - char *demangled = NULL; + for (idx = 0; idx < nr_rel_entries; idx++) { + const char *elf_name = NULL; + char *demangled = NULL; - gelf_getsym(syms, get_rel_symidx(&ri, idx), 
&sym); + gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym); - elf_name = elf_sym__name(&sym, symstrs); - demangled = demangle_sym(dso, 0, elf_name); - if (demangled != NULL) - elf_name = demangled; - if (*elf_name) - snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name); - else - snprintf(sympltname, sizeof(sympltname), - "offset_%#" PRIx64 "@plt", plt_offset); - free(demangled); + elf_name = elf_sym__name(&sym, symstrs); + demangled = demangle_sym(dso, 0, elf_name); + if (demangled) + elf_name = demangled; + if (*elf_name) + snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name); + else + snprintf(sympltname, sizeof(sympltname), + "offset_%#" PRIx64 "@plt", plt_offset); + free(demangled); - f = symbol__new(plt_offset, plt_entry_size, - STB_GLOBAL, STT_FUNC, sympltname); - if (!f) - goto out_elf_end; + f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, STT_FUNC, sympltname); + if (!f) + goto out_elf_end; - plt_offset += plt_entry_size; - symbols__insert(&dso->symbols, f); - ++nr; - } + plt_offset += plt_entry_size; + symbols__insert(&dso->symbols, f); + ++nr; } err = 0; From 4cbd5334ff1390444644e82b5aed1e3288798e50 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 19 Jan 2023 12:10:36 -0800 Subject: [PATCH 028/114] perf tools: Fix foolproof typo In the context of LBR stitching documentation. 
Signed-off-by: Ian Rogers Acked-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sandipan Das Link: https://lore.kernel.org/r/20230119201036.156441-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 2 +- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/Documentation/perf-script.txt | 2 +- tools/perf/Documentation/perf-top.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 5c5eb2def83e..af5c3106f468 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -121,7 +121,7 @@ REPORT OPTIONS perf c2c record --call-graph lbr. Disabled by default. In common cases with call stack overflows, it can recreate better call stacks than the default lbr call stack - output. But this approach is not full proof. There can be cases + output. But this approach is not foolproof. There can be cases where it creates incorrect call stacks from incorrect matches. The known limitations include exception handing such as setjmp/longjmp will have calls/returns not match. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4fa509b15948..9b0c0dbf9a77 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -507,7 +507,7 @@ include::itrace.txt[] perf record --call-graph lbr. Disabled by default. In common cases with call stack overflows, it can recreate better call stacks than the default lbr call stack - output. But this approach is not full proof. There can be cases + output. But this approach is not foolproof. There can be cases where it creates incorrect call stacks from incorrect matches. 
The known limitations include exception handing such as setjmp/longjmp will have calls/returns not match. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 68e37de5fae4..8d77182fbf31 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -502,7 +502,7 @@ include::itrace.txt[] perf record --call-graph lbr. Disabled by default. In common cases with call stack overflows, it can recreate better call stacks than the default lbr call stack - output. But this approach is not full proof. There can be cases + output. But this approach is not foolproof. There can be cases where it creates incorrect call stacks from incorrect matches. The known limitations include exception handing such as setjmp/longjmp will have calls/returns not match. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index e534d709cc5a..c60e615b7183 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -334,7 +334,7 @@ use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'. callgraph. The option must be used with --call-graph lbr recording. Disabled by default. In common cases with call stack overflows, it can recreate better call stacks than the default lbr call stack - output. But this approach is not full proof. There can be cases + output. But this approach is not foolproof. There can be cases where it creates incorrect call stacks from incorrect matches. The known limitations include exception handing such as setjmp/longjmp will have calls/returns not match. From f8ad6018ce3c065a706b187cdc4520975969660e Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 20 Jan 2023 14:36:54 +0000 Subject: [PATCH 029/114] perf pmu: Remove duplication around EVENT_SOURCE_DEVICE_PATH The pattern for accessing EVENT_SOURCE_DEVICE_PATH is duplicated in a few places, so add two utility functions to cover it. 
Also just use perf_pmu__scan_file() instead of pmu_type() which already does the same thing. No functional changes. Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Tested-by: Tanmay Jagdale Cc: Alexander Shishkin Cc: Bharat Bhushan Cc: George Cherian Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Linu Cherian Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sunil Kovvuri Goutham Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/auxtrace.c | 5 +- tools/perf/arch/arm64/util/pmu.c | 4 +- tools/perf/arch/x86/util/pmu.c | 12 +-- tools/perf/util/pmu.c | 110 +++++++++++----------------- tools/perf/util/pmu.h | 5 +- 5 files changed, 51 insertions(+), 85 deletions(-) diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index deeb163999ce..adec6c9ee11d 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -55,17 +55,16 @@ static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err) static struct perf_pmu **find_all_hisi_ptt_pmus(int *nr_ptts, int *err) { - const char *sysfs = sysfs__mountpoint(); struct perf_pmu **hisi_ptt_pmus = NULL; struct dirent *dent; char path[PATH_MAX]; DIR *dir = NULL; int idx = 0; - snprintf(path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); + perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); dir = opendir(path); if (!dir) { - pr_err("can't read directory '%s'\n", EVENT_SOURCE_DEVICE_PATH); + pr_err("can't read directory '%s'\n", path); *err = -EINVAL; return NULL; } diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 9e674cac5a73..801bf52e2ea6 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -44,8 +44,8 @@ double 
perf_pmu__cpu_slots_per_cycle(void) struct perf_pmu *pmu = pmu__find_core_pmu(); if (pmu) { - scnprintf(path, PATH_MAX, - EVENT_SOURCE_DEVICE_PATH "%s/caps/slots", pmu->name); + perf_pmu__pathname_scnprintf(path, sizeof(path), + pmu->name, "caps/slots"); /* * The value of slots is not greater than 32 bits, but sysfs__read_int * can't read value with 0x prefix, so use sysfs__read_ull instead. diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index 74d69db1ea99..358340b34243 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -15,8 +15,6 @@ #include "../../../util/pmu.h" #include "../../../util/fncache.h" -#define TEMPLATE_ALIAS "%s/bus/event_source/devices/%s/alias" - struct pmu_alias { char *name; char *alias; @@ -72,18 +70,14 @@ static int setup_pmu_alias_list(void) char path[PATH_MAX]; DIR *dir; struct dirent *dent; - const char *sysfs = sysfs__mountpoint(); struct pmu_alias *pmu_alias; char buf[MAX_PMU_NAME_LEN]; FILE *file; int ret = -ENOMEM; - if (!sysfs) + if (!perf_pmu__event_source_devices_scnprintf(path, sizeof(path))) return -1; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); - dir = opendir(path); if (!dir) return -errno; @@ -93,9 +87,7 @@ static int setup_pmu_alias_list(void) !strcmp(dent->d_name, "..")) continue; - snprintf(path, PATH_MAX, - TEMPLATE_ALIAS, sysfs, dent->d_name); - + perf_pmu__pathname_scnprintf(path, sizeof(path), dent->d_name, "alias"); if (!file_available(path)) continue; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index cbb4fbf124bd..1edbb714ff32 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -108,14 +108,10 @@ int perf_pmu__format_parse(char *dir, struct list_head *head) static int pmu_format(const char *name, struct list_head *format) { char path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "format")) return -1; - snprintf(path, 
PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name); - if (!file_available(path)) return 0; @@ -514,14 +510,10 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) static int pmu_aliases(const char *name, struct list_head *head) { char path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "events")) return -1; - snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/events", sysfs, name); - if (!file_available(path)) return 0; @@ -555,52 +547,16 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, return 0; } -/* - * Reading/parsing the default pmu type value, which should be - * located at: - * /sys/bus/event_source/devices//type as sysfs attribute. - */ -static int pmu_type(const char *name, __u32 *type) -{ - char path[PATH_MAX]; - FILE *file; - int ret = 0; - const char *sysfs = sysfs__mountpoint(); - - if (!sysfs) - return -1; - - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name); - - if (access(path, R_OK) < 0) - return -1; - - file = fopen(path, "r"); - if (!file) - return -EINVAL; - - if (1 != fscanf(file, "%u", type)) - ret = -1; - - fclose(file); - return ret; -} - /* Add all pmus in sysfs to pmu list: */ static void pmu_read_sysfs(void) { char path[PATH_MAX]; DIR *dir; struct dirent *dent; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__event_source_devices_scnprintf(path, sizeof(path))) return; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); - dir = opendir(path); if (!dir) return; @@ -697,14 +653,9 @@ static char *pmu_id(const char *name) static int is_arm_pmu_core(const char *name) { char path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), name, "cpus")) return 0; - - /* Look for cpu sysfs (specific to arm) */ - scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", - 
sysfs, name); return file_available(path); } @@ -970,11 +921,8 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) return NULL; /* - * Check the type first to avoid unnecessary work. + * Check the aliases first to avoid unnecessary work. */ - if (pmu_type(name, &type)) - return NULL; - if (pmu_aliases(name, &aliases)) return NULL; @@ -984,9 +932,14 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) pmu->cpus = pmu_cpumask(name); pmu->name = strdup(name); + if (!pmu->name) goto err; + /* Read type, and ensure that type value is successfully assigned (return 1) */ + if (perf_pmu__scan_file(pmu, "type", "%u", &type) != 1) + goto err; + alias_name = pmu_find_alias_name(name); if (alias_name) { pmu->alias_name = strdup(alias_name); @@ -1787,16 +1740,11 @@ bool pmu_have_event(const char *pname, const char *name) static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) { char path[PATH_MAX]; - const char *sysfs; - sysfs = sysfs__mountpoint(); - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, name) || + !file_available(path)) return NULL; - snprintf(path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name); - if (!file_available(path)) - return NULL; return fopen(path, "r"); } @@ -1850,7 +1798,6 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) { struct stat st; char caps_path[PATH_MAX]; - const char *sysfs = sysfs__mountpoint(); DIR *caps_dir; struct dirent *evt_ent; @@ -1859,12 +1806,9 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) pmu->nr_caps = 0; - if (!sysfs) + if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps")) return -1; - snprintf(caps_path, PATH_MAX, - "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name); - if (stat(caps_path, &st) < 0) { pmu->caps_initialized = true; return 0; /* no error if caps does not exist */ @@ -1999,3 +1943,31 @@ double __weak perf_pmu__cpu_slots_per_cycle(void) { return NAN; } + +int 
perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size) +{ + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + return scnprintf(pathname, size, "%s/bus/event_source/devices/", sysfs); +} + +/* + * Fill 'buf' with the path to a file or folder in 'pmu_name' in + * sysfs. For example if pmu_name = "cs_etm" and 'filename' = "format" + * then pathname will be filled with + * "/sys/bus/event_source/devices/cs_etm/format" + * + * Return 0 if the sysfs mountpoint couldn't be found or if no + * characters were written. + */ +int perf_pmu__pathname_scnprintf(char *buf, size_t size, + const char *pmu_name, const char *filename) +{ + char base_path[PATH_MAX]; + + if (!perf_pmu__event_source_devices_scnprintf(base_path, sizeof(base_path))) + return 0; + return scnprintf(buf, size, "%s%s/%s", base_path, pmu_name, filename); +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index fd414ba1d776..96d030c8b3b3 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -22,7 +22,6 @@ enum { }; #define PERF_PMU_FORMAT_BITS 64 -#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" #define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" #define MAX_PMU_NAME_LEN 128 @@ -260,4 +259,8 @@ int perf_pmu__cpus_match(struct perf_pmu *pmu, struct perf_cpu_map *cpus, char *pmu_find_real_name(const char *name); char *pmu_find_alias_name(const char *name); double perf_pmu__cpu_slots_per_cycle(void); +int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); +int perf_pmu__pathname_scnprintf(char *buf, size_t size, + const char *pmu_name, const char *filename); + #endif /* __PMU_H */ From d50a79cd0f391cbfdc3d9a6165e247d084f94dd3 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 20 Jan 2023 14:36:55 +0000 Subject: [PATCH 030/114] perf pmu: Use perf_pmu__open_file() and perf_pmu__scan_file() Remove some code that duplicates existing methods. Copy strings where const strings are required. No functional changes. 
Committer notes: Add a stub for perf_pmu__scan_file() in tools/perf/util/python.c not to drag tools/perf/util/pmu.c into the python binding. This fixes 'perf test python' at this point in this patchset. Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Tested-by: Tanmay Jagdale Cc: Alexander Shishkin Cc: Bharat Bhushan Cc: George Cherian Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Linu Cherian Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sunil Kovvuri Goutham Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cputopo.c | 9 +------- tools/perf/util/pmu-hybrid.c | 27 +++++------------------- tools/perf/util/pmu.c | 40 +++++++++++------------------------- tools/perf/util/pmu.h | 3 ++- tools/perf/util/python.c | 8 +++++++- 5 files changed, 27 insertions(+), 60 deletions(-) diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 1a3ff6449158..e08797c3cdbc 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -422,8 +422,6 @@ void numa_topology__delete(struct numa_topology *tp) static int load_hybrid_node(struct hybrid_topology_node *node, struct perf_pmu *pmu) { - const char *sysfs; - char path[PATH_MAX]; char *buf = NULL, *p; FILE *fp; size_t len = 0; @@ -432,12 +430,7 @@ static int load_hybrid_node(struct hybrid_topology_node *node, if (!node->pmu_name) return -1; - sysfs = sysfs__mountpoint(); - if (!sysfs) - goto err; - - snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, pmu->name); - fp = fopen(path, "r"); + fp = perf_pmu__open_file(pmu, "cpus"); if (!fp) goto err; diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c index f51ccaac60ee..38628805a952 100644 --- a/tools/perf/util/pmu-hybrid.c +++ b/tools/perf/util/pmu-hybrid.c @@ -20,32 +20,15 @@ 
LIST_HEAD(perf_pmu__hybrid_pmus); bool perf_pmu__hybrid_mounted(const char *name) { - char path[PATH_MAX]; - const char *sysfs; - FILE *file; - int n, cpu; + int cpu; + char pmu_name[PATH_MAX]; + struct perf_pmu pmu = {.name = pmu_name}; if (strncmp(name, "cpu_", 4)) return false; - sysfs = sysfs__mountpoint(); - if (!sysfs) - return false; - - snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, name); - if (!file_available(path)) - return false; - - file = fopen(path, "r"); - if (!file) - return false; - - n = fscanf(file, "%u", &cpu); - fclose(file); - if (n <= 0) - return false; - - return true; + strlcpy(pmu_name, name, sizeof(pmu_name)); + return perf_pmu__scan_file(&pmu, "cpus", "%u", &cpu) > 0; } struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 1edbb714ff32..a771a5972fc5 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -571,45 +571,31 @@ static void pmu_read_sysfs(void) closedir(dir); } -static struct perf_cpu_map *__pmu_cpumask(const char *path) -{ - FILE *file; - struct perf_cpu_map *cpus; - - file = fopen(path, "r"); - if (!file) - return NULL; - - cpus = perf_cpu_map__read(file); - fclose(file); - return cpus; -} - /* * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) * may have a "cpus" file. 
*/ #define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier" -#define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" static struct perf_cpu_map *pmu_cpumask(const char *name) { - char path[PATH_MAX]; struct perf_cpu_map *cpus; - const char *sysfs = sysfs__mountpoint(); const char *templates[] = { - CPUS_TEMPLATE_UNCORE, - CPUS_TEMPLATE_CPU, + "cpumask", + "cpus", NULL }; const char **template; + char pmu_name[PATH_MAX]; + struct perf_pmu pmu = {.name = pmu_name}; + FILE *file; - if (!sysfs) - return NULL; - + strlcpy(pmu_name, name, sizeof(pmu_name)); for (template = templates; *template; template++) { - snprintf(path, PATH_MAX, *template, sysfs, name); - cpus = __pmu_cpumask(path); + file = perf_pmu__open_file(&pmu, *template); + if (!file) + continue; + cpus = perf_cpu_map__read(file); if (cpus) return cpus; } @@ -620,13 +606,11 @@ static struct perf_cpu_map *pmu_cpumask(const char *name) static bool pmu_is_uncore(const char *name) { char path[PATH_MAX]; - const char *sysfs; if (perf_pmu__hybrid_mounted(name)) return false; - sysfs = sysfs__mountpoint(); - snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); + perf_pmu__pathname_scnprintf(path, sizeof(path), name, "cpumask"); return file_available(path); } @@ -1737,7 +1721,7 @@ bool pmu_have_event(const char *pname, const char *name) return false; } -static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) +FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) { char path[PATH_MAX]; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 96d030c8b3b3..742d4db319a0 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "parse-events.h" #include "pmu-events/pmu-events.h" @@ -22,7 +23,6 @@ enum { }; #define PERF_PMU_FORMAT_BITS 64 -#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" #define MAX_PMU_NAME_LEN 128 struct perf_event_attr; @@ -262,5 +262,6 @@ double 
perf_pmu__cpu_slots_per_cycle(void); int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); int perf_pmu__pathname_scnprintf(char *buf, size_t size, const char *pmu_name, const char *filename); +FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name); #endif /* __PMU_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index d948455e5ed4..9e5d881b0987 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -20,6 +20,7 @@ #include "stat.h" #include "metricgroup.h" #include "util/env.h" +#include "util/pmu.h" #include #include "util.h" @@ -83,7 +84,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } /* - * This one is needed not to drag the PMU bandwagon, jevents generated + * These ones are needed not to drag the PMU bandwagon, jevents generated * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for * doing per PMU perf_event_attr.exclude_guest handling, not really needed, so * far, for the perf python binding known usecases, revisit if this become @@ -94,6 +95,11 @@ struct perf_pmu *evsel__find_pmu(struct evsel *evsel __maybe_unused) return NULL; } +int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) +{ + return EOF; +} + /* * Add this one here not to drag util/metricgroup.c */ From 5f2c8efa78ab4215c5d53899a6d87b9af429fea1 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 20 Jan 2023 14:36:56 +0000 Subject: [PATCH 031/114] perf pmu: Remove remaining duplication of bus/event_source/devices/... Use the new perf_pmu__pathname_scnprintf() instead. No functional changes. 
Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Tested-by: Tanmay Jagdale Cc: Alexander Shishkin Cc: Bharat Bhushan Cc: George Cherian Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Linu Cherian Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sunil Kovvuri Goutham Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a771a5972fc5..23e1d70fa343 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -575,8 +575,6 @@ static void pmu_read_sysfs(void) * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) * may have a "cpus" file. */ -#define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier" - static struct perf_cpu_map *pmu_cpumask(const char *name) { struct perf_cpu_map *cpus; @@ -619,9 +617,9 @@ static char *pmu_id(const char *name) char path[PATH_MAX], *str; size_t len; - snprintf(path, PATH_MAX, SYS_TEMPLATE_ID, name); + perf_pmu__pathname_scnprintf(path, sizeof(path), name, "identifier"); - if (sysfs__read_str(path, &str, &len) < 0) + if (filename__read_str(path, &str, &len) < 0) return NULL; str[len - 1] = 0; /* remove line feed */ @@ -867,16 +865,11 @@ pmu_find_alias_name(const char *name __maybe_unused) return NULL; } -static int pmu_max_precise(const char *name) +static int pmu_max_precise(struct perf_pmu *pmu) { - char path[PATH_MAX]; int max_precise = -1; - scnprintf(path, PATH_MAX, - "bus/event_source/devices/%s/caps/max_precise", - name); - - sysfs__read_int(path, &max_precise); + perf_pmu__scan_file(pmu, "caps/max_precise", "%d", &max_precise); return max_precise; } @@ -935,7 +928,7 @@ static struct perf_pmu *pmu_lookup(const char 
*lookup_name) pmu->is_uncore = pmu_is_uncore(name); if (pmu->is_uncore) pmu->id = pmu_id(name); - pmu->max_precise = pmu_max_precise(name); + pmu->max_precise = pmu_max_precise(pmu); pmu_add_cpu_aliases(&aliases, pmu); pmu_add_sys_aliases(&aliases, pmu); From c2b6a8969c82990626cfb9a1af0532b06cdb5716 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 20 Jan 2023 14:36:57 +0000 Subject: [PATCH 032/114] perf pmu: Add function to check if a pmu file exists Add a utility function perf_pmu__file_exists() to check if a given pmu file exists in the sysfs filesystem. Signed-off-by: German Gomez Acked-by: Suzuki Poulouse Tested-by: Tanmay Jagdale Cc: Alexander Shishkin Cc: Bharat Bhushan Cc: George Cherian Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Linu Cherian Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sunil Kovvuri Goutham Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-5-james.clark@arm.com Signed-off-by: James Clark Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 10 ++++++++++ tools/perf/util/pmu.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 23e1d70fa343..8abf5b8439a7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1742,6 +1742,16 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, return ret; } +bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name) +{ + char path[PATH_MAX]; + + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, name)) + return false; + + return file_available(path); +} + static int perf_pmu__new_caps(struct list_head *list, char *name, char *value) { struct perf_pmu_caps *caps = zalloc(sizeof(*caps)); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 742d4db319a0..2bdc560f19c7 100644 --- a/tools/perf/util/pmu.h +++ 
b/tools/perf/util/pmu.h @@ -230,6 +230,8 @@ bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); +bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name); + int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); From 326163c552c4fb67bb7503f8e8b5778327fc97bf Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 20 Jan 2023 14:36:58 +0000 Subject: [PATCH 033/114] perf cs_etm: Keep separate symbols for ETMv4 and ETE parameters Previously, adding a new parameter at the end of ETMv4 meant adding it somewhere in the middle of ETE, which is not supported by the current header version. Reviewed-by: Mike Leach Signed-off-by: German Gomez Acked-by: Suzuki Poulouse Tested-by: Tanmay Jagdale Cc: Alexander Shishkin Cc: Bharat Bhushan Cc: George Cherian Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Linu Cherian Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sunil Kovvuri Goutham Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-6-james.clark@arm.com Signed-off-by: James Clark Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 43 ++++++++++++++++++++++++++----- tools/perf/util/cs-etm-base.c | 32 +++++++++++++++++------ tools/perf/util/cs-etm.c | 12 ++++----- tools/perf/util/cs-etm.h | 11 +++++++- 4 files changed, 76 insertions(+), 22 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index a346d5f3dafa..b526ffe550a5 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -53,7 +53,15 @@ static const char * const metadata_etmv4_ro[] = { [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", - 
[CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch" +}; + +static const char * const metadata_ete_ro[] = { + [CS_ETE_TRCIDR0] = "trcidr/trcidr0", + [CS_ETE_TRCIDR1] = "trcidr/trcidr1", + [CS_ETE_TRCIDR2] = "trcidr/trcidr2", + [CS_ETE_TRCIDR8] = "trcidr/trcidr8", + [CS_ETE_TRCAUTHSTATUS] = "mgmt/trcauthstatus", + [CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch", }; static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); @@ -617,7 +625,7 @@ static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu) { struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; - int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCDEVARCH]); /* * ETE if ARCHVER is 5 (ARCHVER is 4 for ETM) and ARCHPART is 0xA13. @@ -648,6 +656,31 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, metadata_etmv4_ro[CS_ETMV4_TRCAUTHSTATUS]); } +static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, int cpu) +{ + struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + /* Get trace configuration register */ + data[CS_ETE_TRCCONFIGR] = cs_etmv4_get_config(itr); + /* Get traceID from the framework */ + data[CS_ETE_TRCTRACEIDR] = coresight_get_trace_id(cpu); + /* Get read-only information from sysFS */ + data[CS_ETE_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCIDR0]); + data[CS_ETE_TRCIDR1] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCIDR1]); + data[CS_ETE_TRCIDR2] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCIDR2]); + data[CS_ETE_TRCIDR8] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCIDR8]); + data[CS_ETE_TRCAUTHSTATUS] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCAUTHSTATUS]); + /* ETE uses the same registers as ETMv4 plus 
TRCDEVARCH */ + data[CS_ETE_TRCDEVARCH] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCDEVARCH]); +} + static void cs_etm_get_metadata(int cpu, u32 *offset, struct auxtrace_record *itr, struct perf_record_auxtrace_info *info) @@ -661,11 +694,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* first see what kind of tracer this cpu is affined to */ if (cs_etm_is_ete(itr, cpu)) { magic = __perf_cs_ete_magic; - /* ETE uses the same registers as ETMv4 plus TRCDEVARCH */ - cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu); - info->priv[*offset + CS_ETE_TRCDEVARCH] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + cs_etm_save_ete_header(&info->priv[*offset], itr, cpu); /* How much space was used */ increment = CS_ETE_PRIV_MAX; diff --git a/tools/perf/util/cs-etm-base.c b/tools/perf/util/cs-etm-base.c index 597542410854..282042c6e944 100644 --- a/tools/perf/util/cs-etm-base.c +++ b/tools/perf/util/cs-etm-base.c @@ -36,7 +36,20 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", - [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n" +}; + +static const char * const cs_ete_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", + [CS_ETE_TRCCONFIGR] = " TRCCONFIGR %llx\n", + [CS_ETE_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", + [CS_ETE_TRCIDR0] = " TRCIDR0 %llx\n", + [CS_ETE_TRCIDR1] = " TRCIDR1 %llx\n", + [CS_ETE_TRCIDR2] = " TRCIDR2 %llx\n", + [CS_ETE_TRCIDR8] = " TRCIDR8 %llx\n", + [CS_ETE_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", + [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n", }; static const char * const param_unk_fmt = @@ -96,18 +109,21 @@ static int cs_etm__print_cpu_metadata_v1(u64 *val, int *offset) else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); } - } else if (magic == __perf_cs_etmv4_magic || magic == 
__perf_cs_ete_magic) { - /* - * ETE and ETMv4 can be printed in the same block because the number of parameters - * is saved and they share the list of parameter names. ETE is also only supported - * in V1 files. - */ + } else if (magic == __perf_cs_etmv4_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETMV4_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + } + } else if (magic == __perf_cs_ete_magic) { for (j = 0; j < total_params; j++, i++) { /* if newer record - could be excess params */ if (j >= CS_ETE_PRIV_MAX) fprintf(stdout, param_unk_fmt, j, val[i]); else - fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + fprintf(stdout, cs_ete_priv_fmts[j], val[i]); } } else { /* failure - note bad magic value and error out */ diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 33303d03c2fa..879576d5f899 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -464,12 +464,12 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, u64 **metadata = etm->metadata; t_params[idx].protocol = CS_ETM_PROTO_ETE; - t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; - t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; - t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; - t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; - t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; - t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; + t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0]; + t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1]; + t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2]; + t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8]; + t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR]; + t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR]; 
t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH]; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 5da50d5dae6b..c5925428afa9 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -82,7 +82,16 @@ enum { * added in header V1 */ enum { - CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX, + /* Dynamic, configurable parameters */ + CS_ETE_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1, + CS_ETE_TRCTRACEIDR, + /* RO, taken from sysFS */ + CS_ETE_TRCIDR0, + CS_ETE_TRCIDR1, + CS_ETE_TRCIDR2, + CS_ETE_TRCIDR8, + CS_ETE_TRCAUTHSTATUS, + CS_ETE_TRCDEVARCH, CS_ETE_PRIV_MAX }; From 2e2f7ceecc19fcac31bc194485e96a3b67b7d65e Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 20 Jan 2023 14:36:59 +0000 Subject: [PATCH 034/114] perf cs_etm: Record ts_source in AUXTRACE_INFO for ETMv4 and ETE Read the value of ts_source exposed by the driver and store it in the ETMv4 and ETE header. If the interface doesn't exist (such as in older Kernels), defaults to a safe value of -1. 
Signed-off-by: German Gomez Acked-by: Suzuki Poulouse Tested-by: Tanmay Jagdale Cc: Alexander Shishkin Cc: Bharat Bhushan Cc: George Cherian Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Linu Cherian Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sunil Kovvuri Goutham Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-7-james.clark@arm.com Signed-off-by: James Clark Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 48 +++++++++++++++++++++++++++++++ tools/perf/util/cs-etm-base.c | 2 ++ tools/perf/util/cs-etm.h | 2 ++ 3 files changed, 52 insertions(+) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index b526ffe550a5..481e170cd3f1 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -53,6 +53,7 @@ static const char * const metadata_etmv4_ro[] = { [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", + [CS_ETMV4_TS_SOURCE] = "ts_source", }; static const char * const metadata_ete_ro[] = { @@ -62,6 +63,7 @@ static const char * const metadata_ete_ro[] = { [CS_ETE_TRCIDR8] = "trcidr/trcidr8", [CS_ETE_TRCAUTHSTATUS] = "mgmt/trcauthstatus", [CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch", + [CS_ETE_TS_SOURCE] = "ts_source", }; static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); @@ -613,6 +615,32 @@ static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path) return val; } +static int cs_etm_get_ro_signed(struct perf_pmu *pmu, int cpu, const char *path) +{ + char pmu_path[PATH_MAX]; + int scan; + int val = 0; + + /* Get RO metadata from sysfs */ + snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path); + + scan = perf_pmu__scan_file(pmu, pmu_path, "%d", &val); + if (scan != 1) + pr_err("%s: error reading: %s\n", __func__, pmu_path); + + 
return val; +} + +static bool cs_etm_pmu_path_exists(struct perf_pmu *pmu, int cpu, const char *path) +{ + char pmu_path[PATH_MAX]; + + /* Get RO metadata from sysfs */ + snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path); + + return perf_pmu__file_exists(pmu, pmu_path); +} + #define TRCDEVARCH_ARCHPART_SHIFT 0 #define TRCDEVARCH_ARCHPART_MASK GENMASK(11, 0) #define TRCDEVARCH_ARCHPART(x) (((x) & TRCDEVARCH_ARCHPART_MASK) >> TRCDEVARCH_ARCHPART_SHIFT) @@ -654,6 +682,16 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, metadata_etmv4_ro[CS_ETMV4_TRCIDR8]); data[CS_ETMV4_TRCAUTHSTATUS] = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETMV4_TRCAUTHSTATUS]); + + /* Kernels older than 5.19 may not expose ts_source */ + if (cs_etm_pmu_path_exists(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETMV4_TS_SOURCE])) + data[CS_ETMV4_TS_SOURCE] = (__u64) cs_etm_get_ro_signed(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TS_SOURCE]); + else { + pr_warning("[%03d] pmu file 'ts_source' not found. Fallback to safe value (-1)\n", + cpu); + data[CS_ETMV4_TS_SOURCE] = (__u64) -1; + } } static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, int cpu) @@ -679,6 +717,16 @@ static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, in /* ETE uses the same registers as ETMv4 plus TRCDEVARCH */ data[CS_ETE_TRCDEVARCH] = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCDEVARCH]); + + /* Kernels older than 5.19 may not expose ts_source */ + if (cs_etm_pmu_path_exists(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TS_SOURCE])) + data[CS_ETE_TS_SOURCE] = (__u64) cs_etm_get_ro_signed(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TS_SOURCE]); + else { + pr_warning("[%03d] pmu file 'ts_source' not found. 
Fallback to safe value (-1)\n", + cpu); + data[CS_ETE_TS_SOURCE] = (__u64) -1; + } } static void cs_etm_get_metadata(int cpu, u32 *offset, diff --git a/tools/perf/util/cs-etm-base.c b/tools/perf/util/cs-etm-base.c index 282042c6e944..5f48b756c4cf 100644 --- a/tools/perf/util/cs-etm-base.c +++ b/tools/perf/util/cs-etm-base.c @@ -36,6 +36,7 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", + [CS_ETMV4_TS_SOURCE] = " TS_SOURCE %lld\n", }; static const char * const cs_ete_priv_fmts[] = { @@ -50,6 +51,7 @@ static const char * const cs_ete_priv_fmts[] = { [CS_ETE_TRCIDR8] = " TRCIDR8 %llx\n", [CS_ETE_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n", + [CS_ETE_TS_SOURCE] = " TS_SOURCE %lld\n", }; static const char * const param_unk_fmt = diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index c5925428afa9..ad790930bcbc 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -71,6 +71,7 @@ enum { CS_ETMV4_TRCIDR2, CS_ETMV4_TRCIDR8, CS_ETMV4_TRCAUTHSTATUS, + CS_ETMV4_TS_SOURCE, CS_ETMV4_PRIV_MAX, }; @@ -92,6 +93,7 @@ enum { CS_ETE_TRCIDR8, CS_ETE_TRCAUTHSTATUS, CS_ETE_TRCDEVARCH, + CS_ETE_TS_SOURCE, CS_ETE_PRIV_MAX }; From a7fe9a443b6064c68f86a2ee09bdfa7736660ef3 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Fri, 20 Jan 2023 14:37:00 +0000 Subject: [PATCH 035/114] perf cs_etm: Set the time field in the synthetic samples If virtual timestamps are detected, set sample time field accordingly, otherwise warn the user that the samples will not include accurate time data. 
| Test notes (FEAT_TRF platform) | | $ ./perf record -e cs_etm//u -a -- sleep 4 | $ ./perf script --fields +time | perf 422 [000] 163.375100: 1 branches:uH: 0 [unknown] ([unknown]) | perf 422 [000] 163.375100: 1 branches:uH: ffffb8009544 ioctl+0x14 (/lib/aarch64-linux-gnu/libc-2.27.so) | perf 422 [000] 163.375100: 1 branches:uH: aaaaab6bebf4 perf_evsel__run_ioctl+0x90 (/home/german/linux/tools/perf/perf) | [...] | perf 422 [000] 167.393100: 1 branches:uH: aaaaab6bda00 __xyarray__entry+0x74 (/home/german/linux/tools/perf/perf) | perf 422 [000] 167.393099: 1 branches:uH: aaaaab6bda0c __xyarray__entry+0x80 (/home/german/linux/tools/perf/perf) | perf 422 [000] 167.393099: 1 branches:uH: ffffb8009538 ioctl+0x8 (/lib/aarch64-linux-gnu/libc-2.27.so) | | The time from the first sample to the last sample is 4 seconds Now that times are converted to nanoseconds, also try to estimate the timestamps more accurately by dividing by some fixed value for instructions per ns. This prevents long ranges from being estimated further in the past than would be realistic. 
Signed-off-by: German Gomez Acked-by: Suzuki Poulouse Tested-by: Tanmay Jagdale Cc: Alexander Shishkin Cc: Bharat Bhushan Cc: George Cherian Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Linu Cherian Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sunil Kovvuri Goutham Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-8-james.clark@arm.com Signed-off-by: James Clark Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 47 +++++++++-- tools/perf/util/cs-etm.c | 83 ++++++++++++++++++- tools/perf/util/cs-etm.h | 3 +- 3 files changed, 120 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 31fa3b45134a..440fe844ed17 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -30,6 +30,15 @@ #endif #endif +/* + * Assume a maximum of 0.1ns elapsed per instruction. This would be the + * case with a theoretical 10GHz core executing 1 instruction per cycle. + * Used to estimate the sample time for synthesized instructions because + * Coresight only emits a timestamp for a range of instructions rather + * than per instruction. + */ +const u32 INSTR_PER_NS = 10; + struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); @@ -112,6 +121,20 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, return 1; } +/* + * Calculate the number of nanoseconds elapsed. + * + * instr_count is updated in place with the remainder of the instructions + * which didn't make up a whole nanosecond. 
+ */ +static u32 cs_etm_decoder__dec_instr_count_to_ns(u32 *instr_count) +{ + const u32 instr_copy = *instr_count; + + *instr_count %= INSTR_PER_NS; + return instr_copy / INSTR_PER_NS; +} + static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params, ocsd_etmv3_cfg *config) { @@ -267,8 +290,8 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, packet_queue->cs_timestamp = packet_queue->next_cs_timestamp; /* Estimate the timestamp for the next range packet */ - packet_queue->next_cs_timestamp += packet_queue->instr_count; - packet_queue->instr_count = 0; + packet_queue->next_cs_timestamp += + cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count); /* Tell the front end which traceid_queue needs attention */ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id); @@ -283,24 +306,31 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, const ocsd_trc_index_t indx) { struct cs_etm_packet_queue *packet_queue; + u64 converted_timestamp; /* First get the packet queue for this traceID */ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id); if (!packet_queue) return OCSD_RESP_FATAL_SYS_ERR; + /* + * Coresight timestamps are raw timer values which need to be scaled to ns. Assume + * 0 is a bad value so don't try to convert it. + */ + converted_timestamp = elem->timestamp ? + cs_etm__convert_sample_time(etmq, elem->timestamp) : 0; + /* * We've seen a timestamp packet before - simply record the new value. * Function do_soft_timestamp() will report the value to the front end, * hence asking the decoder to keep decoding rather than stopping. */ if (packet_queue->cs_timestamp) { - packet_queue->next_cs_timestamp = elem->timestamp; + packet_queue->next_cs_timestamp = converted_timestamp; return OCSD_RESP_CONT; } - - if (!elem->timestamp) { + if (!converted_timestamp) { /* * Zero timestamps can be seen due to misconfiguration or hardware bugs. 
* Warn once, and don't try to subtract instr_count as it would result in an @@ -312,7 +342,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, ". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n", indx); - } else if (packet_queue->instr_count > elem->timestamp) { + } else if (packet_queue->instr_count / INSTR_PER_NS > converted_timestamp) { /* * Sanity check that the elem->timestamp - packet_queue->instr_count would not * result in an underflow. Warn and clamp at 0 if it would. @@ -327,9 +357,10 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * which instructions started by subtracting the number of instructions * executed to the timestamp. */ - packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count; + packet_queue->cs_timestamp = converted_timestamp - + (packet_queue->instr_count / INSTR_PER_NS); } - packet_queue->next_cs_timestamp = elem->timestamp; + packet_queue->next_cs_timestamp = converted_timestamp; packet_queue->instr_count = 0; /* Tell the front end which traceid_queue needs attention */ diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 879576d5f899..f65bac5ddbdb 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -35,6 +35,7 @@ #include "tool.h" #include "thread.h" #include "thread-stack.h" +#include "tsc.h" #include #include "util/synthetic-events.h" @@ -46,10 +47,12 @@ struct cs_etm_auxtrace { struct perf_session *session; struct machine *machine; struct thread *unknown_thread; + struct perf_tsc_conversion tc; u8 timeless_decoding; u8 snapshot_mode; u8 data_queued; + u8 has_virtual_ts; /* Virtual/Kernel timestamps in the trace. 
*/ int num_cpu; u64 latest_kernel_timestamp; @@ -1161,6 +1164,30 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq, sample->insn_len, (void *)sample->insn); } +u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) +{ + struct cs_etm_auxtrace *etm = etmq->etm; + + if (etm->has_virtual_ts) + return tsc_to_perf_time(cs_timestamp, &etm->tc); + else + return cs_timestamp; +} + +static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) +{ + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue; + + if (etm->timeless_decoding) + return 0; + else if (etm->has_virtual_ts) + return packet_queue->cs_timestamp; + else + return etm->latest_kernel_timestamp; +} + static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq, u64 addr, u64 period) @@ -1174,8 +1201,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = sizeof(struct perf_event_header); - if (!etm->timeless_decoding) - sample.time = etm->latest_kernel_timestamp; + /* Set time field based on etm auxtrace config. */ + sample.time = cs_etm__resolve_sample_time(etmq, tidq); + sample.ip = addr; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -1232,8 +1260,9 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); - if (!etm->timeless_decoding) - sample.time = etm->latest_kernel_timestamp; + /* Set time field based on etm auxtrace config. 
*/ + sample.time = cs_etm__resolve_sample_time(etmq, tidq); + sample.ip = ip; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -2746,12 +2775,42 @@ static int cs_etm__queue_aux_records(struct perf_session *session) return 0; } +#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \ + (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1)) + +/* + * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual + * timestamps). + */ +static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) +{ + int j; + + for (j = 0; j < num_cpu; j++) { + switch (metadata[j][CS_ETM_MAGIC]) { + case __perf_cs_etmv4_magic: + if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1) + return false; + break; + case __perf_cs_ete_magic: + if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1) + return false; + break; + default: + /* Unknown / unsupported magic number. */ + return false; + } + } + return true; +} + int cs_etm__process_auxtrace_info_full(union perf_event *event, struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; struct cs_etm_auxtrace *etm = NULL; struct int_node *inode; + struct perf_record_time_conv *tc = &session->time_conv; int event_header_size = sizeof(struct perf_event_header); int total_size = auxtrace_info->header.size; int priv_size = 0; @@ -2886,6 +2945,13 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, etm->auxtrace_type = auxtrace_info->type; etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); + /* Use virtual timestamps if all ETMs report ts_source = 1 */ + etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); + + if (!etm->has_virtual_ts) + ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" + "The time field of the samples will not be set accurately.\n\n"); + etm->auxtrace.process_event = cs_etm__process_event; 
etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; etm->auxtrace.flush_events = cs_etm__flush_events; @@ -2915,6 +2981,15 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, goto err_delete_thread; } + etm->tc.time_shift = tc->time_shift; + etm->tc.time_mult = tc->time_mult; + etm->tc.time_zero = tc->time_zero; + if (event_contains(*tc, time_cycles)) { + etm->tc.time_cycles = tc->time_cycles; + etm->tc.time_mask = tc->time_mask; + etm->tc.cap_user_time_zero = tc->cap_user_time_zero; + etm->tc.cap_user_time_short = tc->cap_user_time_short; + } err = cs_etm__synth_events(etm, session); if (err) goto err_delete_thread; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index ad790930bcbc..98a4f7113d2f 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -192,7 +192,7 @@ struct cs_etm_packet_queue { u32 head; u32 tail; u32 instr_count; - u64 cs_timestamp; + u64 cs_timestamp; /* Timestamp from trace data, converted to ns if possible */ u64 next_cs_timestamp; struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; }; @@ -231,6 +231,7 @@ struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id); int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused, struct perf_session *session __maybe_unused); +u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp); #else static inline int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused, From 5670ebf54bd26482f57a094c53bdc562c106e0a9 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 20 Jan 2023 14:37:01 +0000 Subject: [PATCH 036/114] perf cs-etm: Ensure that Coresight timestamps don't go backwards There are some edge cases around estimated timestamps that can result in them going backwards. One is that after a discontinuity, the last used timestamp is set to 0. 
The duration of the next range is then subtracted which could result in an earlier timestamp than the last instruction. Fix this by not resetting the last timestamp used on a discontinuity, and make sure that new estimated timestamps are clamped to be later than that. Another case is that estimated timestamps could compound over time to end up being more than the next real timestamp in the trace. Fix this by clamping the estimates in cs_etm_decoder__do_soft_timestamp() to be no later than it. cs_etm_decoder__do_soft_timestamp() also updated next_cs_timestamp, which meant that the next real timestamp was lost and not stored anywhere. Fix that by only updating cs_timestamp for estimates and keep next_cs_timestamp untouched. Finally, use next_cs_timestamp to signify if a timestamp has been received previously. Because cs_timestamp has the first range subtracted, it could technically go to 0 which would break the logic. Testing ======= It can be verified that timestamps don't go backwards when tracing on a single core with the following commands. 
Across multiple cores it's expected that timestamps are interleaved: $ perf record -e cs_etm/@tmc_etr0/k -C 4 taskset -c 4 sleep 1 $ perf script --itrace=i1ns --ns -Fcomm,tid,pid,time,cpu,event,ip,sym,addr,symoff,flags,callindent > itrace $ sed 's/://g' itrace | awk -F ' ' ' { print $4 } ' | awk '{ if ($1 < prev) { print "line:" NR " " $0 } {prev=$1}}' Reported-by: Tanmay Jagdale Signed-off-by: James Clark Acked-by: Suzuki Poulouse Tested-by: Tanmay Jagdale Cc: Alexander Shishkin Cc: Bharat Bhushan Cc: George Cherian Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Linu Cherian Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sunil Kovvuri Goutham Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-9-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 440fe844ed17..63afa2d05b46 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -283,15 +283,17 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, struct cs_etm_packet_queue *packet_queue, const uint8_t trace_chan_id) { + u64 estimated_ts; + /* No timestamp packet has been received, nothing to do */ - if (!packet_queue->cs_timestamp) + if (!packet_queue->next_cs_timestamp) return OCSD_RESP_CONT; - packet_queue->cs_timestamp = packet_queue->next_cs_timestamp; + estimated_ts = packet_queue->cs_timestamp + + cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count); - /* Estimate the timestamp for the next range packet */ - packet_queue->next_cs_timestamp += - cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count); + /* Estimated TS can 
never be higher than the next real one in the trace */ + packet_queue->cs_timestamp = min(packet_queue->next_cs_timestamp, estimated_ts); /* Tell the front end which traceid_queue needs attention */ cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id); @@ -307,6 +309,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, { struct cs_etm_packet_queue *packet_queue; u64 converted_timestamp; + u64 estimated_first_ts; /* First get the packet queue for this traceID */ packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id); @@ -325,7 +328,12 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * Function do_soft_timestamp() will report the value to the front end, * hence asking the decoder to keep decoding rather than stopping. */ - if (packet_queue->cs_timestamp) { + if (packet_queue->next_cs_timestamp) { + /* + * What was next is now where new ranges start from, overwriting + * any previous estimate in cs_timestamp + */ + packet_queue->cs_timestamp = packet_queue->next_cs_timestamp; packet_queue->next_cs_timestamp = converted_timestamp; return OCSD_RESP_CONT; } @@ -355,10 +363,12 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * or a discontinuity. Since timestamps packets are generated *after* * range packets have been generated, we need to estimate the time at * which instructions started by subtracting the number of instructions - * executed to the timestamp. + * executed to the timestamp. Don't estimate earlier than the last used + * timestamp though. 
*/ - packet_queue->cs_timestamp = converted_timestamp - - (packet_queue->instr_count / INSTR_PER_NS); + estimated_first_ts = converted_timestamp - + (packet_queue->instr_count / INSTR_PER_NS); + packet_queue->cs_timestamp = max(packet_queue->cs_timestamp, estimated_first_ts); } packet_queue->next_cs_timestamp = converted_timestamp; packet_queue->instr_count = 0; @@ -373,7 +383,6 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, static void cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) { - packet_queue->cs_timestamp = 0; packet_queue->next_cs_timestamp = 0; packet_queue->instr_count = 0; } From 7158005b4eb350900b3cce44b013e685869d0f08 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 23 Jan 2023 14:02:24 +0530 Subject: [PATCH 037/114] perf test: Switch basic bpf filtering test to use syscall tracepoint BPF filtering tests can sometime fail. Running the test in verbose mode shows the following: $ sudo perf test 42 42: BPF filter : 42.1: Basic BPF filtering : FAILED! 42.2: BPF pinning : Skip 42.3: BPF prologue generation : Skip $ perf --version perf version 4.18.0-425.3.1.el8.ppc64le $ sudo perf test -v 42 42: BPF filter : 42.1: Basic BPF filtering : --- start --- test child forked, pid 711060 ... bpf: config 'func=do_epoll_wait' is ok Looking at the vmlinux_path (8 entries long) Using /usr/lib/debug/lib/modules/4.18.0-425.3.1.el8.ppc64le/vmlinux for symbols Open Debuginfo file: /usr/lib/debug/.build-id/81/56f5a07f92ccb62c5600ba0e4aacfb5f3a7534.debug Try to find probe point from debuginfo. Matched function: do_epoll_wait [4ef8cb0] found inline addr: 0xc00000000061dbe4 Probe point found: __se_compat_sys_epoll_pwait+196 found inline addr: 0xc00000000061d9f4 Probe point found: __se_sys_epoll_pwait+196 found inline addr: 0xc00000000061d824 Probe point found: __se_sys_epoll_wait+36 Found 3 probe_trace_events. Opening /sys/kernel/tracing//kprobe_events write=1 ... 
BPF filter result incorrect, expected 56, got 56 samples test child finished with -1 ---- end ---- BPF filter subtest 1: FAILED! The statement above about the result being incorrect looks weird, and it is due to that particular perf build missing commit 3e11300cdfd5f1 ("perf test: Fix bpf test sample mismatch reporting"). In reality, due to commit 4b04e0decd2518 ("perf test: Fix basic bpf filtering test"), perf expects there to be 56*3 samples. However, the number of samples we receive is going to be dependent on where the probes are installed, which is dependent on where do_epoll_wait gets inlined. On s390x, it looks like probes at all the inlined locations are hit. But, that is not the case on ppc64le. Fix this by switching the test to instead use the syscall tracepoint. This ensures that we will only ever install a single event enabling us to reliably determine the sample count. Reported-by: Disha Goel Signed-off-by: Naveen N. Rao Cc: Kajol Jain Cc: bpf@vger.kernel.org Link: http://lore.kernel.org/lkml/20230123083224.276404-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-example.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index 7981c69ed1b4..b638cc99d5ae 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -43,7 +43,7 @@ struct { __type(value, int); } flip_table SEC(".maps"); -SEC("func=do_epoll_wait") +SEC("syscalls:sys_enter_epoll_pwait") int bpf_func__SyS_epoll_pwait(void *ctx) { int ind =0; From fc5d836c6795a1e1991d2611a2338ffe06588b8a Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Sun, 22 Jan 2023 13:20:32 +0100 Subject: [PATCH 038/114] perf: Various spelling fixes Fix various spelling errors as reported by Debian's lintian tool. 
"amount of times" -> "number of times" ocurrence -> occurrence upto -> up to Signed-off-by: Diederik de Haas Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230122122034.48020-1-didi.debian@cknow.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-bench.txt | 2 +- tools/perf/builtin-bench.c | 2 +- tools/perf/builtin-script.c | 2 +- tools/perf/util/evswitch.h | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index a0529c7fa5ef..f04f0eaded98 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -18,7 +18,7 @@ COMMON OPTIONS -------------- -r:: --repeat=:: -Specify amount of times to repeat the run (default 10). +Specify number of times to repeat the run (default 10). -f:: --format=:: diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 334ab897aae3..bd4fd94a2ce0 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -150,7 +150,7 @@ unsigned int bench_repeat = 10; /* default number of times to repeat the run */ static const struct option bench_options[] = { OPT_STRING('f', "format", &bench_format_str, "default|simple", "Specify the output formatting style"), - OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify amount of times to repeat the run"), + OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify number of times to repeat the run"), OPT_END() }; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 69394ac0a20d..8901acdd7f5b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1301,7 +1301,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, goto out; /* - * Print final block upto sample + * Print final block up to sample * * Due to pipeline delays the LBRs might 
be missing a branch * or two, which can result in very large or negative blocks diff --git a/tools/perf/util/evswitch.h b/tools/perf/util/evswitch.h index fd30460b6218..8ffdbe526d98 100644 --- a/tools/perf/util/evswitch.h +++ b/tools/perf/util/evswitch.h @@ -22,9 +22,9 @@ bool evswitch__discard(struct evswitch *evswitch, struct evsel *evsel); #define OPTS_EVSWITCH(evswitch) \ OPT_STRING(0, "switch-on", &(evswitch)->on_name, \ - "event", "Consider events after the ocurrence of this event"), \ + "event", "Consider events after the occurrence of this event"), \ OPT_STRING(0, "switch-off", &(evswitch)->off_name, \ - "event", "Stop considering events after the ocurrence of this event"), \ + "event", "Stop considering events after the occurrence of this event"), \ OPT_BOOLEAN(0, "show-on-off-events", &(evswitch)->show_on_off_events, \ "Show the on/off switch events, used with --switch-on and --switch-off") From f1942108461d31f44ecd0ea14bddfe3822a2629a Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 19 Jan 2023 19:57:19 +0530 Subject: [PATCH 039/114] perf test buildid: Fix shell string substitutions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The perf test named “build id cache operations” skips with below error on some distros: <<>> 78: build id cache operations : test child forked, pid 111101 WARNING: wine not found. PE binaries will not be run. test binaries: /tmp/perf.ex.SHA1.PKz /tmp/perf.ex.MD5.Gt3 ./tests/shell/../pe-file.exe DEBUGINFOD_URLS= Adding 4abd406f041feb4f10ecde3fc30fd0639e1a91cb /tmp/perf.ex.SHA1.PKz: Ok build id: 4abd406f041feb4f10ecde3fc30fd0639e1a91cb ./tests/shell/buildid.sh: 69: ./tests/shell/buildid.sh: Bad substitution test child finished with -2 build id cache operations: Skip <<>> The test script "tests/shell/buildid.sh" uses some of the string substitution ways which are supported in bash, but not in "sh" or other shells. 
Above error on line number 69 that reports "Bad substitution" is: <<>> link=${build_id_dir}/.build-id/${id:0:2}/${id:2} <<>> Here the way of getting the first two characters from id, i.e. ${id:0:2}, and similar expressions like ${id:2}, are not recognised in "sh". So the line errors and instead of hitting failure, the test gets skipped as shown in logs. So the syntax issue causes test not to be executed in such cases. Similarly, the usage of "${@: -1}" [to pick the last argument passed to a function] in “test_record” doesn’t work in all distros. Fix this by using alternative way with shell substitution to pick required characters from the string. Also fix the usage of “${@: -1}” to work in all cases. Another usage in “test_record” is: <<>> ${perf} record --buildid-all -o ${data} $@ &> ${log} <<>> This causes the 'perf record' to start in background and results in the data file not being created by the time "check" function is invoked. Below log shows 'perf record' result getting displayed after the call to "check" function. <<>> running: perf record /tmp/perf.ex.SHA1.EAU build id: 4abd406f041feb4f10ecde3fc30fd0639e1a91cb link: /tmp/perf.debug.mLT/.build-id/4a/bd406f041feb4f10ecde3fc30fd0639e1a91cb failed: link /tmp/perf.debug.mLT/.build-id/4a/bd406f041feb4f10ecde3fc30fd0639e1a91cb does not exist test child finished with -1 build id cache operations: FAILED! root@machine:~/athira/linux/tools/perf# Couldn't synthesize bpf events. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.010 MB /tmp/perf.data.bFF ] <<>> Fix this by redirecting output instead of using “&” which starts the command in background.
Reviewed-by: David Laight Signed-off-by: Athira Jajeev Tested-by: Disha Goel Acked-by: Ian Rogers Cc: Andi Kleen Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230119142719.32628-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/buildid.sh | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh index aaf851108ca3..0ce22ea0a7f1 100755 --- a/tools/perf/tests/shell/buildid.sh +++ b/tools/perf/tests/shell/buildid.sh @@ -66,7 +66,9 @@ check() esac echo "build id: ${id}" - link=${build_id_dir}/.build-id/${id:0:2}/${id:2} + id_file=${id#??} + id_dir=${id%$id_file} + link=$build_id_dir/.build-id/$id_dir/$id_file echo "link: ${link}" if [ ! -h $link ]; then @@ -74,7 +76,7 @@ check() exit 1 fi - file=${build_id_dir}/.build-id/${id:0:2}/`readlink ${link}`/elf + file=${build_id_dir}/.build-id/$id_dir/`readlink ${link}`/elf echo "file: ${file}" # Check for file permission of original file @@ -130,20 +132,22 @@ test_record() { data=$(mktemp /tmp/perf.data.XXX) build_id_dir=$(mktemp -d /tmp/perf.debug.XXX) - log=$(mktemp /tmp/perf.log.XXX) + log_out=$(mktemp /tmp/perf.log.out.XXX) + log_err=$(mktemp /tmp/perf.log.err.XXX) perf="perf --buildid-dir ${build_id_dir}" echo "running: perf record $@" - ${perf} record --buildid-all -o ${data} $@ &> ${log} + ${perf} record --buildid-all -o ${data} $@ 1>${log_out} 2>${log_err} if [ $? 
-ne 0 ]; then echo "failed: record $@" - echo "see log: ${log}" + echo "see log: ${log_err}" exit 1 fi - check ${@: -1} + args="$*" + check ${args##* } - rm -f ${log} + rm -f ${log_out} ${log_err} rm -rf ${build_id_dir} rm -rf ${data} } From 0b58d89b1e7ca46ca4f7457a6aacb2ce6ce75051 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Jan 2023 14:49:18 -0300 Subject: [PATCH 040/114] perf tools: Add Ian Rogers to MAINTAINERS as a reviewer Ian has been reviewing perf tooling patches consistently for a long time, so let's reflect that in the MAINTAINERS file so that contributors add him to the CC list in patch submissions. Reviewed-by: Ian Rogers Acked-by: Ingo Molnar Acked-by: Jiri Olsa Acked-by: Namhyung Kim Acked-by: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f781f936ae35..7060ba8c6146 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16418,6 +16418,7 @@ R: Mark Rutland R: Alexander Shishkin R: Jiri Olsa R: Namhyung Kim +R: Ian Rogers L: linux-perf-users@vger.kernel.org L: linux-kernel@vger.kernel.org S: Supported From 6bc75b4c9042325e62fe43f73161b17b5687be6c Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 24 Jan 2023 11:02:20 +0000 Subject: [PATCH 041/114] perf cs-etm: Improve missing sink warning message Make the sink error message more similar to the event error message that reminds about missing kernel support. The available sinks are also determined by the hardware so mention that too. Also, usually it's not necessary to specify the sink, so add that as a hint. Now the error for a made up sink looks like this: $ perf record -e cs_etm/@abc/ Couldn't find sink "abc" on event cs_etm/@abc/. Missing kernel or device support? Hint: An appropriate sink will be picked automatically if one isn't specified. For any error other than ENOENT, the same message as before is displayed.
Signed-off-by: James Clark Acked-by: Suzuki Poulouse Suggested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/ec7502e6-b406-3997-c2a5-24f98e5c4854@arm.com Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230124110220.460551-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 481e170cd3f1..7f71c8a237ff 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -283,9 +283,15 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, ret = perf_pmu__scan_file(pmu, path, "%x", &hash); if (ret != 1) { - pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n", - sink, evsel__name(evsel), errno, - str_error_r(errno, msg, sizeof(msg))); + if (errno == ENOENT) + pr_err("Couldn't find sink \"%s\" on event %s\n" + "Missing kernel or device support?\n\n" + "Hint: An appropriate sink will be picked automatically if one isn't specified.\n", + sink, evsel__name(evsel)); + else + pr_err("Failed to set sink \"%s\" on event %s with %d (%s)\n", + sink, evsel__name(evsel), errno, + str_error_r(errno, msg, sizeof(msg))); return ret; } From 86569c0ab166abdd268032a25ecd369cf0959d2b Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 24 Jan 2023 14:59:29 +0000 Subject: [PATCH 042/114] perf mem/c2c: Document that SPE is used for mem and c2c on ARM Setup is non-trivial so also link to the full SPE docs. 
Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-perf-users@vger.kernel.or Link: https://lore.kernel.org/r/20230124145929.557891-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 8 ++++++-- tools/perf/Documentation/perf-mem.txt | 7 ++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index af5c3106f468..4e8c263e1721 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -22,7 +22,11 @@ you to track down the cacheline contentions. On Intel, the tool is based on load latency and precise store facility events provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling with thresholding feature. On AMD, the tool uses IBS op pmu (due to hardware -limitations, perf c2c is not supported on Zen3 cpus). +limitations, perf c2c is not supported on Zen3 cpus). On Arm64 it uses SPE to +sample load and store operations, therefore hardware and kernel support is +required. See linkperf:perf-arm-spe[1] for a setup guide. Due to the +statistical nature of Arm SPE sampling, not every memory operation will be +sampled. These events provide: - memory address of the access @@ -333,4 +337,4 @@ Check Joe's blog on c2c tool for detailed use case explanation: SEE ALSO -------- -linkperf:perf-record[1], linkperf:perf-mem[1] +linkperf:perf-record[1], linkperf:perf-mem[1], linkperf:perf-arm-spe[1] diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 005c95580b1e..19862572e3f2 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -23,6 +23,11 @@ Note that on Intel systems the memory latency reported is the use-latency, not the pure load (or store latency). 
Use latency includes any pipeline queueing delays in addition to the memory subsystem latency. +On Arm64 this uses SPE to sample load and store operations, therefore hardware +and kernel support is required. See linkperf:perf-arm-spe[1] for a setup guide. +Due to the statistical nature of SPE sampling, not every memory operation will +be sampled. + OPTIONS ------- ...:: @@ -93,4 +98,4 @@ all perf record options. SEE ALSO -------- -linkperf:perf-record[1], linkperf:perf-report[1] +linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1] From 22e06e682537193a426ef13e57c34d52c45c5581 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 20 Jan 2023 10:58:28 -0800 Subject: [PATCH 043/114] perf buildid: Avoid copy of uninitialized memory build_id__init() only copies the buildid data up to size leaving the rest of the data array uninitialized. Copying the full array during synthesis means the written event contains uninitialized memory. Ensure the size is no larger than the buffer size and only copy the bytes that were initialized. This was detected by the Clang/LLVM memory sanitizer. v2. Avoids the potential for copying too much as suggested by Arnaldo.
Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Rix Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20230120185828.43231-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/synthetic-events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 3ab6a92b1a6d..9ab9308ee80c 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2219,8 +2219,8 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 len = pos->long_name_len + 1; len = PERF_ALIGN(len, NAME_ALIGN); - memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data)); - ev.build_id.size = pos->bid.size; + ev.build_id.size = min(pos->bid.size, sizeof(pos->bid.data)); + memcpy(&ev.build_id.build_id, pos->bid.data, ev.build_id.size); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE; ev.build_id.pid = machine->pid; From dfadf8b315f5cf5d0077181d9882a7e1f3da1749 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 25 Jan 2023 18:04:42 +0530 Subject: [PATCH 044/114] perf test: Fix DWARF unwind test by adding non-inline to expected function in a backtrace 'DWARF unwind' 'perf test' can sometimes fail: $ perf test -v 74 Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc 74: Test dwarf unwind : --- start --- test child forked, pid 3785254 Problems creating module maps, continuing anyway... Problems creating module maps, continuing anyway... 
unwind: test__arch_unwind_sample:ip = 0x102d0ad4c (0x36ad4c) unwind: access_mem addr 0x7fffc33128c8, val 1031c3228, offset 120 unwind: access_mem addr 0x7fffc33128d0, val 12427cc70, offset 128 unwind: test_dwarf_unwind__krava_3:ip = 0x102b8768b (0x1e768b) unwind: access_mem addr 0x7fffc3313048, val 7fffc3313050, offset 2040 unwind: access_mem addr 0x7fffc3313060, val 102b8777c, offset 2064 unwind: test_dwarf_unwind__krava_2:ip = 0x102b8770b (0x1e770b) unwind: access_mem addr 0x7fffc3313088, val 7fffc3313090, offset 2104 unwind: access_mem addr 0x7fffc33130a0, val 102b87890, offset 2128 unwind: test_dwarf_unwind__krava_1:ip = 0x102b8777b (0x1e777b) unwind: access_mem addr 0x7fffc3313108, val 10323a274, offset 2232 unwind: access_mem addr 0x7fffc3313110, val ffffffffffffffff, offset 2240 unwind: access_mem addr 0x7fffc3313118, val 102c08ed0, offset 2248 unwind: access_mem addr 0x7fffc3313120, val 1031db000, offset 2256 unwind: access_mem addr 0x7fffc3313128, val 7fffc3313130, offset 2264 unwind: access_mem addr 0x7fffc3313140, val 102b45ee8, offset 2288 unwind: '':ip = 0x102b8788f (0x1e788f) failed: got unresolved address 0x102b8788f unwind: failed with 'no error' got wrong number of stack entries 0 != 8 test child finished with -1 ---- end ---- Test dwarf unwind: FAILED! 
We expect to resolve test__dwarf_unwind as the last symbol, but that function can be optimized away: $ objdump -tT /usr/bin/perf | grep dwarf_unwind 000000000083b018 g DO .data 0000000000000040 Base tests__dwarf_unwind 00000000001e7750 g DF .text 0000000000000068 Base 0x60 test_dwarf_unwind__krava_1 00000000001e76e0 g DF .text 0000000000000068 Base 0x60 test_dwarf_unwind__krava_2 00000000001e7620 g DF .text 00000000000000b4 Base 0x60 test_dwarf_unwind__krava_3 00000000001e74f0 g DF .text 0000000000000128 Base 0x60 test_dwarf_unwind__compare 00000000001e7350 g DF .text 000000000000019c Base 0x60 test_dwarf_unwind__thread 000000000083b000 g DO .data 0000000000000018 Base suite__dwarf_unwind Fix this similar to commit fdf7c49c200d1b99 ("perf tests: Fix dwarf unwind for stripped binaries") by marking the function as a global and adding the 'noinline' attribute to it. With this patch: $ objdump -tT perf | grep dwarf_unwind 000000000083b018 g DO .data 0000000000000040 Base tests__dwarf_unwind 00000000001e80f0 g DF .text 0000000000000068 Base 0x60 test_dwarf_unwind__krava_1 00000000001e8080 g DF .text 0000000000000068 Base 0x60 test_dwarf_unwind__krava_2 00000000001e7fc0 g DF .text 00000000000000b4 Base 0x60 test_dwarf_unwind__krava_3 00000000001e7e90 g DF .text 0000000000000128 Base 0x60 test_dwarf_unwind__compare 00000000001e7cf0 g DF .text 000000000000019c Base 0x60 test_dwarf_unwind__thread 00000000001e8160 g DF .text 0000000000000248 Base 0x60 test__dwarf_unwind 000000000083b000 g DO .data 0000000000000018 Base suite__dwarf_unwind $ ./perf test 74 74: Test dwarf unwind : Ok Reported-by: Disha Goel Signed-off-by: Naveen N. 
Rao Link: http://lore.kernel.org/lkml/20230125123442.107156-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/dwarf-unwind.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index afdca7f2959f..ee983b677a6a 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -67,6 +67,7 @@ int test_dwarf_unwind__compare(void *p1, void *p2); int test_dwarf_unwind__krava_3(struct thread *thread); int test_dwarf_unwind__krava_2(struct thread *thread); int test_dwarf_unwind__krava_1(struct thread *thread); +int test__dwarf_unwind(struct test_suite *test, int subtest); #define MAX_STACK 8 @@ -195,8 +196,8 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_1(struct thread *th return ret; } -static int test__dwarf_unwind(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) +noinline int test__dwarf_unwind(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) { struct machine *machine; struct thread *thread; From c6535b6ba93477c491e3816af5eed845813c6f3b Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Fri, 20 Jan 2023 15:37:06 +0000 Subject: [PATCH 045/114] perf cs-etm: Update decoder code for OpenCSD version 1.4 OpenCSD version 1.4 is released with support for FEAT_ITE. This adds a new packet type, with associated output element ID in the packet type enum - OCSD_GEN_TRC_ELEM_INSTRUMENTATION. As we just ignore this packet in perf, add to the switch statement to avoid the "enum not handled in switch error", but conditionally so as not to break the perf build for older OpenCSD installations. 
Reviewed-by: James Clark Signed-off-by: Mike Leach Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120153706.20388-1-mike.leach@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 63afa2d05b46..d0e521dfcf35 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -644,6 +644,9 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( case OCSD_GEN_TRC_ELEM_CUSTOM: case OCSD_GEN_TRC_ELEM_SYNC_MARKER: case OCSD_GEN_TRC_ELEM_MEMTRANS: +#if (OCSD_VER_NUM >= 0x010400) + case OCSD_GEN_TRC_ELEM_INSTRUMENTATION: +#endif default: break; } From 1746212daeba95e9ae1639227dc0c3591d41deeb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 30 Jan 2023 18:33:47 -0800 Subject: [PATCH 046/114] perf inject: Use perf_data__read() for auxtrace In copy_bytes(), it reads the data from the (input) fd and writes it to the output file. But it does so with read(2) unconditionally, which causes a problem by mixing buffered and unbuffered I/O. You can see the problem when using pipes. $ perf record -e intel_pt// -o- true | perf inject -b > /dev/null [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] 0x45c0 [0x30]: failed to process type: 71 It should use perf_data__read() to honor the 'use_stdio' setting.
Fixes: 601366678c93618f ("perf data: Allow to use stdio functions for pipe mode") Reviewed-by: Adrian Hunter Reviewed-by: James Clark Signed-off-by: Namhyung Kim Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20230131023350.1903992-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 3f4e4dd5abf3..f8182417b734 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -215,14 +215,14 @@ static int perf_event__repipe_event_update(struct perf_tool *tool, #ifdef HAVE_AUXTRACE_SUPPORT -static int copy_bytes(struct perf_inject *inject, int fd, off_t size) +static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size) { char buf[4096]; ssize_t ssz; int ret; while (size > 0) { - ssz = read(fd, buf, min(size, (off_t)sizeof(buf))); + ssz = perf_data__read(data, buf, min(size, (off_t)sizeof(buf))); if (ssz < 0) return -errno; ret = output_bytes(inject, buf, ssz); @@ -260,7 +260,7 @@ static s64 perf_event__repipe_auxtrace(struct perf_session *session, ret = output_bytes(inject, event, event->header.size); if (ret < 0) return ret; - ret = copy_bytes(inject, perf_data__fd(session->data), + ret = copy_bytes(inject, session->data, event->auxtrace.size); } else { ret = output_bytes(inject, event, From aeb802f872a7c42e4381f36041e77d1745908255 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 30 Jan 2023 18:33:48 -0800 Subject: [PATCH 047/114] perf intel-pt: Do not try to queue auxtrace data on pipe When it processes AUXTRACE_INFO, it calls to auxtrace_queue_data() to collect AUXTRACE data first. That won't work with pipe since it needs lseek() to read the scattered aux data. 
$ perf record -o- -e intel_pt// true | perf report -i- --itrace=i100 # To display the perf.data header info, please use --header/--header-only options. # 0x4118 [0xa0]: failed to process type: 70 Error: failed to process sample For the pipe mode, it can handle the aux data as it gets. But there's no guarantee it can get the aux data in time. So the following warning will be shown at the beginning: WARNING: Intel PT with pipe mode is not recommended. The output cannot relied upon. In particular, time stamps and the order of events may be incorrect. Fixes: dbd134322e74f19d ("perf intel-pt: Add support for decoding AUX area samples") Reviewed-by: Adrian Hunter Reviewed-by: James Clark Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20230131023350.1903992-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 30 ++++++++++++++++++++++ tools/perf/util/auxtrace.c | 3 +++ tools/perf/util/intel-pt.c | 6 +++++ 3 files changed, 39 insertions(+) diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 7b6ccd2fa3bf..9d485a9cdb19 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -1821,6 +1821,36 @@ Can be compiled and traced: $ +Pipe mode +--------- +Pipe mode is a problem for Intel PT and possibly other auxtrace users. +It's not recommended to use a pipe as data output with Intel PT because +of the following reason. + +Essentially the auxtrace buffers do not behave like the regular perf +event buffers. That is because the head and tail are updated by +software, but in the auxtrace case the data is written by hardware. +So the head and tail do not get updated as data is written. 
+ +In the Intel PT case, the head and tail are updated only when the trace +is disabled by software, for example: + - full-trace, system wide : when buffer passes watermark + - full-trace, not system-wide : when buffer passes watermark or + context switches + - snapshot mode : as above but also when a snapshot is made + - sample mode : as above but also when a sample is made + +That means finished-round ordering doesn't work. An auxtrace buffer +can turn up that has data that extends back in time, possibly to the +very beginning of tracing. + +For a perf.data file, that problem is solved by going through the trace +and queuing up the auxtrace buffers in advance. + +For pipe mode, the order of events and timestamps can presumably +be messed up. + + EXAMPLE ------- diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c2e323cd7d49..d4b04fa07a11 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1133,6 +1133,9 @@ int auxtrace_queue_data(struct perf_session *session, bool samples, bool events) if (auxtrace__dont_decode(session)) return 0; + if (perf_data__is_pipe(session->data)) + return 0; + if (!session->auxtrace || !session->auxtrace->queue_data) return -EINVAL; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 6d3921627e33..b8b29756fbf1 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -4379,6 +4379,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_setup_pebs_events(pt); + if (perf_data__is_pipe(session->data)) { + pr_warning("WARNING: Intel PT with pipe mode is not recommended.\n" + " The output cannot relied upon. 
In particular,\n" + " timestamps and the order of events may be incorrect.\n"); + } + if (pt->sampling_mode || list_empty(&session->auxtrace_index)) err = auxtrace_queue_data(session, true, true); else From 14bf4784412c9f89a626798026262daa8fc81034 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 30 Jan 2023 18:33:49 -0800 Subject: [PATCH 048/114] perf session: Avoid calling lseek(2) for pipe We should not call lseek(2) for pipes as it won't work. And we are already in the proper place to read the data for AUXTRACE. Add a comment like the one for PERF_RECORD_HEADER_TRACING_DATA. Reviewed-by: Adrian Hunter Reviewed-by: James Clark Signed-off-by: Namhyung Kim Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20230131023350.1903992-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7c021c6cedb9..fdfe772f2699 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1699,8 +1699,13 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_AUXTRACE_INFO: return tool->auxtrace_info(session, event); case PERF_RECORD_AUXTRACE: - /* setup for reading amidst mmap */ - lseek(fd, file_offset + event->header.size, SEEK_SET); + /* + * Setup for reading amidst mmap, but only when we + * are in 'file' mode. The 'pipe' fd is in proper + * place already.
+ */ + if (!perf_data__is_pipe(session->data)) + lseek(fd, file_offset + event->header.size, SEEK_SET); return tool->auxtrace(session, event); case PERF_RECORD_AUXTRACE_ERROR: perf_session__auxtrace_error_inc(session, event); From e072b097d29e40784d4d0dd8a017df54af2a2026 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 30 Jan 2023 18:33:50 -0800 Subject: [PATCH 049/114] perf test: Add pipe mode test to the Intel PT test suite The test_pipe() function will check perf report and perf inject with pipe input. Reviewed-by: Adrian Hunter Signed-off-by: Namhyung Kim Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20230131023350.1903992-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_intel_pt.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh index f5ed7b1af419..4ddb17cb83c5 100755 --- a/tools/perf/tests/shell/test_intel_pt.sh +++ b/tools/perf/tests/shell/test_intel_pt.sh @@ -620,6 +620,22 @@ test_event_trace() return 0 } +test_pipe() +{ + echo "--- Test with pipe mode ---" + # Check if it works with pipe + if ! perf_record_no_bpf -o- -e intel_pt//u uname | perf report -q -i- --itrace=i10000 ; then + echo "perf record + report failed with pipe mode" + return 1 + fi + if ! perf_record_no_bpf -o- -e intel_pt//u uname | perf inject -b > /dev/null ; then + echo "perf record + inject failed with pipe mode" + return 1 + fi + echo OK + return 0 +} + count_result() { if [ "$1" -eq 2 ] ; then @@ -647,6 +663,7 @@ test_virtual_lbr || ret=$? ; count_result $ret ; ret=0 test_power_event || ret=$? ; count_result $ret ; ret=0 test_no_tnt || ret=$? ; count_result $ret ; ret=0 test_event_trace || ret=$? ; count_result $ret ; ret=0 +test_pipe || ret=$? 
; count_result $ret ; ret=0 cleanup From 84cce3d60c220debf126bd0b6ecbd63af2a46f76 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 1 Feb 2023 23:34:20 +0530 Subject: [PATCH 050/114] perf tests shell: Add check for perf data file in record+probe_libc_inet_pton test The "probe libc's inet_pton & backtrace it with ping" test installs a uprobe and uses perf record/script to check the backtrace. Currently even if the "perf record" fails, the test reports success. Logs below: # ./perf test -v "probe libc's inet_pton & backtrace it with ping" 81: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 304211 failed to open /tmp/perf.data.Btf: No such file or directory test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok Fix this by adding check for presence of perf.data file before proceeding with "perf script". With the patch changes, test reports fail correctly. # ./perf test -v "probe libc's inet_pton & backtrace it with ping" 81: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 304358 FAIL: perf record failed to create "/tmp/perf.data.Uoi" test child finished with -1 ---- end ---- probe libc's inet_pton & backtrace it with ping: FAILED! 
Signed-off-by: Athira Rajeev Cc: Andi Kleen Cc: Disha Goel Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/r/20230201180421.59640-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 57e7a6a470c9..08cdd902d0cf 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -58,6 +58,11 @@ trace_libc_inet_pton_backtrace() { perf_data=`mktemp -u /tmp/perf.data.XXX` perf_script=`mktemp -u /tmp/perf.script.XXX` perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1 + # check if perf data file got created in above step. + if [ ! 
-e $perf_data ]; then + printf "FAIL: perf record failed to create \"%s\" \n" "$perf_data" + return 1 + fi perf script -i $perf_data | tac | grep -m1 ^ping -B9 | tac > $perf_script exec 3<$perf_script From 766b0beedb2fa8ef86defc5233f4473b71ca091e Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Wed, 1 Feb 2023 23:34:21 +0530 Subject: [PATCH 051/114] perf tests shell: Fix check for libtracevent support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test “Use vfs_getname probe to get syscall args filenames” fails in an environment with missing libtraceevent support as below: 82: Use vfs_getname probe to get syscall args filenames : --- start --- test child forked, pid 304726 Recording open file: event syntax error: 'probe:vfs_getname*' \___ unsupported tracepoint libtraceevent is necessary for tracepoint support Run 'perf list' for a list of valid events Usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. use 'perf list' to list available events test child finished with -1 ---- end ---- Use vfs_getname probe to get syscall args filenames: FAILED! The environment has debuginfo but is missing the libtraceevent devel. Hence perf is compiled without libtraceevent support. The test tries to add probe “probe:vfs_getname” and then uses it with “perf record”. This fails at function “parse_events_add_tracepoint” due to missing libtraceevent. Similarly, the "probe libc's inet_pton & backtrace it with ping" test also fails for the same reason. Add a function in 'perf test shell' library to check if perf record with --dry-run reports any error on missing support for libtraceevent. Update both the tests to use this new function “skip_no_probe_record_support” before proceeding with using the probe point via perf builtin record.
With the change, 82: Use vfs_getname probe to get syscall args filenames : --- start --- test child forked, pid 305014 Recording open file: libtraceevent is necessary for tracepoint support test child finished with -2 ---- end ---- Use vfs_getname probe to get syscall args filenames: Skip 81: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 305036 libtraceevent is necessary for tracepoint support test child finished with -2 ---- end ---- probe libc's inet_pton & backtrace it with ping: Skip Signed-off-by: Athira Rajeev Cc: Andi Kleen Cc: Disha Goel Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: kjain@linux.ibm.com, Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/r/20230201180421.59640-2-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/probe_vfs_getname.sh | 8 ++++++++ tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 6 ++++++ tools/perf/tests/shell/record+script_probe_vfs_getname.sh | 3 +++ 3 files changed, 17 insertions(+) diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index ed0a3972c4c8..60c5e34f90c4 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -22,3 +22,11 @@ skip_if_no_debuginfo() { add_probe_vfs_getname -v 2>&1 | grep -E -q "^(Failed to find the path for the kernel|Debuginfo-analysis is not supported)|(file has no debug information)" && return 2 return 1 } + +# check if perf is compiled with libtraceevent support +skip_no_probe_record_support() { + if [ $had_vfs_getname -eq 1 ] ; then + perf record --dry-run -e $1 2>&1 | grep "libtraceevent is necessary for tracepoint support" && return 2 + return 1 + fi +} diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 
08cdd902d0cf..b4149b2db4c6 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -11,6 +11,7 @@ # Arnaldo Carvalho de Melo , 2017 . $(dirname $0)/lib/probe.sh +. $(dirname $0)/lib/probe_vfs_getname.sh libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g') nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254 @@ -57,6 +58,11 @@ trace_libc_inet_pton_backtrace() { perf_data=`mktemp -u /tmp/perf.data.XXX` perf_script=`mktemp -u /tmp/perf.script.XXX` + + # Check presence of libtraceevent support to run perf record + skip_no_probe_record_support "$event_name/$eventattr/" + [ $? -eq 2 ] && return 2 + perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1 # check if perf data file got created in above step. if [ ! -e $perf_data ]; then diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 7f83b2715b9a..1341437e1bd9 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -17,6 +17,9 @@ skip_if_no_perf_probe || exit 2 record_open_file() { echo "Recording open file:" + # Check presence of libtraceevent support to run perf record + skip_no_probe_record_support "probe:vfs_getname*" + [ $? -eq 2 ] && return 2 perf record -o ${perfdata} -e probe:vfs_getname\* touch $file } From 66fe2d53a067d343980048bf5c9df3e852a9e436 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Jan 2023 15:16:17 +0200 Subject: [PATCH 052/114] perf symbols: Correct plt entry sizes for x86 In 32-bit executables the .plt entry size can be set to 4 when it is really 16. In fact the only sizes used for x86 (32 or 64 bit) are 8 or 16, so check for those and, if not, use the alignment to choose which it is. 
Example on Ubuntu 22.04 gcc 11.3: Before: $ cat tstpltlib.c void fn1(void) {} void fn2(void) {} void fn3(void) {} void fn4(void) {} $ cat tstplt.c void fn1(void); void fn2(void); void fn3(void); void fn4(void); int main() { fn4(); fn1(); fn2(); fn3(); return 0; } $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. $ gcc -m32 -Wall -Wextra -shared -o libtstpltlib32.so tstpltlib.c $ gcc -m32 -Wall -Wextra -o tstplt32 tstplt.c -L . -ltstpltlib32 -Wl,-rpath=$(pwd) $ perf record -e intel_pt//u --filter 'filter main @ ./tstplt32' ./tstplt32 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.011 MB perf.data ] $ readelf -SW tstplt32 | grep 'plt\|Name' [Nr] Name Type Addr Off Size ES Flg Lk Inf Al [10] .rel.plt REL 0000041c 00041c 000028 08 AI 5 22 4 [12] .plt PROGBITS 00001030 001030 000060 04 AX 0 0 16 <- ES is 0x04, should be 0x10 [13] .plt.got PROGBITS 00001090 001090 000008 08 AX 0 0 8 $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 17894.383903029: tr strt 0 [unknown] => 565b81cd main+0x0 17894.383903029: tr end call 565b81d4 main+0x7 => 565b80d0 __x86.get_pc_thunk.bx+0x0 17894.383903031: tr strt 0 [unknown] => 565b81d9 main+0xc 17894.383903031: tr end call 565b81df main+0x12 => 565b8070 [unknown] 17894.383903032: tr strt 0 [unknown] => 565b81e4 main+0x17 17894.383903032: tr end call 565b81e4 main+0x17 => 565b8050 [unknown] 17894.383903033: tr strt 0 [unknown] => 565b81e9 main+0x1c 17894.383903033: tr end call 565b81e9 main+0x1c => 565b8080 [unknown] 17894.383903033: tr strt 0 [unknown] => 565b81ee main+0x21 17894.383903033: tr end call 565b81ee main+0x21 => 565b8060 [unknown] 17894.383903237: tr strt 0 [unknown] => 565b81f3 main+0x26 17894.383903237: tr end return 565b81fc main+0x2f => f7c21519 
[unknown] After: $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 17894.383903029: tr strt 0 [unknown] => 565b81cd main+0x0 17894.383903029: tr end call 565b81d4 main+0x7 => 565b80d0 __x86.get_pc_thunk.bx+0x0 17894.383903031: tr strt 0 [unknown] => 565b81d9 main+0xc 17894.383903031: tr end call 565b81df main+0x12 => 565b8070 fn4@plt+0x0 17894.383903032: tr strt 0 [unknown] => 565b81e4 main+0x17 17894.383903032: tr end call 565b81e4 main+0x17 => 565b8050 fn1@plt+0x0 17894.383903033: tr strt 0 [unknown] => 565b81e9 main+0x1c 17894.383903033: tr end call 565b81e9 main+0x1c => 565b8080 fn2@plt+0x0 17894.383903033: tr strt 0 [unknown] => 565b81ee main+0x21 17894.383903033: tr end call 565b81ee main+0x21 => 565b8060 fn3@plt+0x0 17894.383903237: tr strt 0 [unknown] => 565b81f3 main+0x26 17894.383903237: tr end return 565b81fc main+0x2f => f7c21519 [unknown] Reviewed-by: Namhyung Kim Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20230131131625.6964-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index aa62735aea7b..9328c162d68f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -360,14 +360,23 @@ static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, *plt_header_size = 128; *plt_entry_size = 32; return true; + case EM_386: + case EM_X86_64: + *plt_entry_size = shdr_plt->sh_entsize; + /* Size is 8 or 16, if not, assume alignment indicates size */ + if (*plt_entry_size != 8 && *plt_entry_size != 16) + *plt_entry_size = shdr_plt->sh_addralign == 8 ? 
8 : 16; + *plt_header_size = *plt_entry_size; + break; default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ *plt_header_size = shdr_plt->sh_entsize; *plt_entry_size = shdr_plt->sh_entsize; - if (*plt_entry_size) - return true; - pr_debug("Missing PLT entry size for %s\n", dso->long_name); - return false; + break; } + if (*plt_entry_size) + return true; + pr_debug("Missing PLT entry size for %s\n", dso->long_name); + return false; } /* From b2529f829ad65bf124a6b1c4fcc90093d8f0c9cf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Jan 2023 15:16:18 +0200 Subject: [PATCH 053/114] perf symbols: Add support for x86 .plt.sec The section .plt.sec was originally added for MPX and was first called .plt.bnd. While MPX has been deprecated, .plt.sec is now also used for IBT. On x86_64, IBT may be enabled by default, but can be switched off using gcc option -fcf-protection=none, or switched on by -z ibt or -z ibtplt. On 32-bit, option -z ibt or -z ibtplt will enable IBT. With .plt.sec, calls are made into .plt.sec instead of .plt, so it makes more sense to put the symbols there instead of .plt. A notable difference is that .plt.sec does not have a header entry. For x86, when synthesizing symbols for plt, use offset and entry size of .plt.sec instead of .plt when there is a .plt.sec section. Example on Ubuntu 22.04 gcc 11.3: Before: $ cat tstpltlib.c void fn1(void) {} void fn2(void) {} void fn3(void) {} void fn4(void) {} $ cat tstplt.c void fn1(void); void fn2(void); void fn3(void); void fn4(void); int main() { fn4(); fn1(); fn2(); fn3(); return 0; } $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. $ gcc -Wall -Wextra -shared -o libtstpltlib.so tstpltlib.c $ gcc -Wall -Wextra -z ibt -o tstplt tstplt.c -L . 
-ltstpltlib -Wl,-rpath=$(pwd) $ readelf -SW tstplt | grep 'plt\|Name' [Nr] Name Type Address Off Size ES Flg Lk Inf Al [11] .rela.plt RELA 0000000000000698 000698 000060 18 AI 6 24 8 [13] .plt PROGBITS 0000000000001020 001020 000050 10 AX 0 0 16 [14] .plt.got PROGBITS 0000000000001070 001070 000010 10 AX 0 0 16 [15] .plt.sec PROGBITS 0000000000001080 001080 000040 10 AX 0 0 16 $ perf record -e intel_pt//u --filter 'filter main @ ./tstplt' ./tstplt [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.015 MB perf.data ] $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 38970.522546686: tr strt 0 [unknown] => 55fc222a81a9 main+0x0 38970.522546686: tr end call 55fc222a81b1 main+0x8 => 55fc222a80a0 [unknown] 38970.522546687: tr strt 0 [unknown] => 55fc222a81b6 main+0xd 38970.522546687: tr end call 55fc222a81b6 main+0xd => 55fc222a8080 [unknown] 38970.522546688: tr strt 0 [unknown] => 55fc222a81bb main+0x12 38970.522546688: tr end call 55fc222a81bb main+0x12 => 55fc222a80b0 [unknown] 38970.522546688: tr strt 0 [unknown] => 55fc222a81c0 main+0x17 38970.522546688: tr end call 55fc222a81c0 main+0x17 => 55fc222a8090 [unknown] 38970.522546689: tr strt 0 [unknown] => 55fc222a81c5 main+0x1c 38970.522546894: tr end return 55fc222a81cb main+0x22 => 7f3a4dc29d90 __libc_start_call_main+0x80 After: $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 38970.522546686: tr strt 0 [unknown] => 55fc222a81a9 main+0x0 38970.522546686: tr end call 55fc222a81b1 main+0x8 => 55fc222a80a0 fn4@plt+0x0 38970.522546687: tr strt 0 [unknown] => 55fc222a81b6 main+0xd 38970.522546687: tr end call 55fc222a81b6 main+0xd => 55fc222a8080 fn1@plt+0x0 38970.522546688: tr strt 0 [unknown] => 55fc222a81bb main+0x12 38970.522546688: tr end call 55fc222a81bb main+0x12 => 55fc222a80b0 fn2@plt+0x0 38970.522546688: tr strt 0 [unknown] => 55fc222a81c0 main+0x17 38970.522546688: tr end call 55fc222a81c0 main+0x17 => 
55fc222a8090 fn3@plt+0x0 38970.522546689: tr strt 0 [unknown] => 55fc222a81c5 main+0x1c 38970.522546894: tr end return 55fc222a81cb main+0x22 => 7f3a4dc29d90 __libc_start_call_main+0x80 Reviewed-by: Namhyung Kim Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20230131131625.6964-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9328c162d68f..bb1b5cb3ff12 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -379,6 +379,11 @@ static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, return false; } +static bool machine_is_x86(GElf_Half e_machine) +{ + return e_machine == EM_386 || e_machine == EM_X86_64; +} + /* * We need to check if we have a .dynsym, so that we can handle the * .plt, synthesizing its symbols, that aren't on the symtabs (be it @@ -391,8 +396,8 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) uint32_t nr_rel_entries, idx; GElf_Sym sym; u64 plt_offset, plt_header_size, plt_entry_size; - GElf_Shdr shdr_plt; - struct symbol *f; + GElf_Shdr shdr_plt, plt_sec_shdr; + struct symbol *f, *plt_sym; GElf_Shdr shdr_rel_plt, shdr_dynsym; Elf_Data *syms, *symstrs; Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; @@ -422,10 +427,23 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) return 0; /* Add a symbol for .plt header */ - f = symbol__new(shdr_plt.sh_offset, plt_header_size, STB_GLOBAL, STT_FUNC, ".plt"); - if (!f) + plt_sym = symbol__new(shdr_plt.sh_offset, plt_header_size, STB_GLOBAL, STT_FUNC, ".plt"); + if (!plt_sym) goto out_elf_end; - symbols__insert(&dso->symbols, f); + symbols__insert(&dso->symbols, plt_sym); + + /* Only x86 has .plt.sec */ + if (machine_is_x86(ehdr.e_machine) && + elf_section_by_name(elf, 
&ehdr, &plt_sec_shdr, ".plt.sec", NULL)) { + if (!get_plt_sizes(dso, &ehdr, &plt_sec_shdr, &plt_header_size, &plt_entry_size)) + return 0; + /* Extend .plt symbol to entire .plt */ + plt_sym->end = plt_sym->start + shdr_plt.sh_size; + /* Use .plt.sec offset */ + plt_offset = plt_sec_shdr.sh_offset; + } else { + plt_offset = shdr_plt.sh_offset + plt_header_size; + } scn_dynsym = ss->dynsym; shdr_dynsym = ss->dynshdr; @@ -474,8 +492,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) goto out_elf_end; nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; - plt_offset = shdr_plt.sh_offset; - plt_offset += plt_header_size; ri.is_rela = shdr_rel_plt.sh_type == SHT_RELA; From 78250284b157490122106516470ec6c40ead3986 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Jan 2023 15:16:19 +0200 Subject: [PATCH 054/114] perf symbols: Sort plt relocations for x86 For x86, with the addition of IFUNCs, relocation information becomes disordered with respect to plt. Correct that by sorting the relocations by offset. Example: Before: $ cat tstpltlib.c void fn1(void) {} void fn2(void) {} void fn3(void) {} void fn4(void) {} $ cat tstpltifunc.c #include void thing1(void) { printf("thing1\n"); } void thing2(void) { printf("thing2\n"); } typedef void (*thing_fn_t)(void); thing_fn_t thing_ifunc(void) { int x; if (x & 1) return thing2; return thing1; } void thing(void) __attribute__ ((ifunc ("thing_ifunc"))); void fn1(void); void fn2(void); void fn3(void); void fn4(void); int main() { fn4(); fn1(); thing(); fn2(); fn3(); return 0; } $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. $ gcc -Wall -Wextra -shared -o libtstpltlib.so tstpltlib.c $ gcc -Wall -Wextra -Wno-uninitialized -o tstpltifunc tstpltifunc.c -L . 
-ltstpltlib -Wl,-rpath="$(pwd)" $ readelf -rW tstpltifunc | grep -A99 plt Relocation section '.rela.plt' at offset 0x738 contains 8 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000003f98 0000000300000007 R_X86_64_JUMP_SLOT 0000000000000000 puts@GLIBC_2.2.5 + 0 0000000000003fa8 0000000400000007 R_X86_64_JUMP_SLOT 0000000000000000 __stack_chk_fail@GLIBC_2.4 + 0 0000000000003fb0 0000000500000007 R_X86_64_JUMP_SLOT 0000000000000000 fn1 + 0 0000000000003fb8 0000000600000007 R_X86_64_JUMP_SLOT 0000000000000000 fn3 + 0 0000000000003fc0 0000000800000007 R_X86_64_JUMP_SLOT 0000000000000000 fn4 + 0 0000000000003fc8 0000000900000007 R_X86_64_JUMP_SLOT 0000000000000000 fn2 + 0 0000000000003fd0 0000000b00000007 R_X86_64_JUMP_SLOT 0000000000000000 getrandom@GLIBC_2.25 + 0 0000000000003fa0 0000000000000025 R_X86_64_IRELATIVE 125d $ perf record -e intel_pt//u --filter 'filter main @ ./tstpltifunc' ./tstpltifunc thing2 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.029 MB perf.data ] $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 20417.302513948: tr strt 0 [unknown] => 5629a74892be main+0x0 20417.302513948: tr end call 5629a74892c6 main+0x8 => 5629a7489110 fn2@plt+0x0 20417.302513949: tr strt 0 [unknown] => 5629a74892cb main+0xd 20417.302513949: tr end call 5629a74892cb main+0xd => 5629a74890f0 fn3@plt+0x0 20417.302513950: tr strt 0 [unknown] => 5629a74892d0 main+0x12 20417.302513950: tr end call 5629a74892d0 main+0x12 => 5629a74890d0 __stack_chk_fail@plt+0x0 20417.302528114: tr strt 0 [unknown] => 5629a74892d5 main+0x17 20417.302528114: tr end call 5629a74892d5 main+0x17 => 5629a7489120 getrandom@plt+0x0 20417.302528115: tr strt 0 [unknown] => 5629a74892da main+0x1c 20417.302528115: tr end call 5629a74892da main+0x1c => 5629a7489100 fn4@plt+0x0 20417.302528115: tr strt 0 [unknown] => 5629a74892df main+0x21 20417.302528115: tr end return 5629a74892e5 main+0x27 => 7ff14da29d90 
__libc_start_call_main+0x80 After: $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 20417.302513948: tr strt 0 [unknown] => 5629a74892be main+0x0 20417.302513948: tr end call 5629a74892c6 main+0x8 => 5629a7489110 fn4@plt+0x0 20417.302513949: tr strt 0 [unknown] => 5629a74892cb main+0xd 20417.302513949: tr end call 5629a74892cb main+0xd => 5629a74890f0 fn1@plt+0x0 20417.302513950: tr strt 0 [unknown] => 5629a74892d0 main+0x12 20417.302513950: tr end call 5629a74892d0 main+0x12 => 5629a74890d0 offset_0x10d0@plt+0x0 20417.302528114: tr strt 0 [unknown] => 5629a74892d5 main+0x17 20417.302528114: tr end call 5629a74892d5 main+0x17 => 5629a7489120 fn2@plt+0x0 20417.302528115: tr strt 0 [unknown] => 5629a74892da main+0x1c 20417.302528115: tr end call 5629a74892da main+0x1c => 5629a7489100 fn3@plt+0x0 20417.302528115: tr strt 0 [unknown] => 5629a74892df main+0x21 20417.302528115: tr end return 5629a74892e5 main+0x27 => 7ff14da29d90 __libc_start_call_main+0x80 Reviewed-by: Namhyung Kim Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20230131131625.6964-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 60 ++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index bb1b5cb3ff12..07cfcf8f40e3 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -324,6 +324,8 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) } struct rel_info { + u32 nr_entries; + u32 *sorted; bool is_rela; Elf_Data *reldata; GElf_Rela rela; @@ -332,6 +334,7 @@ struct rel_info { static u32 get_rel_symidx(struct rel_info *ri, u32 idx) { + idx = ri->sorted ? 
ri->sorted[idx] : idx; if (ri->is_rela) { gelf_getrela(ri->reldata, idx, &ri->rela); return GELF_R_SYM(ri->rela.r_info); @@ -340,6 +343,49 @@ static u32 get_rel_symidx(struct rel_info *ri, u32 idx) return GELF_R_SYM(ri->rel.r_info); } +static u64 get_rel_offset(struct rel_info *ri, u32 x) +{ + if (ri->is_rela) { + GElf_Rela rela; + + gelf_getrela(ri->reldata, x, &rela); + return rela.r_offset; + } else { + GElf_Rel rel; + + gelf_getrel(ri->reldata, x, &rel); + return rel.r_offset; + } +} + +static int rel_cmp(const void *a, const void *b, void *r) +{ + struct rel_info *ri = r; + u64 a_offset = get_rel_offset(ri, *(const u32 *)a); + u64 b_offset = get_rel_offset(ri, *(const u32 *)b); + + return a_offset < b_offset ? -1 : (a_offset > b_offset ? 1 : 0); +} + +static int sort_rel(struct rel_info *ri) +{ + size_t sz = sizeof(ri->sorted[0]); + u32 i; + + ri->sorted = calloc(ri->nr_entries, sz); + if (!ri->sorted) + return -1; + for (i = 0; i < ri->nr_entries; i++) + ri->sorted[i] = i; + qsort_r(ri->sorted, ri->nr_entries, sz, rel_cmp, ri); + return 0; +} + +static void exit_rel(struct rel_info *ri) +{ + free(ri->sorted); +} + static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, u64 *plt_header_size, u64 *plt_entry_size) { @@ -393,7 +439,7 @@ static bool machine_is_x86(GElf_Half e_machine) */ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) { - uint32_t nr_rel_entries, idx; + uint32_t idx; GElf_Sym sym; u64 plt_offset, plt_header_size, plt_entry_size; GElf_Shdr shdr_plt, plt_sec_shdr; @@ -491,11 +537,18 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) if (symstrs->d_size == 0) goto out_elf_end; - nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; + ri.nr_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; ri.is_rela = shdr_rel_plt.sh_type == SHT_RELA; - for (idx = 0; idx < nr_rel_entries; idx++) { + /* + * x86 doesn't insert IFUNC relocations in .plt order, so sort to get + * back in 
order. + */ + if (machine_is_x86(ehdr.e_machine) && sort_rel(&ri)) + goto out_elf_end; + + for (idx = 0; idx < ri.nr_entries; idx++) { const char *elf_name = NULL; char *demangled = NULL; @@ -523,6 +576,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) err = 0; out_elf_end: + exit_rel(&ri); if (err == 0) return nr; pr_debug("%s: problems reading %s PLT info.\n", From 05963491c094ca3de397ef0cfe5537b666ae4412 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Jan 2023 15:16:20 +0200 Subject: [PATCH 055/114] perf symbols: Record whether a symbol is an alias for an IFUNC symbol To assist with synthesizing plt symbols for IFUNCs, record whether a symbol is an alias of an IFUNC symbol. Reviewed-by: Namhyung Kim Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20230131131625.6964-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 4 ++++ tools/perf/util/symbol.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a024f06f75d8..d05727fcb30d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -201,10 +201,14 @@ again: continue; if (choose_best_symbol(curr, next) == SYMBOL_A) { + if (next->type == STT_GNU_IFUNC) + curr->ifunc_alias = true; rb_erase_cached(&next->rb_node, symbols); symbol__delete(next); goto again; } else { + if (curr->type == STT_GNU_IFUNC) + next->ifunc_alias = true; nd = rb_next(&curr->rb_node); rb_erase_cached(&curr->rb_node, symbols); symbol__delete(curr); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2fdeb22bd02f..7558735543c2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -64,6 +64,8 @@ struct symbol { u8 inlined:1; /** Has symbol__annotate2 been performed. */ u8 annotate2:1; + /** Symbol is an alias of an STT_GNU_IFUNC */ + u8 ifunc_alias:1; /** Architecture specific. 
Unused except on PPC where it holds st_other. */ u8 arch_sym; /** The name of length namelen associated with the symbol. */ From b7dbc0be6e4f2a5268d76884d6651e29f95673ea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Jan 2023 15:16:21 +0200 Subject: [PATCH 056/114] perf symbols: Add support for IFUNC symbols for x86_64 For x86_64, the GNU linker is putting IFUNC information in the relocation addend, so use it to try to find a symbol for plt entries that refer to IFUNCs. Example: Before: $ cat tstpltlib.c void fn1(void) {} void fn2(void) {} void fn3(void) {} void fn4(void) {} $ cat tstpltifunc.c #include <stdio.h> void thing1(void) { printf("thing1\n"); } void thing2(void) { printf("thing2\n"); } typedef void (*thing_fn_t)(void); thing_fn_t thing_ifunc(void) { int x; if (x & 1) return thing2; return thing1; } void thing(void) __attribute__ ((ifunc ("thing_ifunc"))); void fn1(void); void fn2(void); void fn3(void); void fn4(void); int main() { fn4(); fn1(); thing(); fn2(); fn3(); return 0; } $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. $ gcc -Wall -Wextra -shared -o libtstpltlib.so tstpltlib.c $ gcc -Wall -Wextra -Wno-uninitialized -o tstpltifunc tstpltifunc.c -L . 
-ltstpltlib -Wl,-rpath="$(pwd)" $ readelf -rW tstpltifunc | grep -A99 plt Relocation section '.rela.plt' at offset 0x738 contains 8 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000003f98 0000000300000007 R_X86_64_JUMP_SLOT 0000000000000000 puts@GLIBC_2.2.5 + 0 0000000000003fa8 0000000400000007 R_X86_64_JUMP_SLOT 0000000000000000 __stack_chk_fail@GLIBC_2.4 + 0 0000000000003fb0 0000000500000007 R_X86_64_JUMP_SLOT 0000000000000000 fn1 + 0 0000000000003fb8 0000000600000007 R_X86_64_JUMP_SLOT 0000000000000000 fn3 + 0 0000000000003fc0 0000000800000007 R_X86_64_JUMP_SLOT 0000000000000000 fn4 + 0 0000000000003fc8 0000000900000007 R_X86_64_JUMP_SLOT 0000000000000000 fn2 + 0 0000000000003fd0 0000000b00000007 R_X86_64_JUMP_SLOT 0000000000000000 getrandom@GLIBC_2.25 + 0 0000000000003fa0 0000000000000025 R_X86_64_IRELATIVE 125d $ perf record -e intel_pt//u --filter 'filter main @ ./tstpltifunc' ./tstpltifunc thing2 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.016 MB perf.data ] $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 21860.073683659: tr strt 0 [unknown] => 561e212c42be main+0x0 21860.073683659: tr end call 561e212c42c6 main+0x8 => 561e212c4110 fn4@plt+0x0 21860.073683661: tr strt 0 [unknown] => 561e212c42cb main+0xd 21860.073683661: tr end call 561e212c42cb main+0xd => 561e212c40f0 fn1@plt+0x0 21860.073683661: tr strt 0 [unknown] => 561e212c42d0 main+0x12 21860.073683661: tr end call 561e212c42d0 main+0x12 => 561e212c40d0 offset_0x10d0@plt+0x0 21860.073698451: tr strt 0 [unknown] => 561e212c42d5 main+0x17 21860.073698451: tr end call 561e212c42d5 main+0x17 => 561e212c4120 fn2@plt+0x0 21860.073698451: tr strt 0 [unknown] => 561e212c42da main+0x1c 21860.073698451: tr end call 561e212c42da main+0x1c => 561e212c4100 fn3@plt+0x0 21860.073698452: tr strt 0 [unknown] => 561e212c42df main+0x21 21860.073698452: tr end return 561e212c42e5 main+0x27 => 7fb51cc29d90 
__libc_start_call_main+0x80 After: $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 21860.073683659: tr strt 0 [unknown] => 561e212c42be main+0x0 21860.073683659: tr end call 561e212c42c6 main+0x8 => 561e212c4110 fn4@plt+0x0 21860.073683661: tr strt 0 [unknown] => 561e212c42cb main+0xd 21860.073683661: tr end call 561e212c42cb main+0xd => 561e212c40f0 fn1@plt+0x0 21860.073683661: tr strt 0 [unknown] => 561e212c42d0 main+0x12 21860.073683661: tr end call 561e212c42d0 main+0x12 => 561e212c40d0 thing_ifunc@plt+0x0 21860.073698451: tr strt 0 [unknown] => 561e212c42d5 main+0x17 21860.073698451: tr end call 561e212c42d5 main+0x17 => 561e212c4120 fn2@plt+0x0 21860.073698451: tr strt 0 [unknown] => 561e212c42da main+0x1c 21860.073698451: tr end call 561e212c42da main+0x1c => 561e212c4100 fn3@plt+0x0 21860.073698452: tr strt 0 [unknown] => 561e212c42df main+0x21 21860.073698452: tr end return 561e212c42e5 main+0x27 => 7fb51cc29d90 __libc_start_call_main+0x80 Reviewed-by: Namhyung Kim Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20230131131625.6964-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 38 +++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 07cfcf8f40e3..a002fc0bea03 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -381,6 +381,42 @@ static int sort_rel(struct rel_info *ri) return 0; } +/* + * For x86_64, the GNU linker is putting IFUNC information in the relocation + * addend. 
+ */ +static bool addend_may_be_ifunc(GElf_Ehdr *ehdr, struct rel_info *ri) +{ + return ehdr->e_machine == EM_X86_64 && ri->is_rela && + GELF_R_TYPE(ri->rela.r_info) == R_X86_64_IRELATIVE; +} + +static bool get_ifunc_name(Elf *elf, struct dso *dso, GElf_Ehdr *ehdr, + struct rel_info *ri, char *buf, size_t buf_sz) +{ + u64 addr = ri->rela.r_addend; + struct symbol *sym; + GElf_Phdr phdr; + + if (!addend_may_be_ifunc(ehdr, ri)) + return false; + + if (elf_read_program_header(elf, addr, &phdr)) + return false; + + addr -= phdr.p_vaddr - phdr.p_offset; + + sym = dso__find_symbol_nocache(dso, addr); + + /* Expecting the address to be an IFUNC or IFUNC alias */ + if (!sym || sym->start != addr || (sym->type != STT_GNU_IFUNC && !sym->ifunc_alias)) + return false; + + snprintf(buf, buf_sz, "%s@plt", sym->name); + + return true; +} + static void exit_rel(struct rel_info *ri) { free(ri->sorted); @@ -560,7 +596,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) elf_name = demangled; if (*elf_name) snprintf(sympltname, sizeof(sympltname), "%s@plt", elf_name); - else + else if (!get_ifunc_name(elf, dso, &ehdr, &ri, sympltname, sizeof(sympltname))) snprintf(sympltname, sizeof(sympltname), "offset_%#" PRIx64 "@plt", plt_offset); free(demangled); From 60fbb3e49abe8421b677d5eee32fe7fb27b05e3b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Jan 2023 15:16:22 +0200 Subject: [PATCH 057/114] perf symbols: Allow for .plt without header A static executable can have a .plt due to the presence of IFUNCs. In that case the .plt does not have a header. Check for whether there is a header by comparing the number of entries to the number of relocation entries. 
Reviewed-by: Namhyung Kim Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20230131131625.6964-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a002fc0bea03..8f7802097c72 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -489,6 +489,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) Elf *elf; int nr = 0, err = -1; struct rel_info ri = { .is_rela = false }; + bool lazy_plt; elf = ss->elf; ehdr = ss->ehdr; @@ -523,8 +524,10 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) plt_sym->end = plt_sym->start + shdr_plt.sh_size; /* Use .plt.sec offset */ plt_offset = plt_sec_shdr.sh_offset; + lazy_plt = false; } else { - plt_offset = shdr_plt.sh_offset + plt_header_size; + plt_offset = shdr_plt.sh_offset; + lazy_plt = true; } scn_dynsym = ss->dynsym; @@ -577,6 +580,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) ri.is_rela = shdr_rel_plt.sh_type == SHT_RELA; + if (lazy_plt) { + /* + * Assume a .plt with the same number of entries as the number + * of relocation entries is not lazy and does not have a header. + */ + if (ri.nr_entries * plt_entry_size == shdr_plt.sh_size) + dso__delete_symbol(dso, plt_sym); + else + plt_offset += plt_header_size; + } + /* * x86 doesn't insert IFUNC relocations in .plt order, so sort to get * back in order. From a1ab12856f27f37465daf640dce8df044ecbc64b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Jan 2023 15:16:23 +0200 Subject: [PATCH 058/114] perf symbols: Allow for static executables with .plt A statically linked executable can have a .plt due to IFUNCs, in which case .symtab is used not .dynsym. Check the section header link to see if that is the case, and then use symtab instead. 
Example: Before: $ cat tstifunc.c #include <stdio.h> void thing1(void) { printf("thing1\n"); } void thing2(void) { printf("thing2\n"); } typedef void (*thing_fn_t)(void); thing_fn_t thing_ifunc(void) { int x; if (x & 1) return thing2; return thing1; } void thing(void) __attribute__ ((ifunc ("thing_ifunc"))); int main() { thing(); return 0; } $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. $ gcc -static -Wall -Wextra -Wno-uninitialized -o tstifuncstatic tstifunc.c $ readelf -SW tstifuncstatic | grep 'Name\|plt\|dyn' [Nr] Name Type Address Off Size ES Flg Lk Inf Al [ 4] .rela.plt RELA 00000000004002e8 0002e8 000258 18 AI 29 20 8 [ 6] .plt PROGBITS 0000000000401020 001020 000190 00 AX 0 0 16 [20] .got.plt PROGBITS 00000000004c5000 0c4000 0000e0 08 WA 0 0 8 $ perf record -e intel_pt//u --filter 'filter main @ ./tstifuncstatic' ./tstifuncstatic thing1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.008 MB perf.data ] $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 15786.690189535: tr strt 0 [unknown] => 4017cd main+0x0 15786.690189535: tr end call 4017d5 main+0x8 => 401170 [unknown] 15786.690197660: tr strt 0 [unknown] => 4017da main+0xd 15786.690197660: tr end return 4017e0 main+0x13 => 401c1a __libc_start_call_main+0x6a After: $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 15786.690189535: tr strt 0 [unknown] => 4017cd main+0x0 15786.690189535: tr end call 4017d5 main+0x8 => 401170 thing_ifunc@plt+0x0 15786.690197660: tr strt 0 [unknown] => 4017da main+0xd 15786.690197660: tr end return 4017e0 main+0x13 => 401c1a __libc_start_call_main+0x6a Reviewed-by: Namhyung Kim Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: 
https://lore.kernel.org/r/20230131131625.6964-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 30 ++++++++++++++++++++---------- tools/perf/util/symsrc.h | 1 + 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8f7802097c72..9e265a726418 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -483,7 +483,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) GElf_Shdr shdr_rel_plt, shdr_dynsym; Elf_Data *syms, *symstrs; Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; - size_t dynsym_idx; GElf_Ehdr ehdr; char sympltname[1024]; Elf *elf; @@ -530,13 +529,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) lazy_plt = true; } - scn_dynsym = ss->dynsym; - shdr_dynsym = ss->dynshdr; - dynsym_idx = ss->dynsym_idx; - - if (scn_dynsym == NULL) - return 0; - scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, ".rela.plt", NULL); if (scn_plt_rel == NULL) { @@ -550,8 +542,25 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) shdr_rel_plt.sh_type != SHT_REL) return 0; - if (shdr_rel_plt.sh_link != dynsym_idx) + if (!shdr_rel_plt.sh_link) + return 0; + + if (shdr_rel_plt.sh_link == ss->dynsym_idx) { + scn_dynsym = ss->dynsym; + shdr_dynsym = ss->dynshdr; + } else if (shdr_rel_plt.sh_link == ss->symtab_idx) { + /* + * A static executable can have a .plt due to IFUNCs, in which + * case .symtab is used not .dynsym. 
+ */ + scn_dynsym = ss->symtab; + shdr_dynsym = ss->symshdr; + } else { goto out_elf_end; + } + + if (!scn_dynsym) + return 0; /* * Fetch the relocation section to find the idxes to the GOT @@ -1077,8 +1086,9 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64); + ss->symtab_idx = 0; ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab", - NULL); + &ss->symtab_idx); if (ss->symshdr.sh_type != SHT_SYMTAB) ss->symtab = NULL; diff --git a/tools/perf/util/symsrc.h b/tools/perf/util/symsrc.h index 2665b4bde751..edf82028c9e6 100644 --- a/tools/perf/util/symsrc.h +++ b/tools/perf/util/symsrc.h @@ -26,6 +26,7 @@ struct symsrc { GElf_Shdr opdshdr; Elf_Scn *symtab; + size_t symtab_idx; GElf_Shdr symshdr; Elf_Scn *dynsym; From 51a188ad8c2d89c5c5425d0818cc14cdec336df9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Jan 2023 15:16:24 +0200 Subject: [PATCH 059/114] perf symbols: Start adding support for .plt.got for x86 For x86, .plt.got is used, for example, when the address is taken of a dynamically linked function. Start adding support by synthesizing a symbol for each entry. A subsequent patch will attempt to get a better name for the symbol. Example: Before: $ cat tstpltlib.c void fn1(void) {} void fn2(void) {} void fn3(void) {} void fn4(void) {} $ cat tstpltgot.c void fn1(void); void fn2(void); void fn3(void); void fn4(void); void callfn(void (*fn)(void)) { fn(); } int main() { fn4(); fn1(); callfn(fn3); fn2(); fn3(); return 0; } $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. $ gcc -Wall -Wextra -shared -o libtstpltlib.so tstpltlib.c $ gcc -Wall -Wextra -o tstpltgot tstpltgot.c -L . 
-ltstpltlib -Wl,-rpath="$(pwd)" $ readelf -SW tstpltgot | grep 'Name\|plt\|dyn' [Nr] Name Type Address Off Size ES Flg Lk Inf Al [ 6] .dynsym DYNSYM 00000000000003d8 0003d8 0000f0 18 A 7 1 8 [ 7] .dynstr STRTAB 00000000000004c8 0004c8 0000c6 00 A 0 0 1 [10] .rela.dyn RELA 00000000000005d8 0005d8 0000d8 18 A 6 0 8 [11] .rela.plt RELA 00000000000006b0 0006b0 000048 18 AI 6 24 8 [13] .plt PROGBITS 0000000000001020 001020 000040 10 AX 0 0 16 [14] .plt.got PROGBITS 0000000000001060 001060 000020 10 AX 0 0 16 [15] .plt.sec PROGBITS 0000000000001080 001080 000030 10 AX 0 0 16 [23] .dynamic DYNAMIC 0000000000003d90 002d90 000210 10 WA 7 0 8 $ perf record -e intel_pt//u --filter 'filter main @ ./tstpltgot , filter callfn @ ./tstpltgot' ./tstpltgot [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.011 MB perf.data ] $ perf script --itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 28393.810326915: tr strt 0 [unknown] => 562350baa1b2 main+0x0 28393.810326915: tr end call 562350baa1ba main+0x8 => 562350baa090 fn4@plt+0x0 28393.810326917: tr strt 0 [unknown] => 562350baa1bf main+0xd 28393.810326917: tr end call 562350baa1bf main+0xd => 562350baa080 fn1@plt+0x0 28393.810326917: tr strt 0 [unknown] => 562350baa1c4 main+0x12 28393.810326917: call 562350baa1ce main+0x1c => 562350baa199 callfn+0x0 28393.810326917: tr end call 562350baa1ad callfn+0x14 => 7f607d36110f fn3+0x0 28393.810326922: tr strt 0 [unknown] => 562350baa1af callfn+0x16 28393.810326922: return 562350baa1b1 callfn+0x18 => 562350baa1d3 main+0x21 28393.810326922: tr end call 562350baa1d3 main+0x21 => 562350baa0a0 fn2@plt+0x0 28393.810326924: tr strt 0 [unknown] => 562350baa1d8 main+0x26 28393.810326924: tr end call 562350baa1d8 main+0x26 => 562350baa060 [unknown] <- call to fn3 via .plt.got 28393.810326925: tr strt 0 [unknown] => 562350baa1dd main+0x2b 28393.810326925: tr end return 562350baa1e3 main+0x31 => 7f607d029d90 __libc_start_call_main+0x80 After: $ perf script 
--itrace=be --ns -F+flags,-event,+addr,-period,-comm,-tid,-cpu,-dso 28393.810326915: tr strt 0 [unknown] => 562350baa1b2 main+0x0 28393.810326915: tr end call 562350baa1ba main+0x8 => 562350baa090 fn4@plt+0x0 28393.810326917: tr strt 0 [unknown] => 562350baa1bf main+0xd 28393.810326917: tr end call 562350baa1bf main+0xd => 562350baa080 fn1@plt+0x0 28393.810326917: tr strt 0 [unknown] => 562350baa1c4 main+0x12 28393.810326917: call 562350baa1ce main+0x1c => 562350baa199 callfn+0x0 28393.810326917: tr end call 562350baa1ad callfn+0x14 => 7f607d36110f fn3+0x0 28393.810326922: tr strt 0 [unknown] => 562350baa1af callfn+0x16 28393.810326922: return 562350baa1b1 callfn+0x18 => 562350baa1d3 main+0x21 28393.810326922: tr end call 562350baa1d3 main+0x21 => 562350baa0a0 fn2@plt+0x0 28393.810326924: tr strt 0 [unknown] => 562350baa1d8 main+0x26 28393.810326924: tr end call 562350baa1d8 main+0x26 => 562350baa060 offset_0x1060@plt+0x0 28393.810326925: tr strt 0 [unknown] => 562350baa1dd main+0x2b 28393.810326925: tr end return 562350baa1e3 main+0x31 => 7f607d029d90 __libc_start_call_main+0x80 Reviewed-by: Namhyung Kim Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20230131131625.6964-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9e265a726418..dc6dcf296608 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -466,6 +466,30 @@ static bool machine_is_x86(GElf_Half e_machine) return e_machine == EM_386 || e_machine == EM_X86_64; } +static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf, + GElf_Ehdr *ehdr, + char *buf, size_t buf_sz) +{ + struct symbol *sym; + GElf_Shdr shdr; + Elf_Scn *scn; + size_t i; + + scn = elf_section_by_name(elf, ehdr, &shdr, ".plt.got", NULL); + if (!scn || !shdr.sh_entsize) + 
return 0; + + for (i = 0; i < shdr.sh_size; i += shdr.sh_entsize) { + snprintf(buf, buf_sz, "offset_%#" PRIx64 "@plt", (u64)shdr.sh_offset + i); + sym = symbol__new(shdr.sh_offset + i, shdr.sh_entsize, STB_GLOBAL, STT_FUNC, buf); + if (!sym) + return -1; + symbols__insert(&dso->symbols, sym); + } + + return 0; +} + /* * We need to check if we have a .dynsym, so that we can handle the * .plt, synthesizing its symbols, that aren't on the symtabs (be it @@ -514,6 +538,11 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) goto out_elf_end; symbols__insert(&dso->symbols, plt_sym); + /* Only x86 has .plt.got */ + if (machine_is_x86(ehdr.e_machine) && + dso__synthesize_plt_got_symbols(dso, elf, &ehdr, sympltname, sizeof(sympltname))) + goto out_elf_end; + /* Only x86 has .plt.sec */ if (machine_is_x86(ehdr.e_machine) && elf_section_by_name(elf, &ehdr, &plt_sec_shdr, ".plt.sec", NULL)) { From ce4c8e7966f317ef2af896778b5fa9105a5cd351 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 31 Jan 2023 15:16:25 +0200 Subject: [PATCH 060/114] perf symbols: Get symbols for .plt.got for x86-64 For x86_64, determine a symbol for .plt.got entries. That requires computing the target offset and finding that in .rela.dyn, which in turn means .rela.dyn needs to be sorted by offset. Example: In this example, the GNU C Library is using .plt.got for malloc and free. Before: $ gcc --version gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 Copyright (C) 2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
$ perf record -e intel_pt//u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.027 MB perf.data ] $ perf script --itrace=be --ns -F-event,+addr,-period,-comm,-tid,-cpu > /tmp/cmp1.txt After: $ perf script --itrace=be --ns -F-event,+addr,-period,-comm,-tid,-cpu > /tmp/cmp2.txt $ diff /tmp/cmp1.txt /tmp/cmp2.txt | head -12 15509,15510c15509,15510 < 27046.755390907: 7f0b2943e3ab _nl_normalize_codeset+0x5b (/usr/lib/x86_64-linux-gnu/libc.so.6) => 7f0b29428380 offset_0x28380@plt+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6) < 27046.755390907: 7f0b29428384 offset_0x28380@plt+0x4 (/usr/lib/x86_64-linux-gnu/libc.so.6) => 7f0b294a5120 malloc+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6) --- > 27046.755390907: 7f0b2943e3ab _nl_normalize_codeset+0x5b (/usr/lib/x86_64-linux-gnu/libc.so.6) => 7f0b29428380 malloc@plt+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6) > 27046.755390907: 7f0b29428384 malloc@plt+0x4 (/usr/lib/x86_64-linux-gnu/libc.so.6) => 7f0b294a5120 malloc+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6) 15821,15822c15821,15822 < 27046.755394865: 7f0b2943850c _nl_load_locale_from_archive+0x5bc (/usr/lib/x86_64-linux-gnu/libc.so.6) => 7f0b29428370 offset_0x28370@plt+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6) < 27046.755394865: 7f0b29428374 offset_0x28370@plt+0x4 (/usr/lib/x86_64-linux-gnu/libc.so.6) => 7f0b294a5460 cfree@GLIBC_2.2.5+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6) --- > 27046.755394865: 7f0b2943850c _nl_load_locale_from_archive+0x5bc (/usr/lib/x86_64-linux-gnu/libc.so.6) => 7f0b29428370 free@plt+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6) > 27046.755394865: 7f0b29428374 free@plt+0x4 (/usr/lib/x86_64-linux-gnu/libc.so.6) => 7f0b294a5460 cfree@GLIBC_2.2.5+0x0 (/usr/lib/x86_64-linux-gnu/libc.so.6) Reviewed-by: Namhyung Kim Signed-off-by: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20230131131625.6964-10-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 161 
++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index dc6dcf296608..41882ae8452e 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -466,28 +466,181 @@ static bool machine_is_x86(GElf_Half e_machine) return e_machine == EM_386 || e_machine == EM_X86_64; } +struct rela_dyn { + GElf_Addr offset; + u32 sym_idx; +}; + +struct rela_dyn_info { + struct dso *dso; + Elf_Data *plt_got_data; + u32 nr_entries; + struct rela_dyn *sorted; + Elf_Data *dynsym_data; + Elf_Data *dynstr_data; + Elf_Data *rela_dyn_data; +}; + +static void exit_rela_dyn(struct rela_dyn_info *di) +{ + free(di->sorted); +} + +static int cmp_offset(const void *a, const void *b) +{ + const struct rela_dyn *va = a; + const struct rela_dyn *vb = b; + + return va->offset < vb->offset ? -1 : (va->offset > vb->offset ? 1 : 0); +} + +static int sort_rela_dyn(struct rela_dyn_info *di) +{ + u32 i, n; + + di->sorted = calloc(di->nr_entries, sizeof(di->sorted[0])); + if (!di->sorted) + return -1; + + /* Get data for sorting: the offset and symbol index */ + for (i = 0, n = 0; i < di->nr_entries; i++) { + GElf_Rela rela; + u32 sym_idx; + + gelf_getrela(di->rela_dyn_data, i, &rela); + sym_idx = GELF_R_SYM(rela.r_info); + if (sym_idx) { + di->sorted[n].sym_idx = sym_idx; + di->sorted[n].offset = rela.r_offset; + n += 1; + } + } + + /* Sort by offset */ + di->nr_entries = n; + qsort(di->sorted, n, sizeof(di->sorted[0]), cmp_offset); + + return 0; +} + +static void get_rela_dyn_info(Elf *elf, GElf_Ehdr *ehdr, struct rela_dyn_info *di, Elf_Scn *scn) +{ + GElf_Shdr rela_dyn_shdr; + GElf_Shdr shdr; + + di->plt_got_data = elf_getdata(scn, NULL); + + scn = elf_section_by_name(elf, ehdr, &rela_dyn_shdr, ".rela.dyn", NULL); + if (!scn || !rela_dyn_shdr.sh_link || !rela_dyn_shdr.sh_entsize) + return; + + di->nr_entries = rela_dyn_shdr.sh_size / rela_dyn_shdr.sh_entsize; + di->rela_dyn_data 
= elf_getdata(scn, NULL); + + scn = elf_getscn(elf, rela_dyn_shdr.sh_link); + if (!scn || !gelf_getshdr(scn, &shdr) || !shdr.sh_link) + return; + + di->dynsym_data = elf_getdata(scn, NULL); + di->dynstr_data = elf_getdata(elf_getscn(elf, shdr.sh_link), NULL); + + if (!di->plt_got_data || !di->dynstr_data || !di->dynsym_data || !di->rela_dyn_data) + return; + + /* Sort into offset order */ + sort_rela_dyn(di); +} + +/* Get instruction displacement from a plt entry for x86_64 */ +static u32 get_x86_64_plt_disp(const u8 *p) +{ + u8 endbr64[] = {0xf3, 0x0f, 0x1e, 0xfa}; + int n = 0; + + /* Skip endbr64 */ + if (!memcmp(p, endbr64, sizeof(endbr64))) + n += sizeof(endbr64); + /* Skip bnd prefix */ + if (p[n] == 0xf2) + n += 1; + /* jmp with 4-byte displacement */ + if (p[n] == 0xff && p[n + 1] == 0x25) { + n += 2; + /* Also add offset from start of entry to end of instruction */ + return n + 4 + le32toh(*(const u32 *)(p + n)); + } + return 0; +} + +static bool get_plt_got_name(GElf_Shdr *shdr, size_t i, + struct rela_dyn_info *di, + char *buf, size_t buf_sz) +{ + struct rela_dyn vi, *vr; + const char *sym_name; + char *demangled; + GElf_Sym sym; + u32 disp; + + if (!di->sorted) + return false; + + disp = get_x86_64_plt_disp(di->plt_got_data->d_buf + i); + if (!disp) + return false; + + /* Compute target offset of the .plt.got entry */ + vi.offset = shdr->sh_offset + di->plt_got_data->d_off + i + disp; + + /* Find that offset in .rela.dyn (sorted by offset) */ + vr = bsearch(&vi, di->sorted, di->nr_entries, sizeof(di->sorted[0]), cmp_offset); + if (!vr) + return false; + + /* Get the associated symbol */ + gelf_getsym(di->dynsym_data, vr->sym_idx, &sym); + sym_name = elf_sym__name(&sym, di->dynstr_data); + demangled = demangle_sym(di->dso, 0, sym_name); + if (demangled != NULL) + sym_name = demangled; + + snprintf(buf, buf_sz, "%s@plt", sym_name); + + free(demangled); + + return *sym_name; +} + static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf, 
GElf_Ehdr *ehdr, char *buf, size_t buf_sz) { + struct rela_dyn_info di = { .dso = dso }; struct symbol *sym; GElf_Shdr shdr; Elf_Scn *scn; + int err = -1; size_t i; scn = elf_section_by_name(elf, ehdr, &shdr, ".plt.got", NULL); if (!scn || !shdr.sh_entsize) return 0; + if (ehdr->e_machine == EM_X86_64) + get_rela_dyn_info(elf, ehdr, &di, scn); + for (i = 0; i < shdr.sh_size; i += shdr.sh_entsize) { - snprintf(buf, buf_sz, "offset_%#" PRIx64 "@plt", (u64)shdr.sh_offset + i); + if (!get_plt_got_name(&shdr, i, &di, buf, buf_sz)) + snprintf(buf, buf_sz, "offset_%#" PRIx64 "@plt", (u64)shdr.sh_offset + i); sym = symbol__new(shdr.sh_offset + i, shdr.sh_entsize, STB_GLOBAL, STT_FUNC, buf); if (!sym) - return -1; + goto out; symbols__insert(&dso->symbols, sym); } - - return 0; + err = 0; +out: + exit_rela_dyn(&di); + return err; } /* From 3980ee9ad8575915cda3790447af700bb865b493 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Tue, 31 Jan 2023 19:17:48 +0530 Subject: [PATCH 061/114] perf probe: Fix usage when libtraceevent is missing While parsing the tracepoint events in parse_events_add_tracepoint() function, code checks for HAVE_LIBTRACEEVENT support. This is needed since libtraceevent is necessary for tracepoint. But while adding probe points, check for LIBTRACEEVENT is not done in case of perf probe. Hence, in environment with missing libtraceevent-devel, it is observed that adding a probe point shows below message though it can't be used via perf record. 
Example: Adding probe point: ./perf probe 'vfs_getname=getname_flags:72 pathname=result->name:string' Added new event: probe:vfs_getname (on getname_flags:72 with pathname=result->name:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 But trying perf record: ./perf record -e probe:vfs_getname -aR sleep 1 event syntax error: 'probe:vfs_getname' \___ unsupported tracepoint libtraceevent is necessary for tracepoint support Run 'perf list' for a list of valid events The builtin tool like perf record needs libtraceevent to parse tracefs. But still the probe can be used by enabling via tracefs. Patch fixes the probe usage message to the user based on presence of libtraceevent. With the fix, # ./perf probe 'pmu:myprobe=schedule' Added new event: pmu:myprobe (on schedule) perf is not linked with libtraceevent, to use the new probe you can use tracefs: cd /sys/kernel/tracing/ echo 1 > events/pmu/myprobe/enable echo 1 > tracing_on cat trace_pipe Before removing the probe, echo 0 > events/pmu/myprobe/enable Signed-off-by: Athira Rajeev Cc: Andi Kleen Cc: Disha Goel Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230131134748.54567-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-probe.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index ed73d0b89ca2..e72f6cea76f7 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -383,9 +383,18 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs) /* Note that it is possible to skip all events because of blacklist */ if (event) { +#ifndef HAVE_LIBTRACEEVENT + pr_info("\nperf is not linked with libtraceevent, to use the new probe you can use tracefs:\n\n"); + pr_info("\tcd 
/sys/kernel/tracing/\n"); + pr_info("\techo 1 > events/%s/%s/enable\n", group, event); + pr_info("\techo 1 > tracing_on\n"); + pr_info("\tcat trace_pipe\n"); + pr_info("\tBefore removing the probe, echo 0 > events/%s/%s/enable\n", group, event); +#else /* Show how to use the event. */ pr_info("\nYou can now use it in all perf tools, such as:\n\n"); pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event); +#endif } out_cleanup: From 1c249565426e3a9940102c0ba9f63914f7cda73d Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Wed, 25 Jan 2023 10:34:18 -0800 Subject: [PATCH 062/114] perf symbols: Symbol lookup with kcore can fail if multiple segments match stext This problem was encountered on an arm64 system with a lot of memory. Without kernel debug symbols installed, and with both kcore and kallsyms available, perf managed to get confused and returned "unknown" for all of the kernel symbols that it tried to look up. On this system, stext fell within the vmalloc segment. The kcore symbol matching code tries to find the first segment that contains stext and uses that to replace the segment generated from just the kallsyms information. In this case, however, there were two: a very large vmalloc segment, and the text segment. This caused perf to get confused because multiple overlapping segments were inserted into the RB tree that holds the discovered segments. However, that alone wasn't sufficient to cause the problem. Even when we could find the segment, the offsets were adjusted in such a way that the newly generated symbols didn't line up with the instruction addresses in the trace. The most obvious solution would be to consult which segment type is text from kcore, but this information is not exposed to users. Instead, select the smallest matching segment that contains stext instead of the first matching segment. This allows us to match the text segment instead of vmalloc, if one is contained within the other. 
Reviewed-by: Adrian Hunter Signed-off-by: Krister Johansen Cc: Alexander Shishkin Cc: David Reaver Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20230125183418.GD1963@templeofstupid.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index d05727fcb30d..317c0706852f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1377,10 +1377,23 @@ static int dso__load_kcore(struct dso *dso, struct map *map, /* Find the kernel map using the '_stext' symbol */ if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) { + u64 replacement_size = 0; + list_for_each_entry(new_map, &md.maps, node) { - if (stext >= new_map->start && stext < new_map->end) { + u64 new_size = new_map->end - new_map->start; + + if (!(stext >= new_map->start && stext < new_map->end)) + continue; + + /* + * On some architectures, ARM64 for example, the kernel + * text can get allocated inside of the vmalloc segment. + * Select the smallest matching segment, in case stext + * falls within more than one in the list. + */ + if (!replacement_map || new_size < replacement_size) { replacement_map = new_map; - break; + replacement_size = new_size; } } } From 2889959489ef4118953116496a06203aaf6c35b6 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 27 Jan 2023 14:55:46 -0600 Subject: [PATCH 063/114] perf arm-spe: Only warn once for each unsupported address packet Unknown address packet indexes are not an error as the Arm architecture can (and has with SPEv1.2) define new ones and implementation defined ones are also allowed. The error message for every occurrence of the packet is needlessly noisy as well. Change the message to print just once for each unknown index. 
Reviewed-by: Leo Yan Signed-off-by: Rob Herring Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Link: https://lore.kernel.org/r/20230127205546.667740-1-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 091987dd3966..40dcedfd75cd 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -68,7 +68,11 @@ static u64 arm_spe_calc_ip(int index, u64 payload) /* Clean highest byte */ payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); } else { - pr_err("unsupported address packet index: 0x%x\n", index); + static u32 seen_idx = 0; + if (!(seen_idx & BIT(index))) { + seen_idx |= BIT(index); + pr_warning("ignoring unsupported address packet index: 0x%x\n", index); + } } return payload; From 1df49ef9ee3179a213754a26cc64065e8aa24c0c Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 30 Jan 2023 11:19:10 -0700 Subject: [PATCH 064/114] perf tools docs: Use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing A few spots in the perf docs still refer to this older debugfs path, so let's update them to avoid confusion. 
Signed-off-by: Ross Zwisler Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Cc: linux-trace-kernel@vger.kernel.org Link: http://lore.kernel.org/lkml/20230130181915.1113313-5-zwisler@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 2 +- tools/perf/Documentation/perf-script-perl.txt | 2 +- tools/perf/Documentation/perf-script-python.txt | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index c5a3cb0f57c7..d5f78e125efe 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -232,7 +232,7 @@ This can be overridden by setting the kernel.perf_event_paranoid sysctl to -1, which allows non root to use these events. For accessing trace point events perf needs to have read access to -/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed +/sys/kernel/tracing, even when perf_event_paranoid is in a relaxed setting. TRACING diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index fa4f39d305a7..5b479f5e62ff 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -55,7 +55,7 @@ Traces meant to be processed using a script should be recorded with the above option: -a to enable system-wide collection. 
The format file for the sched_wakeup event defines the following fields -(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): +(see /sys/kernel/tracing/events/sched/sched_wakeup/format): ---- format: diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index cf4b7f4b625a..6a8581012e16 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -319,7 +319,7 @@ So those are the essential steps in writing and running a script. The process can be generalized to any tracepoint or set of tracepoints you're interested in - basically find the tracepoint(s) you're interested in by looking at the list of available events shown by -'perf list' and/or look in /sys/kernel/debug/tracing/events/ for +'perf list' and/or look in /sys/kernel/tracing/events/ for detailed event and field info, record the corresponding trace data using 'perf record', passing it the list of interesting events, generate a skeleton script using 'perf script -g python' and modify the @@ -449,7 +449,7 @@ Traces meant to be processed using a script should be recorded with the above option: -a to enable system-wide collection. The format file for the sched_wakeup event defines the following fields -(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): +(see /sys/kernel/tracing/events/sched/sched_wakeup/format): ---- format: From 3fd7a168bf51497909dbfb7347af28b5c57e74a6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 26 Jan 2023 13:36:10 -0800 Subject: [PATCH 065/114] perf script: Add 'cgroup' field for output There's no field for the cgroup, let's add one. To do that, users need to specify --all-cgroup option for perf record to capture the cgroup info. $ perf record --all-cgroups -- true $ perf script -F comm,pid,cgroup true 337112 /user.slice/user-657345.slice/user@657345.service/... true 337112 /user.slice/user-657345.slice/user@657345.service/... 
true 337112 /user.slice/user-657345.slice/user@657345.service/... true 337112 /user.slice/user-657345.slice/user@657345.service/... If it's recorded without the --all-cgroups, it'd complain. $ perf script -F comm,pid,cgroup Samples for 'cycles:u' event do not have CGROUP attribute set. Cannot print 'cgroup' field. Hint: run 'perf record --all-cgroups ...' Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20230126213610.3381147-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 5 ++++- tools/perf/builtin-script.c | 22 +++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 8d77182fbf31..a2ebadc9d948 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -134,7 +134,7 @@ OPTIONS srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat, - machine_pid, vcpu. + machine_pid, vcpu, cgroup. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -231,6 +231,9 @@ OPTIONS perf inject to insert a perf.data file recorded inside a virtual machine into a perf.data file recorded on the host at the same time. + The cgroup fields requires sample having the cgroup id which is saved + when "--all-cgroups" option is passed to 'perf record'. + Finally, a user may not set fields to none for all event types. i.e., -F "" is not allowed. 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8901acdd7f5b..cb6b34da4eef 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -59,6 +59,7 @@ #include "util/dlfilter.h" #include "util/record.h" #include "util/util.h" +#include "util/cgroup.h" #include "perf.h" #include @@ -130,6 +131,7 @@ enum perf_output_field { PERF_OUTPUT_BRSTACKINSNLEN = 1ULL << 36, PERF_OUTPUT_MACHINE_PID = 1ULL << 37, PERF_OUTPUT_VCPU = 1ULL << 38, + PERF_OUTPUT_CGROUP = 1ULL << 39, }; struct perf_script { @@ -200,6 +202,7 @@ struct output_option { {.str = "brstackinsnlen", .field = PERF_OUTPUT_BRSTACKINSNLEN}, {.str = "machine_pid", .field = PERF_OUTPUT_MACHINE_PID}, {.str = "vcpu", .field = PERF_OUTPUT_VCPU}, + {.str = "cgroup", .field = PERF_OUTPUT_CGROUP}, }; enum { @@ -542,6 +545,12 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_INS_LAT)) return -EINVAL; + if (PRINT_FIELD(CGROUP) && + evsel__check_stype(evsel, PERF_SAMPLE_CGROUP, "CGROUP", PERF_OUTPUT_CGROUP)) { + pr_err("Hint: run 'perf record --all-cgroups ...'\n"); + return -EINVAL; + } + return 0; } @@ -2220,6 +2229,17 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(CODE_PAGE_SIZE)) fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str)); + if (PRINT_FIELD(CGROUP)) { + const char *cgrp_name; + struct cgroup *cgrp = cgroup__find(machine->env, + sample->cgroup); + if (cgrp != NULL) + cgrp_name = cgrp->name; + else + cgrp_name = "unknown"; + fprintf(fp, " %s", cgrp_name); + } + perf_sample__fprintf_ipc(sample, attr, fp); fprintf(fp, "\n"); @@ -3856,7 +3876,7 @@ int cmd_script(int argc, const char **argv) "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," "brstackinsnlen,brstackoff,callindent,insn,insnlen,synth," "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," - "code_page_size,ins_lat", + 
"code_page_size,ins_lat,machine_pid,vcpu,cgroup", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), From 7b204399aee0d1048109d37456cd61b7f1bc0aed Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Jan 2023 16:09:36 -0800 Subject: [PATCH 066/114] perf lock contention: Add -S/--callstack-filter option The -S/--callstack-filter is to limit display entries having the given string in the callstack (not only in the caller in the output). The following example shows lock contention results if the callstack has 'net' substring somewhere. Note that the caller '__dev_queue_xmit' does not match to it, but it has 'inet6_csk_xmit' in the callstack. This applies even if you don't use -v option to show the full callstack. $ sudo ./perf lock con -abv -S net sleep 1 ... contended total wait max wait avg wait type caller 5 70.20 us 16.13 us 14.04 us spinlock __dev_queue_xmit+0xb6d 0xffffffffa5dd1c60 _raw_spin_lock+0x30 0xffffffffa5b8f6ed __dev_queue_xmit+0xb6d 0xffffffffa5cd8267 ip6_finish_output2+0x2c7 0xffffffffa5cdac14 ip6_finish_output+0x1d4 0xffffffffa5cdb477 ip6_xmit+0x457 0xffffffffa5d1fd17 inet6_csk_xmit+0xd7 0xffffffffa5c5f4aa __tcp_transmit_skb+0x54a 0xffffffffa5c6467d tcp_keepalive_timer+0x2fd Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230126000936.3017683-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-lock.txt | 6 +++ tools/perf/builtin-lock.c | 68 +++++++++++++++++++++++++- tools/perf/util/bpf_lock_contention.c | 2 +- tools/perf/util/lock-contention.h | 1 + 4 files changed, 75 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 0f9f720e599d..11b8901d8d13 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt 
@@ -187,6 +187,12 @@ CONTENTION OPTIONS --lock-filter=:: Show lock contention only for given lock addresses or names (comma separated list). +-S:: +--callstack-filter=:: + Show lock contention only if the callstack contains the given string. + Note that it matches the substring so 'rq' would match both 'raw_spin_rq_lock' + and 'irq_enter_rcu'. + SEE ALSO -------- diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 506c2fe42d52..216a9a252bf4 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -63,11 +63,22 @@ static unsigned long bpf_map_entries = 10240; static int max_stack_depth = CONTENTION_STACK_DEPTH; static int stack_skip = CONTENTION_STACK_SKIP; static int print_nr_entries = INT_MAX / 2; +static LIST_HEAD(callstack_filters); + +struct callstack_filter { + struct list_head list; + char name[]; +}; static struct lock_filter filters; static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR; +static bool needs_callstack(void) +{ + return verbose > 0 || !list_empty(&callstack_filters); +} + static struct thread_stat *thread_stat_find(u32 tid) { struct rb_node *node; @@ -1060,7 +1071,7 @@ static int report_lock_contention_begin_event(struct evsel *evsel, if (!ls) return -ENOMEM; - if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) { + if (aggr_mode == LOCK_AGGR_CALLER && needs_callstack()) { ls->callstack = get_callstack(sample, max_stack_depth); if (ls->callstack == NULL) return -ENOMEM; @@ -1595,6 +1606,31 @@ static void print_contention_result(struct lock_contention *con) if (!st->wait_time_total) continue; + if (aggr_mode == LOCK_AGGR_CALLER && !list_empty(&callstack_filters)) { + struct map *kmap; + struct symbol *sym; + u64 ip; + + for (int i = 0; i < max_stack_depth; i++) { + struct callstack_filter *filter; + + if (!st->callstack || !st->callstack[i]) + break; + + ip = st->callstack[i]; + sym = machine__find_kernel_symbol(con->machine, ip, &kmap); + if (sym == NULL) + continue; + + list_for_each_entry(filter, 
&callstack_filters, list) { + if (strstr(sym->name, filter->name)) + goto found; + } + } + continue; + } + +found: list_for_each_entry(key, &lock_keys, list) { key->print(key, st); pr_info(" "); @@ -1743,6 +1779,7 @@ static int __cmd_contention(int argc, const char **argv) .max_stack = max_stack_depth, .stack_skip = stack_skip, .filters = &filters, + .save_callstack = needs_callstack(), }; session = perf_session__new(use_bpf ? NULL : &data, &eops); @@ -2123,6 +2160,33 @@ static int parse_lock_addr(const struct option *opt __maybe_unused, const char * return ret; } +static int parse_call_stack(const struct option *opt __maybe_unused, const char *str, + int unset __maybe_unused) +{ + char *s, *tmp, *tok; + int ret = 0; + + s = strdup(str); + if (s == NULL) + return -1; + + for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { + struct callstack_filter *entry; + + entry = malloc(sizeof(*entry) + strlen(tok) + 1); + if (entry == NULL) { + pr_err("Memory allocation failure\n"); + return -1; + } + + strcpy(entry->name, tok); + list_add_tail(&entry->list, &callstack_filters); + } + + free(s); + return ret; +} + int cmd_lock(int argc, const char **argv) { const struct option lock_options[] = { @@ -2190,6 +2254,8 @@ int cmd_lock(int argc, const char **argv) "Filter specific type of locks", parse_lock_type), OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES", "Filter specific address/symbol of locks", parse_lock_addr), + OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES", + "Filter specific function in the callstack", parse_call_stack), OPT_PARENT(lock_options) }; diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 0236334fd69b..4902ac331f41 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -268,7 +268,7 @@ int lock_contention_read(struct lock_contention *con) break; } - if (verbose > 0) { + if (con->save_callstack) { st->callstack = memdup(stack_trace, 
stack_size); if (st->callstack == NULL) break; diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index b99e83fccf5c..17e594d57a61 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -128,6 +128,7 @@ struct lock_contention { int max_stack; int stack_skip; int aggr_mode; + bool save_callstack; }; #ifdef HAVE_BPF_SKEL From dd15480a3d67b9cf04a1f6f5d60f1c0dc018e22f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Jan 2023 11:24:31 -0800 Subject: [PATCH 067/114] perf stat: Hide invalid uncore event output for aggr mode The current display code for perf stat iterates given cpus and build the aggr map to collect the event data for the aggregation mode. But uncore events have their own cpu maps and it won't guarantee that it'd match to the aggr map. For example, per-package uncore events would generate a single value for each socket. When user asks per-core aggregation mode, the output would contain 0 values for other cores. Thus it needs to check the uncore PMU's cpumask and if it matches to the current aggregation id. Before: $ sudo ./perf stat -a --per-core -e power/energy-pkg/ sleep 1 Performance counter stats for 'system wide': S0-D0-C0 1 3.73 Joules power/energy-pkg/ S0-D0-C1 0 Joules power/energy-pkg/ S0-D0-C2 0 Joules power/energy-pkg/ S0-D0-C3 0 Joules power/energy-pkg/ 1.001404046 seconds time elapsed Some events weren't counted. Try disabling the NMI watchdog: echo 0 > /proc/sys/kernel/nmi_watchdog perf stat ... echo 1 > /proc/sys/kernel/nmi_watchdog The core 1, 2 and 3 should not be printed because the event is handled in a cpu in the core 0 only. With this change, the output becomes like below. 
After: $ sudo ./perf stat -a --per-core -e power/energy-pkg/ sleep 1 Performance counter stats for 'system wide': S0-D0-C0 1 2.09 Joules power/energy-pkg/ Fixes: b897613510890d6e ("perf stat: Update event skip condition for system-wide per-thread mode and merged uncore and hybrid events") Signed-off-by: Namhyung Kim Tested-by: Ian Rogers Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Athira Rajeev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230125192431.2929677-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 51 ++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 8bd8b0142630..1b5cb20efd23 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -787,6 +787,51 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun uniquify_event_name(counter); } +/** + * should_skip_zero_counter() - Check if the event should print 0 values. + * @config: The perf stat configuration (including aggregation mode). + * @counter: The evsel with its associated cpumap. + * @id: The aggregation id that is being queried. + * + * Due to mismatch between the event cpumap or thread-map and the + * aggregation mode, sometimes it'd iterate the counter with the map + * which does not contain any values. + * + * For example, uncore events have dedicated CPUs to manage them, + * result for other CPUs should be zero and skipped. + * + * Return: %true if the value should NOT be printed, %false if the value + * needs to be printed like "<not counted>" or "<not supported>". + */ +static bool should_skip_zero_counter(struct perf_stat_config *config, + struct evsel *counter, + const struct aggr_cpu_id *id) +{ + struct perf_cpu cpu; + int idx; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise it will have too many 0 output.
+ */ + if (config->aggr_mode == AGGR_THREAD && config->system_wide) + return true; + /* + * Skip value 0 when it's an uncore event and the given aggr id + * does not belong to the PMU cpumask. + */ + if (!counter->pmu || !counter->pmu->is_uncore) + return false; + + perf_cpu_map__for_each_cpu(cpu, idx, counter->pmu->cpus) { + struct aggr_cpu_id own_id = config->aggr_get_id(config, cpu); + + if (aggr_cpu_id__equal(id, &own_id)) + return false; + } + return true; +} + static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, struct outstate *os) @@ -814,11 +859,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, ena = aggr->counts.ena; run = aggr->counts.run; - /* - * Skip value 0 when enabling --per-thread globally, otherwise it will - * have too many 0 output. - */ - if (val == 0 && config->aggr_mode == AGGR_THREAD && config->system_wide) + if (val == 0 && should_skip_zero_counter(config, counter, &id)) return; if (!metric_only) { From 5b420cf003da4da34955dfd95c5334bec0815114 Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Wed, 1 Feb 2023 22:49:44 +0100 Subject: [PATCH 068/114] perf test: Replace 'grep | wc -l' with 'grep -c' To count the number of results from grep, use the '-c' parameter instead of piping it to 'wc'. 
See also https://www.shellcheck.net/wiki/SC2126 Signed-off-by: Diederik de Haas Acked-by: Carsten Haitzler Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230201214945.127474-2-didi.debian@cknow.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/coresight.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/shell/lib/coresight.sh b/tools/perf/tests/shell/lib/coresight.sh index 45a1477256b6..7e27e5c5bc9c 100644 --- a/tools/perf/tests/shell/lib/coresight.sh +++ b/tools/perf/tests/shell/lib/coresight.sh @@ -58,9 +58,9 @@ perf_dump_aux_verify() { # compiler may produce different code depending on the compiler and # optimization options, so this is rough just to see if we're # either missing almost all the data or all of it - ATOM_FX_NUM=`grep I_ATOM_F "$DUMP" | wc -l` - ASYNC_NUM=`grep I_ASYNC "$DUMP" | wc -l` - TRACE_INFO_NUM=`grep I_TRACE_INFO "$DUMP" | wc -l` + ATOM_FX_NUM=`grep -c I_ATOM_F "$DUMP"` + ASYNC_NUM=`grep -c I_ASYNC "$DUMP"` + TRACE_INFO_NUM=`grep -c I_TRACE_INFO "$DUMP"` rm -f "$DUMP" # Arguments provide minimums for a pass From a912f5975ffc82d52bbb5937eafe367d44db711c Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Wed, 1 Feb 2023 22:49:45 +0100 Subject: [PATCH 069/114] perf test: Replace legacy `...` with $(...) As detailed in https://www.shellcheck.net/wiki/SC2006: The use of `...` is legacy syntax with several issues: 1. It has a series of undefined behaviors related to quoting in POSIX. 2. It imposes a custom escaping mode with surprising results. 3. It's exceptionally hard to nest. $(...) command substitution has none of these problems, and is therefore strongly encouraged. 
Signed-off-by: Diederik de Haas Acked-by: Carsten Haitzler Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230201214945.127474-3-didi.debian@cknow.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lib/coresight.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/perf/tests/shell/lib/coresight.sh b/tools/perf/tests/shell/lib/coresight.sh index 7e27e5c5bc9c..6c3d34ec64d8 100644 --- a/tools/perf/tests/shell/lib/coresight.sh +++ b/tools/perf/tests/shell/lib/coresight.sh @@ -58,9 +58,9 @@ perf_dump_aux_verify() { # compiler may produce different code depending on the compiler and # optimization options, so this is rough just to see if we're # either missing almost all the data or all of it - ATOM_FX_NUM=`grep -c I_ATOM_F "$DUMP"` - ASYNC_NUM=`grep -c I_ASYNC "$DUMP"` - TRACE_INFO_NUM=`grep -c I_TRACE_INFO "$DUMP"` + ATOM_FX_NUM=$(grep -c I_ATOM_F "$DUMP") + ASYNC_NUM=$(grep -c I_ASYNC "$DUMP") + TRACE_INFO_NUM=$(grep -c I_TRACE_INFO "$DUMP") rm -f "$DUMP" # Arguments provide minimums for a pass @@ -96,18 +96,18 @@ perf_dump_aux_tid_verify() { # The TID test tools will print a TID per stdout line that are being # tested - TIDS=`cat "$2"` + TIDS=$(cat "$2") # Scan the perf report to find the TIDs that are actually CID in hex # and build a list of the ones found - FOUND_TIDS=`perf report --stdio --dump -i "$1" | \ + FOUND_TIDS=$(perf report --stdio --dump -i "$1" | \ grep -o "CID=0x[0-9a-z]\+" | sed 's/CID=//g' | \ - uniq | sort | uniq` + uniq | sort | uniq) # No CID=xxx found - maybe your kernel is reporting these as # VMID=xxx so look there if test -z "$FOUND_TIDS"; then - FOUND_TIDS=`perf report --stdio --dump -i "$1" | \ + FOUND_TIDS=$(perf report --stdio --dump -i "$1" | \ grep -o "VMID=0x[0-9a-z]\+" | sed 's/VMID=//g' | \ - uniq | sort | uniq` + uniq | sort | uniq) fi # Iterate over the list of TIDs that the test 
says it has and find @@ -116,7 +116,7 @@ perf_dump_aux_tid_verify() { for TID2 in $TIDS; do FOUND="" for TIDHEX in $FOUND_TIDS; do - TID=`printf "%i" $TIDHEX` + TID=$(printf "%i" $TIDHEX) if test "$TID" -eq "$TID2"; then FOUND="y" break From 1bad502775df1fdc824774d0f6e41a629548982a Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 10 Nov 2022 11:50:05 +0800 Subject: [PATCH 070/114] tools x86: Keep list sorted by number in unistd_{32,64}.h It is better to keep list sorted by number in unistd_{32,64}.h, so that we can add more syscall number to a proper position. This is preparation for later patch, no functionality change. Signed-off-by: Tiezhu Yang Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/1668052208-14047-2-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/unistd_32.h | 16 ++++++++-------- tools/arch/x86/include/uapi/asm/unistd_64.h | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index 60a89dba01b6..e1cc62d238d2 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -1,16 +1,16 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NR_perf_event_open -# define __NR_perf_event_open 336 +#ifndef __NR_gettid +#define __NR_gettid 224 #endif #ifndef __NR_futex -# define __NR_futex 240 -#endif -#ifndef __NR_gettid -# define __NR_gettid 224 +#define __NR_futex 240 #endif #ifndef __NR_getcpu -# define __NR_getcpu 318 +#define __NR_getcpu 318 +#endif +#ifndef __NR_perf_event_open +#define __NR_perf_event_open 336 #endif #ifndef __NR_setns -# define __NR_setns 346 +#define __NR_setns 346 #endif diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index cb52a3a8b8fc..ce8b7ab12045 
100644 --- a/tools/arch/x86/include/uapi/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -1,16 +1,16 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NR_perf_event_open -# define __NR_perf_event_open 298 +#ifndef __NR_gettid +#define __NR_gettid 186 #endif #ifndef __NR_futex -# define __NR_futex 202 +#define __NR_futex 202 #endif -#ifndef __NR_gettid -# define __NR_gettid 186 -#endif -#ifndef __NR_getcpu -# define __NR_getcpu 309 +#ifndef __NR_perf_event_open +#define __NR_perf_event_open 298 #endif #ifndef __NR_setns #define __NR_setns 308 #endif +#ifndef __NR_getcpu +#define __NR_getcpu 309 +#endif From 3fe91f32620f658adfc073f9df831e7b82498575 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 10 Nov 2022 11:50:06 +0800 Subject: [PATCH 071/114] perf bench syscall: Introduce bench_syscall_common() In the current code, there is only a basic syscall benchmark via getppid, this is not enough. Introduce bench_syscall_common() so that we can add more syscalls to benchmark. This is preparation for later patch, no functionality change. 
Signed-off-by: Tiezhu Yang Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/1668052208-14047-3-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/unistd_32.h | 3 +++ tools/arch/x86/include/uapi/asm/unistd_64.h | 3 +++ tools/perf/bench/syscall.c | 29 ++++++++++++++++++--- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index e1cc62d238d2..4d8873a9f519 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NR_getppid +#define __NR_getppid 64 +#endif #ifndef __NR_gettid #define __NR_gettid 224 #endif diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index ce8b7ab12045..e29038af133c 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NR_getppid +#define __NR_getppid 110 +#endif #ifndef __NR_gettid #define __NR_gettid 186 #endif diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c index 9b751016f4b6..746fd7171921 100644 --- a/tools/perf/bench/syscall.c +++ b/tools/perf/bench/syscall.c @@ -30,25 +30,41 @@ static const char * const bench_syscall_usage[] = { NULL }; -int bench_syscall_basic(int argc, const char **argv) +static int bench_syscall_common(int argc, const char **argv, int syscall) { struct timeval start, stop, diff; unsigned long long result_usec = 0; + const char *name = NULL; int i; argc = parse_options(argc, argv, options, bench_syscall_usage, 0); gettimeofday(&start, NULL); - for (i = 0; i < loops; i++) - getppid(); + for (i = 0; i < loops; i++) { + switch (syscall) { + case __NR_getppid: + 
getppid(); + break; + default: + break; + } + } gettimeofday(&stop, NULL); timersub(&stop, &start, &diff); + switch (syscall) { + case __NR_getppid: + name = "getppid()"; + break; + default: + break; + } + switch (bench_format) { case BENCH_FORMAT_DEFAULT: - printf("# Executed %'d getppid() calls\n", loops); + printf("# Executed %'d %s calls\n", loops, name); result_usec = diff.tv_sec * 1000000; result_usec += diff.tv_usec; @@ -79,3 +95,8 @@ int bench_syscall_basic(int argc, const char **argv) return 0; } + +int bench_syscall_basic(int argc, const char **argv) +{ + return bench_syscall_common(argc, argv, __NR_getppid); +} From 391f84e5555c20a5b5ca4a2b0c3daec383765a09 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 10 Nov 2022 11:50:07 +0800 Subject: [PATCH 072/114] perf bench syscall: Add getpgid syscall benchmark This commit adds a simple getpgid syscall benchmark, more syscall benchmarks can be added in the future. Signed-off-by: Tiezhu Yang Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/1668052208-14047-4-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/unistd_32.h | 3 +++ tools/arch/x86/include/uapi/asm/unistd_64.h | 3 +++ tools/perf/bench/bench.h | 1 + tools/perf/bench/syscall.c | 11 +++++++++++ tools/perf/builtin-bench.c | 1 + 5 files changed, 19 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index 4d8873a9f519..053122c79ee1 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -2,6 +2,9 @@ #ifndef __NR_getppid #define __NR_getppid 64 #endif +#ifndef __NR_getpgid +#define __NR_getpgid 132 +#endif #ifndef __NR_gettid #define __NR_gettid 224 #endif diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index 
e29038af133c..54a6c4d93ada 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -2,6 +2,9 @@ #ifndef __NR_getppid #define __NR_getppid 110 #endif +#ifndef __NR_getpgid +#define __NR_getpgid 121 +#endif #ifndef __NR_gettid #define __NR_gettid 186 #endif diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a5d49b3b6a09..0c58448273da 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -22,6 +22,7 @@ int bench_numa(int argc, const char **argv); int bench_sched_messaging(int argc, const char **argv); int bench_sched_pipe(int argc, const char **argv); int bench_syscall_basic(int argc, const char **argv); +int bench_syscall_getpgid(int argc, const char **argv); int bench_mem_memcpy(int argc, const char **argv); int bench_mem_memset(int argc, const char **argv); int bench_mem_find_bit(int argc, const char **argv); diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c index 746fd7171921..6411b146ba68 100644 --- a/tools/perf/bench/syscall.c +++ b/tools/perf/bench/syscall.c @@ -46,6 +46,9 @@ static int bench_syscall_common(int argc, const char **argv, int syscall) case __NR_getppid: getppid(); break; + case __NR_getpgid: + getpgid(0); + break; default: break; } @@ -58,6 +61,9 @@ static int bench_syscall_common(int argc, const char **argv, int syscall) case __NR_getppid: name = "getppid()"; break; + case __NR_getpgid: + name = "getpgid()"; + break; default: break; } @@ -100,3 +106,8 @@ int bench_syscall_basic(int argc, const char **argv) { return bench_syscall_common(argc, argv, __NR_getppid); } + +int bench_syscall_getpgid(int argc, const char **argv) +{ + return bench_syscall_common(argc, argv, __NR_getpgid); +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index bd4fd94a2ce0..281b22e0f257 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -52,6 +52,7 @@ static struct bench sched_benchmarks[] = { static struct bench 
syscall_benchmarks[] = { { "basic", "Benchmark for basic getppid(2) calls", bench_syscall_basic }, + { "getpgid", "Benchmark for getpgid(2) calls", bench_syscall_getpgid }, { "all", "Run all syscall benchmarks", NULL }, { NULL, NULL, NULL }, }; From 540f8b5640ec08d6ba667d933298bbb350ced77c Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 10 Nov 2022 11:50:08 +0800 Subject: [PATCH 073/114] perf bench syscall: Add execve syscall benchmark This commit adds the execve syscall benchmark, more syscall benchmarks can be added in the future. Signed-off-by: Tiezhu Yang Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/1668052208-14047-5-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/unistd_32.h | 3 ++ tools/arch/x86/include/uapi/asm/unistd_64.h | 3 ++ tools/perf/bench/bench.h | 1 + tools/perf/bench/syscall.c | 36 +++++++++++++++++++++ tools/perf/builtin-bench.c | 1 + 5 files changed, 44 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index 053122c79ee1..2712d5e03e2e 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NR_execve +#define __NR_execve 11 +#endif #ifndef __NR_getppid #define __NR_getppid 64 #endif diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index 54a6c4d93ada..a6f7fe84d4df 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NR_execve +#define __NR_execve 59 +#endif #ifndef __NR_getppid #define __NR_getppid 110 #endif diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 0c58448273da..e43893151a3e 100644 --- 
a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -23,6 +23,7 @@ int bench_sched_messaging(int argc, const char **argv); int bench_sched_pipe(int argc, const char **argv); int bench_syscall_basic(int argc, const char **argv); int bench_syscall_getpgid(int argc, const char **argv); +int bench_syscall_execve(int argc, const char **argv); int bench_mem_memcpy(int argc, const char **argv); int bench_mem_memset(int argc, const char **argv); int bench_mem_find_bit(int argc, const char **argv); diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c index 6411b146ba68..fe79f7f3091e 100644 --- a/tools/perf/bench/syscall.c +++ b/tools/perf/bench/syscall.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,27 @@ static const char * const bench_syscall_usage[] = { NULL }; +static void test_execve(void) +{ + const char *pathname = "/bin/true"; + char *const argv[] = { (char *)pathname, NULL }; + pid_t pid = fork(); + + if (pid < 0) { + fprintf(stderr, "fork failed\n"); + exit(1); + } else if (pid == 0) { + execve(pathname, argv, NULL); + fprintf(stderr, "execve /bin/true failed\n"); + exit(1); + } else { + if (waitpid(pid, NULL, 0) < 0) { + fprintf(stderr, "waitpid failed\n"); + exit(1); + } + } +} + static int bench_syscall_common(int argc, const char **argv, int syscall) { struct timeval start, stop, diff; @@ -49,6 +71,12 @@ static int bench_syscall_common(int argc, const char **argv, int syscall) case __NR_getpgid: getpgid(0); break; + case __NR_execve: + test_execve(); + /* Only loop 10000 times to save time */ + if (i == 10000) + loops = 10000; + break; default: break; } @@ -64,6 +92,9 @@ static int bench_syscall_common(int argc, const char **argv, int syscall) case __NR_getpgid: name = "getpgid()"; break; + case __NR_execve: + name = "execve()"; + break; default: break; } @@ -111,3 +142,8 @@ int bench_syscall_getpgid(int argc, const char **argv) { return bench_syscall_common(argc, argv, __NR_getpgid); } + +int 
bench_syscall_execve(int argc, const char **argv) +{ + return bench_syscall_common(argc, argv, __NR_execve); +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 281b22e0f257..814e9afc86f6 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -53,6 +53,7 @@ static struct bench sched_benchmarks[] = { static struct bench syscall_benchmarks[] = { { "basic", "Benchmark for basic getppid(2) calls", bench_syscall_basic }, { "getpgid", "Benchmark for getpgid(2) calls", bench_syscall_getpgid }, + { "execve", "Benchmark for execve(2) calls", bench_syscall_execve }, { "all", "Run all syscall benchmarks", NULL }, { NULL, NULL, NULL }, }; From 79b7ca7802d24cd4042d0fd508afb59169f74a7a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 1 Feb 2023 21:04:55 -0800 Subject: [PATCH 074/114] perf test: Add more test cases for perf lock contention Check callstack filter with two different aggregation mode. $ sudo ./perf test -v contention 88: kernel lock contention analysis test : --- start --- test child forked, pid 83416 Testing perf lock record and perf lock contention Testing perf lock contention --use-bpf Testing perf lock record and perf lock contention at the same time Testing perf lock contention --threads Testing perf lock contention --lock-addr Testing perf lock contention --type-filter (w/ spinlock) Testing perf lock contention --lock-filter (w/ tasklist_lock) Testing perf lock contention --callstack-filter (w/ unix_stream) Testing perf lock contention --callstack-filter with task aggregation test child finished with 0 ---- end ---- kernel lock contention analysis test: Ok Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230202050455.2187592-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lock_contention.sh | 66 +++++++++++++++++++++-- 1 file 
changed, 62 insertions(+), 4 deletions(-) diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index b05f1b1ca6c8..be5fcafb26aa 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -128,7 +128,7 @@ test_type_filter() echo "Testing perf lock contention --type-filter (w/ spinlock)" perf lock contention -i ${perfdata} -Y spinlock -q 2> ${result} if [ $(grep -c -v spinlock "${result}") != "0" ]; then - echo "[Fail] Recorded should not have non-spinlocks:" $(cat "${result}") + echo "[Fail] Recorded result should not have non-spinlocks:" $(cat "${result}") err=1 exit fi @@ -139,7 +139,7 @@ test_type_filter() perf lock con -a -b -Y spinlock -q -- perf bench sched messaging > /dev/null 2> ${result} if [ $(grep -c -v spinlock "${result}") != "0" ]; then - echo "[Fail] Recorded should not have non-spinlocks:" $(cat "${result}") + echo "[Fail] BPF result should not have non-spinlocks:" $(cat "${result}") err=1 exit fi @@ -160,7 +160,7 @@ test_lock_filter() local type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//') if [ $(grep -c -v "${type}" "${result}") != "0" ]; then - echo "[Fail] Recorded should not have non-${type} locks:" $(cat "${result}") + echo "[Fail] Recorded result should not have non-${type} locks:" $(cat "${result}") err=1 exit fi @@ -171,7 +171,63 @@ test_lock_filter() perf lock con -a -b -L tasklist_lock -q -- perf bench sched messaging > /dev/null 2> ${result} if [ $(grep -c -v "${type}" "${result}") != "0" ]; then - echo "[Fail] Recorded should not have non-${type} locks:" $(cat "${result}") + echo "[Fail] BPF result should not have non-${type} locks:" $(cat "${result}") + err=1 + exit + fi +} + +test_stack_filter() +{ + echo "Testing perf lock contention --callstack-filter (w/ unix_stream)" + perf lock contention -i ${perfdata} -v -q 2> ${result} + if [ $(grep -c unix_stream "${result}") == "0" ]; then + echo "[Skip] Could not find 
'unix_stream'" + return + fi + + perf lock contention -i ${perfdata} -E 1 -S unix_stream -q 2> ${result} + if [ $(cat "${result}" | wc -l) != "1" ]; then + echo "[Fail] Recorded result should have a lock from unix_stream:" $(cat "${result}") + err=1 + exit + fi + + if ! perf lock con -b true > /dev/null 2>&1 ; then + return + fi + + perf lock con -a -b -S unix_stream -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} + if [ $(cat "${result}" | wc -l) != "1" ]; then + echo "[Fail] BPF result should have a lock from unix_stream:" $(cat "${result}") + err=1 + exit + fi +} + +test_aggr_task_stack_filter() +{ + echo "Testing perf lock contention --callstack-filter with task aggregation" + perf lock contention -i ${perfdata} -v -q 2> ${result} + if [ $(grep -c unix_stream "${result}") == "0" ]; then + echo "[Skip] Could not find 'unix_stream'" + return + fi + + perf lock contention -i ${perfdata} -t -E 1 -S unix_stream -q 2> ${result} + if [ $(cat "${result}" | wc -l) != "1" ]; then + echo "[Fail] Recorded result should have a task from unix_stream:" $(cat "${result}") + err=1 + exit + fi + + if ! perf lock con -b true > /dev/null 2>&1 ; then + return + fi + + perf lock con -a -b -t -S unix_stream -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result} + if [ $(cat "${result}" | wc -l) != "1" ]; then + echo "[Fail] BPF result should have a task from unix_stream:" $(cat "${result}") err=1 exit fi @@ -186,5 +242,7 @@ test_aggr_task test_aggr_addr test_type_filter test_lock_filter +test_stack_filter +test_aggr_task_stack_filter exit ${err} From 6ade6c6460357a4878db24f468bbc66e3eddcd42 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 2 Feb 2023 17:56:14 +0530 Subject: [PATCH 075/114] perf script: Show branch speculation info Show the branch speculation info if provided by the branch recording hardware feature. This can be useful for optimizing code further. 
The speculation info is appended to the end of the list of fields so any existing tools that use "/" as a delimiter to access fields via an index remain unaffected. Also show "-" instead of "N/A" when speculation info is unavailable because "/" is used as the field separator. E.g. $ perf record -j any,u,save_type ./test_branch $ perf script --fields brstacksym Before: [...] check_match+0x60/strcmp+0x0/P/-/-/0/CALL do_lookup_x+0x3c5/check_match+0x0/P/-/-/0/CALL [...] After: [...] check_match+0x60/strcmp+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH do_lookup_x+0x3c5/check_match+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH [...] The bitfield swapping scheme used during sample parsing has changed because of the addition of new branch flags, namely "spec", "new_type" and "priv". Earlier, these were all part of the "reserved" field but now, each of these fields gets swapped separately. Change the expected flag values accordingly for the test to pass. E.g. $ perf test -v 27 Before: 27: Sample parsing : --- start --- test child forked, pid 61979 parsing failed for sample_type 0x800 test child finished with -1 ---- end ---- Sample parsing: FAILED! 
After: 27: Sample parsing : --- start --- test child forked, pid 63293 test child finished with 0 ---- end ---- Sample parsing: Ok Signed-off-by: Sandipan Das Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Borislav Petkov Cc: Dave Hansen Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Thomas Richter Cc: x86@kernel.org Link: https://lore.kernel.org/r/56e272583552526e999ba0b536ac009ae3613966.1675333809.git.sandipan.das@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 +++-- tools/perf/tests/sample-parsing.c | 2 +- tools/perf/util/branch.c | 15 +++++++++++++++ tools/perf/util/branch.h | 2 ++ tools/perf/util/evsel.c | 15 ++++++++++++--- 5 files changed, 33 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index cb6b34da4eef..339b441015eb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -889,12 +889,13 @@ mispred_str(struct branch_entry *br) static int print_bstack_flags(FILE *fp, struct branch_entry *br) { - return fprintf(fp, "/%c/%c/%c/%d/%s ", + return fprintf(fp, "/%c/%c/%c/%d/%s/%s ", mispred_str(br), br->flags.in_tx ? 'X' : '-', br->flags.abort ? 'A' : '-', br->flags.cycles, - get_branch_type(br)); + get_branch_type(br), + br->flags.spec ? branch_spec_desc(br->flags.spec) : "-"); } static int perf_sample__fprintf_brstack(struct perf_sample *sample, diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 927c7f0cc4cc..25a3f6cece50 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -37,7 +37,7 @@ * in branch_stack variable. 
*/ #define BS_EXPECTED_BE 0xa000d00000000000 -#define BS_EXPECTED_LE 0xd5000000 +#define BS_EXPECTED_LE 0x1aa00000000 #define FLAG(s) s->branch_stack->entries[i].flags static bool samples_same(const struct perf_sample *s1, diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index 6d38238481d3..378f16a24751 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -212,3 +212,18 @@ int branch_type_str(struct branch_type_stat *st, char *bf, int size) return printed; } + +const char *branch_spec_desc(int spec) +{ + const char *branch_spec_outcomes[PERF_BR_SPEC_MAX] = { + "N/A", + "SPEC_WRONG_PATH", + "NON_SPEC_CORRECT_PATH", + "SPEC_CORRECT_PATH", + }; + + if (spec >= 0 && spec < PERF_BR_SPEC_MAX) + return branch_spec_outcomes[spec]; + + return NULL; +} diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 3ed792db1125..e41bfffe2217 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -89,4 +89,6 @@ const char *get_branch_type(struct branch_entry *e); void branch_type_stat_display(FILE *fp, struct branch_type_stat *st); int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize); +const char *branch_spec_desc(int spec); + #endif /* _PERF_BRANCH_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8550638587e5..019e53db03b3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2319,7 +2319,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) * abort:1 //transaction abort * cycles:16 //cycle count to last branch * type:4 //branch type - * reserved:40 + * spec:2 //branch speculation info + * new_type:4 //additional branch type + * priv:3 //privilege level + * reserved:31 * } * } * @@ -2335,7 +2338,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) new_val |= bitfield_swap(value, 3, 1); new_val |= bitfield_swap(value, 4, 16); new_val |= bitfield_swap(value, 20, 4); - new_val |= bitfield_swap(value, 24, 40); + new_val |= bitfield_swap(value, 24, 2); + 
new_val |= bitfield_swap(value, 26, 4); + new_val |= bitfield_swap(value, 30, 3); + new_val |= bitfield_swap(value, 33, 31); } else { new_val = bitfield_swap(value, 63, 1); new_val |= bitfield_swap(value, 62, 1); @@ -2343,7 +2349,10 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) new_val |= bitfield_swap(value, 60, 1); new_val |= bitfield_swap(value, 44, 16); new_val |= bitfield_swap(value, 40, 4); - new_val |= bitfield_swap(value, 0, 40); + new_val |= bitfield_swap(value, 38, 2); + new_val |= bitfield_swap(value, 34, 4); + new_val |= bitfield_swap(value, 31, 3); + new_val |= bitfield_swap(value, 0, 31); } return new_val; From 8eaf8ec3c09b88e35c1c3c761ac4188ee425aeb6 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 2 Feb 2023 17:56:15 +0530 Subject: [PATCH 076/114] perf session: Show branch speculation info in raw dump Show the branch speculation info if provided by the branch recording hardware feature. This can be useful for purposes of code optimization. E.g. $ perf record -j any,u ./test_branch $ perf report --dump-raw-trace Before: [...] 8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0 ... branch stack: nr:16 ..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0 ..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0 ..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0 ..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0 ..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0 ..... 5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0 ..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0 ..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0 ..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0 ..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0 ..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0 ..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0 ..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0 ..... 
13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0 ..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0 ..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0 ... thread: test_branch:7952 ...... dso: /data/sandipan/test_branch [...] After: [...] 8380958377610 0x40b178 [0x1b0]: PERF_RECORD_SAMPLE(IP, 0x2): 7952/7952: 0x4f851a period: 48973 addr: 0 ... branch stack: nr:16 ..... 0: 00000000004b52fd -> 00000000004f82c0 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 1: ffffffff8220137c -> 00000000004b52f0 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 2: 000000000041d1c4 -> 00000000004b52f0 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 3: 00000000004e7ead -> 000000000041d1b0 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 4: 00000000004e7f91 -> 00000000004e7ead 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 5: 00000000004e7ea8 -> 00000000004e7f70 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 6: 00000000004e7e52 -> 00000000004e7e98 0 cycles M 0 SPEC_CORRECT_PATH ..... 7: 00000000004e7e1f -> 00000000004e7e40 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 8: 00000000004e7f60 -> 00000000004e7df0 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 9: 00000000004e7f58 -> 00000000004e7f60 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 10: 000000000041d85d -> 00000000004e7f50 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 11: 000000000043306a -> 000000000041d840 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 12: ffffffff8220137c -> 0000000000433040 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 13: 000000000041e4a1 -> 0000000000433040 0 cycles P 0 NON_SPEC_CORRECT_PATH ..... 14: ffffffff8220137c -> 000000000041e490 0 cycles M 0 NON_SPEC_CORRECT_PATH ..... 15: 000000000041d89b -> 000000000041e487 0 cycles P 0 NON_SPEC_CORRECT_PATH ... thread: test_branch:7952 ...... dso: /data/sandipan/test_branch [...] With the addition of new branch flags, the "brstacksym" fields in perf script output now shows speculation information after the branch type. Change the regular expressions accordingly for the test to pass. 
Since branch speculation information may vary across platforms, the test does not look for specific values. E.g. $ perf test -v 110 Before: 110: Check branch stack sampling : --- start --- test child forked, pid 54154 Testing user branch stack sampling + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$ /tmp/__perf_test.program.AfhUI/perf.script + cleanup + rm -rf /tmp/__perf_test.program.AfhUI test child finished with -1 ---- end ---- Check branch stack sampling: FAILED! After: 110: Check branch stack sampling : --- start --- test child forked, pid 43716 Testing user branch stack sampling + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x66/brstack_foo+0x0/P/-/-/0/IND_CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_foo+0x1b/brstack_bar+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x58/brstack_foo+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x5d/brstack_bar+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bar+0x31/brstack_foo+0x20/P/-/-/0/RET/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_foo+0x36/brstack_bench+0x5d/P/-/-/0/RET/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$ /tmp/__perf_test.program.xgzAi/perf.script brstack_bench+0x76/brstack_bench+0x7d/P/-/-/0/COND/NON_SPEC_CORRECT_PATH + grep -E -m1 ^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$ /tmp/__perf_test.program.xgzAi/perf.script 
brstack+0x5a/brstack+0x41/P/-/-/0/UNCOND/NON_SPEC_CORRECT_PATH + set +x Testing branch stack filtering permutation (any_call,CALL|IND_CALL|COND_CALL|SYSCALL|IRQ) Testing branch stack filtering permutation (call,CALL|SYSCALL) Testing branch stack filtering permutation (cond,COND) Testing branch stack filtering permutation (any_ret,RET|COND_RET|SYSRET|ERET) Testing branch stack filtering permutation (call,cond,CALL|SYSCALL|COND) Testing branch stack filtering permutation (any_call,cond,CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND) Testing branch stack filtering permutation (cond,any_call,any_ret,COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET) test child finished with 0 ---- end ---- Check branch stack sampling: Ok Signed-off-by: Sandipan Das Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Borislav Petkov Cc: Dave Hansen Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Thomas Richter Cc: x86@kernel.org Link: https://lore.kernel.org/r/048d67c9de3cc8e3dbf19aaa7ff718dec91364c5.1675333809.git.sandipan.das@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_brstack.sh | 18 +++++++++--------- tools/perf/util/session.c | 5 +++-- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh index 59195eb80052..1c49d8293003 100755 --- a/tools/perf/tests/shell/test_brstack.sh +++ b/tools/perf/tests/shell/test_brstack.sh @@ -30,14 +30,14 @@ test_user_branches() { # brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL set -x - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$" $TMPDIR/perf.script - grep -E 
-m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$" $TMPDIR/perf.script - grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$" $TMPDIR/perf.script - grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$" $TMPDIR/perf.script - grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND/.*$" $TMPDIR/perf.script + grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND/.*$" $TMPDIR/perf.script set +x # some branch types are still not being tested: @@ -57,7 +57,7 @@ test_filter() { # fail if we find any branch type that doesn't match any of the expected ones # also consider UNKNOWN branch types (-) - if grep -E -vm1 "^[^ ]*/($expect|-|( *))$" $TMPDIR/perf.script; then + if grep -E -vm1 "^[^ ]*/($expect|-|( *))/.*$" $TMPDIR/perf.script; then return 1 fi } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fdfe772f2699..749d5b5c135b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1180,7 +1180,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) struct branch_entry *e = &entries[i]; if (!callstack) { - printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s\n", + printf("..... 
%2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x %s %s\n", i, e->from, e->to, (unsigned short)e->flags.cycles, e->flags.mispred ? "M" : " ", @@ -1188,7 +1188,8 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) e->flags.abort ? "A" : " ", e->flags.in_tx ? "T" : " ", (unsigned)e->flags.reserved, - get_branch_type(e)); + get_branch_type(e), + e->flags.spec ? branch_spec_desc(e->flags.spec) : ""); } else { if (i == 0) { printf("..... %2"PRIu64": %016" PRIx64 "\n" From 3241cd11d9a093783a97cea7136179206553fa86 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:31 -0800 Subject: [PATCH 077/114] perf jevents metric: Correct Function equality rhs may not be defined, say for source_count, so add a guard. Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/metric.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py index 4797ed4fd817..2f2fd220e843 100644 --- a/tools/perf/pmu-events/metric.py +++ b/tools/perf/pmu-events/metric.py @@ -261,8 +261,10 @@ class Function(Expression): def Equals(self, other: Expression) -> bool: if isinstance(other, Function): - return self.fn == other.fn and self.lhs.Equals( - other.lhs) and self.rhs.Equals(other.rhs) + result = self.fn == other.fn and 
self.lhs.Equals(other.lhs) + if self.rhs: + result = result and self.rhs.Equals(other.rhs) + return result return False From 2efbb73d46eaf5618d9782648117ad635e6b1251 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:32 -0800 Subject: [PATCH 078/114] perf jevents metric: Add ability to rewrite metrics in terms of others Add RewriteMetricsInTermsOfOthers that iterates over pairs of names and expressions trying to replace an expression, within the current expression, with its name. Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/metric.py | 73 +++++++++++++++++++++++++++- tools/perf/pmu-events/metric_test.py | 10 ++++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py index 2f2fd220e843..77ea6ff98538 100644 --- a/tools/perf/pmu-events/metric.py +++ b/tools/perf/pmu-events/metric.py @@ -4,7 +4,7 @@ import ast import decimal import json import re -from typing import Dict, List, Optional, Set, Union +from typing import Dict, List, Optional, Set, Tuple, Union class Expression: @@ -26,6 +26,9 @@ class Expression: """Returns true when two expressions are the same.""" raise NotImplementedError() + def Substitute(self, name: str, expression: 'Expression') -> 'Expression': + raise NotImplementedError() + def __str__(self) -> str: return 
self.ToPerfJson() @@ -186,6 +189,15 @@ class Operator(Expression): other.lhs) and self.rhs.Equals(other.rhs) return False + def Substitute(self, name: str, expression: Expression) -> Expression: + if self.Equals(expression): + return Event(name) + lhs = self.lhs.Substitute(name, expression) + rhs = None + if self.rhs: + rhs = self.rhs.Substitute(name, expression) + return Operator(self.operator, lhs, rhs) + class Select(Expression): """Represents a select ternary in the parse tree.""" @@ -225,6 +237,14 @@ class Select(Expression): other.false_val) and self.true_val.Equals(other.true_val) return False + def Substitute(self, name: str, expression: Expression) -> Expression: + if self.Equals(expression): + return Event(name) + true_val = self.true_val.Substitute(name, expression) + cond = self.cond.Substitute(name, expression) + false_val = self.false_val.Substitute(name, expression) + return Select(true_val, cond, false_val) + class Function(Expression): """A function in an expression like min, max, d_ratio.""" @@ -267,6 +287,15 @@ class Function(Expression): return result return False + def Substitute(self, name: str, expression: Expression) -> Expression: + if self.Equals(expression): + return Event(name) + lhs = self.lhs.Substitute(name, expression) + rhs = None + if self.rhs: + rhs = self.rhs.Substitute(name, expression) + return Function(self.fn, lhs, rhs) + def _FixEscapes(s: str) -> str: s = re.sub(r'([^\\]),', r'\1\\,', s) @@ -293,6 +322,9 @@ class Event(Expression): def Equals(self, other: Expression) -> bool: return isinstance(other, Event) and self.name == other.name + def Substitute(self, name: str, expression: Expression) -> Expression: + return self + class Constant(Expression): """A constant within the expression tree.""" @@ -317,6 +349,9 @@ class Constant(Expression): def Equals(self, other: Expression) -> bool: return isinstance(other, Constant) and self.value == other.value + def Substitute(self, name: str, expression: Expression) -> Expression: + 
return self + class Literal(Expression): """A runtime literal within the expression tree.""" @@ -336,6 +371,9 @@ class Literal(Expression): def Equals(self, other: Expression) -> bool: return isinstance(other, Literal) and self.value == other.value + def Substitute(self, name: str, expression: Expression) -> Expression: + return self + def min(lhs: Union[int, float, Expression], rhs: Union[int, float, Expression]) -> Function: @@ -461,6 +499,7 @@ class MetricGroup: class _RewriteIfExpToSelect(ast.NodeTransformer): + """Transformer to convert if-else nodes to Select expressions.""" def visit_IfExp(self, node): # pylint: disable=invalid-name @@ -498,7 +537,37 @@ def ParsePerfJson(orig: str) -> Expression: for kw in keywords: py = re.sub(rf'Event\(r"{kw}"\)', kw, py) - parsed = ast.parse(py, mode='eval') + try: + parsed = ast.parse(py, mode='eval') + except SyntaxError as e: + raise SyntaxError(f'Parsing expression:\n{orig}') from e _RewriteIfExpToSelect().visit(parsed) parsed = ast.fix_missing_locations(parsed) return _Constify(eval(compile(parsed, orig, 'eval'))) + + +def RewriteMetricsInTermsOfOthers(metrics: List[Tuple[str, Expression]] + )-> Dict[str, Expression]: + """Shorten metrics by rewriting in terms of others. + + Args: + metrics (list): pairs of metric names and their expressions. + Returns: + Dict: mapping from a metric name to a shortened expression. 
+ """ + updates: Dict[str, Expression] = dict() + for outer_name, outer_expression in metrics: + updated = outer_expression + while True: + for inner_name, inner_expression in metrics: + if inner_name.lower() == outer_name.lower(): + continue + if inner_name in updates: + inner_expression = updates[inner_name] + updated = updated.Substitute(inner_name, inner_expression) + if updated.Equals(outer_expression): + break + if outer_name in updates and updated.Equals(updates[outer_name]): + break + updates[outer_name] = updated + return updates diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py index 15315d0f716c..ced5998bd827 100644 --- a/tools/perf/pmu-events/metric_test.py +++ b/tools/perf/pmu-events/metric_test.py @@ -2,7 +2,9 @@ import unittest from metric import Constant from metric import Event +from metric import Expression from metric import ParsePerfJson +from metric import RewriteMetricsInTermsOfOthers class TestMetricExpressions(unittest.TestCase): @@ -153,5 +155,13 @@ class TestMetricExpressions(unittest.TestCase): after = '0 * SLOTS' self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after) + def test_RewriteMetricsInTermsOfOthers(self): + Expression.__eq__ = lambda e1, e2: e1.Equals(e2) + before = [('m1', ParsePerfJson('a + b + c + d')), + ('m2', ParsePerfJson('a + b + c'))] + after = {'m1': ParsePerfJson('m2 + d')} + self.assertEqual(RewriteMetricsInTermsOfOthers(before), after) + Expression.__eq__ = None + if __name__ == '__main__': unittest.main() From df5499ddb859bb848ddad5939d258da8a781af3a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:33 -0800 Subject: [PATCH 079/114] perf jevents: Rewrite metrics in the same file with each other Rewrite metrics within the same file in terms of each other. 
For example, on Power8 other_stall_cpi is rewritten from: "PM_CMPLU_STALL / PM_RUN_INST_CMPL - PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL - PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL - PM_CMPLU_STALL_VSU / PM_RUN_INST_CMPL - PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL - PM_CMPLU_STALL_NTCG_FLUSH / PM_RUN_INST_CMPL - PM_CMPLU_STALL_NO_NTF / PM_RUN_INST_CMPL" to: "stall_cpi - bru_cru_stall_cpi - fxu_stall_cpi - vsu_stall_cpi - lsu_stall_cpi - ntcg_flush_cpi - no_ntf_stall_cpi" Which more closely matches the definition on Power9. To avoid recomputation decorate the function with a cache. Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 0416b7442171..15a1671740cc 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -3,6 +3,7 @@ """Convert directories of JSON events to C code.""" import argparse import csv +from functools import lru_cache import json import metric import os @@ -337,18 +338,28 @@ class JsonEvent: s = self.build_c_string() return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n' - +@lru_cache(maxsize=None) def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]: """Read json events from the specified file.""" - try: - result = 
json.load(open(path), object_hook=JsonEvent) + events = json.load(open(path), object_hook=JsonEvent) except BaseException as err: print(f"Exception processing {path}") raise - for event in result: + metrics: list[Tuple[str, metric.Expression]] = [] + for event in events: event.topic = topic - return result + if event.metric_name and '-' not in event.metric_name: + metrics.append((event.metric_name, event.metric_expr)) + updates = metric.RewriteMetricsInTermsOfOthers(metrics) + if updates: + for event in events: + if event.metric_name in updates: + # print(f'Updated {event.metric_name} from\n"{event.metric_expr}"\n' + # f'to\n"{updates[event.metric_name]}"') + event.metric_expr = updates[event.metric_name] + + return events def preprocess_arch_std_files(archpath: str) -> None: """Read in all architecture standard events.""" From db95818e888a927456686518880ed0145b1f20ce Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:34 -0800 Subject: [PATCH 080/114] perf pmu-events: Add separate metric from pmu_event Create a new pmu_metric for the metric related variables from pmu_event but that is initially just a clone of pmu_event. Add iterators for pmu_metric and use in places that metrics are desired rather than events. Make the event iterator skip metric only events, and the metric iterator skip event only events. 
Reviewed-by: John Garry Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/header.c | 4 +- tools/perf/pmu-events/empty-pmu-events.c | 49 ++++++- tools/perf/pmu-events/jevents.py | 62 ++++++++- tools/perf/pmu-events/pmu-events.h | 26 ++++ tools/perf/tests/pmu-events.c | 35 +++-- tools/perf/util/metricgroup.c | 161 +++++++++++------------ tools/perf/util/metricgroup.h | 2 +- 7 files changed, 228 insertions(+), 111 deletions(-) diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index e8fe36b10d20..78eef77d8a8d 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -40,11 +40,11 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return bufp; } -int arch_get_runtimeparam(const struct pmu_event *pe) +int arch_get_runtimeparam(const struct pmu_metric *pm) { int count; char path[PATH_MAX] = "/devices/hv_24x7/interface/"; - atoi(pe->aggr_mode) == PerChip ? strcat(path, "sockets") : strcat(path, "coresperchip"); + atoi(pm->aggr_mode) == PerChip ? strcat(path, "sockets") : strcat(path, "coresperchip"); return sysfs__read_int(path, &count) < 0 ? 
1 : count; } diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index 480e8f0d30c8..4e39d1a8d6d6 100644 --- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -181,6 +181,11 @@ struct pmu_events_table { const struct pmu_event *entries; }; +/* Struct used to make the PMU metric table implementation opaque to callers. */ +struct pmu_metrics_table { + const struct pmu_metric *entries; +}; + /* * Map a CPU to its table of PMU events. The CPU is identified by the * cpuid field, which is an arch-specific identifier for the CPU. @@ -254,11 +259,29 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = { int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn, void *data) { - for (const struct pmu_event *pe = &table->entries[0]; - pe->name || pe->metric_group || pe->metric_name; - pe++) { - int ret = fn(pe, table, data); + for (const struct pmu_event *pe = &table->entries[0]; pe->name || pe->metric_expr; pe++) { + int ret; + if (!pe->name) + continue; + ret = fn(pe, table, data); + if (ret) + return ret; + } + return 0; +} + +int pmu_events_table_for_each_metric(const struct pmu_events_table *etable, pmu_metric_iter_fn fn, + void *data) +{ + struct pmu_metrics_table *table = (struct pmu_metrics_table *)etable; + + for (const struct pmu_metric *pm = &table->entries[0]; pm->name || pm->metric_expr; pm++) { + int ret; + + if (!pm->metric_expr) + continue; + ret = fn(pm, etable, data); if (ret) return ret; } @@ -305,11 +328,22 @@ const struct pmu_events_table *find_core_events_table(const char *arch, const ch } int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data) +{ + for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) { + int ret = pmu_events_table_for_each_event(&tables->table, fn, data); + + if (ret) + return ret; + } + return 0; +} + +int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data) 
{ for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) { - int ret = pmu_events_table_for_each_event(&tables->table, fn, data); + int ret = pmu_events_table_for_each_metric(&tables->table, fn, data); if (ret) return ret; @@ -340,3 +374,8 @@ int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data) } return 0; } + +int pmu_for_each_sys_metric(pmu_metric_iter_fn fn __maybe_unused, void *data __maybe_unused) +{ + return 0; +} diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 15a1671740cc..858787a12302 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -564,7 +564,19 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = { \t}, }; -static void decompress(int offset, struct pmu_event *pe) +static void decompress_event(int offset, struct pmu_event *pe) +{ +\tconst char *p = &big_c_string[offset]; +""") + for attr in _json_event_attributes: + _args.output_file.write(f""" +\tpe->{attr} = (*p == '\\0' ? 
NULL : p); +""") + if attr == _json_event_attributes[-1]: + continue + _args.output_file.write('\twhile (*p++);') + _args.output_file.write("""} +static void decompress_metric(int offset, struct pmu_metric *pe) { \tconst char *p = &big_c_string[offset]; """) @@ -585,7 +597,9 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table, struct pmu_event pe; int ret; - decompress(table->entries[i].offset, &pe); + decompress_event(table->entries[i].offset, &pe); + if (!pe.name) + continue; ret = fn(&pe, table, data); if (ret) return ret; @@ -593,6 +607,24 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table, return 0; } +int pmu_events_table_for_each_metric(const struct pmu_events_table *table, + pmu_metric_iter_fn fn, + void *data) +{ + for (size_t i = 0; i < table->length; i++) { + struct pmu_metric pm; + int ret; + + decompress_metric(table->entries[i].offset, &pm); + if (!pm.metric_expr) + continue; + ret = fn(&pm, table, data); + if (ret) + return ret; + } + return 0; +} + const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu) { const struct pmu_events_table *table = NULL; @@ -644,6 +676,19 @@ int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data) return 0; } +int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data) +{ + for (const struct pmu_events_map *tables = &pmu_events_map[0]; + tables->arch; + tables++) { + int ret = pmu_events_table_for_each_metric(&tables->table, fn, data); + + if (ret) + return ret; + } + return 0; +} + const struct pmu_events_table *find_sys_events_table(const char *name) { for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; @@ -667,6 +712,19 @@ int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data) } return 0; } + +int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data) +{ + for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; + tables->name; + tables++) { + int ret = pmu_events_table_for_each_metric(&tables->table, fn, 
data); + + if (ret) + return ret; + } + return 0; +} """) diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index fe343c4d8016..45c0f508af23 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -30,20 +30,46 @@ struct pmu_event { const char *metric_constraint; }; +struct pmu_metric { + const char *name; + const char *compat; + const char *event; + const char *desc; + const char *topic; + const char *long_desc; + const char *pmu; + const char *unit; + const char *perpkg; + const char *aggr_mode; + const char *metric_expr; + const char *metric_name; + const char *metric_group; + const char *deprecated; + const char *metric_constraint; +}; + struct pmu_events_table; typedef int (*pmu_event_iter_fn)(const struct pmu_event *pe, const struct pmu_events_table *table, void *data); +typedef int (*pmu_metric_iter_fn)(const struct pmu_metric *pm, + const struct pmu_events_table *table, + void *data); + int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn, void *data); +int pmu_events_table_for_each_metric(const struct pmu_events_table *table, pmu_metric_iter_fn fn, + void *data); const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu); const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid); int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data); +int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data); const struct pmu_events_table *find_sys_events_table(const char *name); int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data); +int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data); #endif diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index a9f2330f6257..e5fb3d5a06c3 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -840,7 +840,7 @@ struct metric { struct metric_ref metric_ref; }; -static int 
test__parsing_callback(const struct pmu_event *pe, const struct pmu_events_table *table, +static int test__parsing_callback(const struct pmu_metric *pm, const struct pmu_events_table *table, void *data) { int *failures = data; @@ -854,10 +854,10 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e }; int err = 0; - if (!pe->metric_expr) + if (!pm->metric_expr) return 0; - pr_debug("Found metric '%s'\n", pe->metric_name); + pr_debug("Found metric '%s'\n", pm->metric_name); (*failures)++; /* @@ -877,14 +877,14 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e perf_evlist__set_maps(&evlist->core, cpus, NULL); runtime_stat__init(&st); - err = metricgroup__parse_groups_test(evlist, table, pe->metric_name, + err = metricgroup__parse_groups_test(evlist, table, pm->metric_name, false, false, &metric_events); if (err) { - if (!strcmp(pe->metric_name, "M1") || !strcmp(pe->metric_name, "M2") || - !strcmp(pe->metric_name, "M3")) { + if (!strcmp(pm->metric_name, "M1") || !strcmp(pm->metric_name, "M2") || + !strcmp(pm->metric_name, "M3")) { (*failures)--; - pr_debug("Expected broken metric %s skipping\n", pe->metric_name); + pr_debug("Expected broken metric %s skipping\n", pm->metric_name); err = 0; } goto out_err; @@ -912,7 +912,7 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e struct metric_expr *mexp; list_for_each_entry (mexp, &me->head, nd) { - if (strcmp(mexp->metric_name, pe->metric_name)) + if (strcmp(mexp->metric_name, pm->metric_name)) continue; pr_debug("Result %f\n", test_generic_metric(mexp, 0, &st)); err = 0; @@ -921,11 +921,11 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e } } } - pr_debug("Didn't find parsed metric %s", pe->metric_name); + pr_debug("Didn't find parsed metric %s", pm->metric_name); err = 1; out_err: if (err) - pr_debug("Broken metric %s\n", pe->metric_name); + pr_debug("Broken metric %s\n", pm->metric_name); /* 
... cleanup. */ metricgroup__rblist_exit(&metric_events); @@ -941,8 +941,8 @@ static int test__parsing(struct test_suite *test __maybe_unused, { int failures = 0; - pmu_for_each_core_event(test__parsing_callback, &failures); - pmu_for_each_sys_event(test__parsing_callback, &failures); + pmu_for_each_core_metric(test__parsing_callback, &failures); + pmu_for_each_sys_metric(test__parsing_callback, &failures); return failures == 0 ? TEST_OK : TEST_FAIL; } @@ -1021,14 +1021,11 @@ out: return ret; } -static int test__parsing_fake_callback(const struct pmu_event *pe, +static int test__parsing_fake_callback(const struct pmu_metric *pm, const struct pmu_events_table *table __maybe_unused, void *data __maybe_unused) { - if (!pe->metric_expr) - return 0; - - return metric_parse_fake(pe->metric_name, pe->metric_expr); + return metric_parse_fake(pm->metric_name, pm->metric_expr); } /* @@ -1047,11 +1044,11 @@ static int test__parsing_fake(struct test_suite *test __maybe_unused, return err; } - err = pmu_for_each_core_event(test__parsing_fake_callback, NULL); + err = pmu_for_each_core_metric(test__parsing_fake_callback, NULL); if (err) return err; - return pmu_for_each_sys_event(test__parsing_fake_callback, NULL); + return pmu_for_each_sys_metric(test__parsing_fake_callback, NULL); } static struct test_case pmu_events_tests[] = { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b9c273ed080a..47fd02af66f1 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -167,14 +167,14 @@ static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); } -static bool metricgroup__has_constraint(const struct pmu_event *pe) +static bool metricgroup__has_constraint(const struct pmu_metric *pm) { - if (!pe->metric_constraint) + if (!pm->metric_constraint) return false; - if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && + if (!strcmp(pm->metric_constraint, 
"NO_NMI_WATCHDOG") && sysctl__nmi_watchdog_enabled()) { - metricgroup___watchdog_constraint_hint(pe->metric_name, false); + metricgroup___watchdog_constraint_hint(pm->metric_name, false); return true; } @@ -193,7 +193,7 @@ static void metric__free(struct metric *m) free(m); } -static struct metric *metric__new(const struct pmu_event *pe, +static struct metric *metric__new(const struct pmu_metric *pm, const char *modifier, bool metric_no_group, int runtime, @@ -210,15 +210,15 @@ static struct metric *metric__new(const struct pmu_event *pe, if (!m->pctx) goto out_err; - m->metric_name = pe->metric_name; + m->metric_name = pm->metric_name; m->modifier = NULL; if (modifier) { m->modifier = strdup(modifier); if (!m->modifier) goto out_err; } - m->metric_expr = pe->metric_expr; - m->metric_unit = pe->unit; + m->metric_expr = pm->metric_expr; + m->metric_unit = pm->unit; m->pctx->sctx.user_requested_cpu_list = NULL; if (user_requested_cpu_list) { m->pctx->sctx.user_requested_cpu_list = strdup(user_requested_cpu_list); @@ -227,7 +227,7 @@ static struct metric *metric__new(const struct pmu_event *pe, } m->pctx->sctx.runtime = runtime; m->pctx->sctx.system_wide = system_wide; - m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + m->has_constraint = metric_no_group || metricgroup__has_constraint(pm); m->metric_refs = NULL; m->evlist = NULL; @@ -348,10 +348,10 @@ static bool match_metric(const char *n, const char *list) return false; } -static bool match_pe_metric(const struct pmu_event *pe, const char *metric) +static bool match_pm_metric(const struct pmu_metric *pm, const char *metric) { - return match_metric(pe->metric_group, metric) || - match_metric(pe->metric_name, metric); + return match_metric(pm->metric_group, metric) || + match_metric(pm->metric_name, metric); } /** struct mep - RB-tree node for building printing information. 
*/ @@ -420,13 +420,13 @@ static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, return NULL; } -static int metricgroup__add_to_mep_groups(const struct pmu_event *pe, +static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm, struct rblist *groups) { const char *g; char *omg, *mg; - mg = strdup(pe->metric_group ?: "No_group"); + mg = strdup(pm->metric_group ?: "No_group"); if (!mg) return -ENOMEM; omg = mg; @@ -435,15 +435,15 @@ static int metricgroup__add_to_mep_groups(const struct pmu_event *pe, g = skip_spaces(g); if (strlen(g)) - me = mep_lookup(groups, g, pe->metric_name); + me = mep_lookup(groups, g, pm->metric_name); else - me = mep_lookup(groups, "No_group", pe->metric_name); + me = mep_lookup(groups, "No_group", pm->metric_name); if (me) { - me->metric_desc = pe->desc; - me->metric_long_desc = pe->long_desc; - me->metric_expr = pe->metric_expr; - me->metric_unit = pe->unit; + me->metric_desc = pm->desc; + me->metric_long_desc = pm->long_desc; + me->metric_expr = pm->metric_expr; + me->metric_unit = pm->unit; } } free(omg); @@ -452,40 +452,37 @@ static int metricgroup__add_to_mep_groups(const struct pmu_event *pe, } struct metricgroup_iter_data { - pmu_event_iter_fn fn; + pmu_metric_iter_fn fn; void *data; }; -static int metricgroup__sys_event_iter(const struct pmu_event *pe, +static int metricgroup__sys_event_iter(const struct pmu_metric *pm, const struct pmu_events_table *table, void *data) { struct metricgroup_iter_data *d = data; struct perf_pmu *pmu = NULL; - if (!pe->metric_expr || !pe->compat) + if (!pm->metric_expr || !pm->compat) return 0; while ((pmu = perf_pmu__scan(pmu))) { - if (!pmu->id || strcmp(pmu->id, pe->compat)) + if (!pmu->id || strcmp(pmu->id, pm->compat)) continue; - return d->fn(pe, table, d->data); + return d->fn(pm, table, d->data); } return 0; } -static int metricgroup__add_to_mep_groups_callback(const struct pmu_event *pe, +static int metricgroup__add_to_mep_groups_callback(const struct 
pmu_metric *pm, const struct pmu_events_table *table __maybe_unused, void *vdata) { struct rblist *groups = vdata; - if (!pe->metric_name) - return 0; - - return metricgroup__add_to_mep_groups(pe, groups); + return metricgroup__add_to_mep_groups(pm, groups); } void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) @@ -500,16 +497,16 @@ void metricgroup__print(const struct print_callbacks *print_cb, void *print_stat groups.node_delete = mep_delete; table = pmu_events_table__find(); if (table) { - pmu_events_table_for_each_event(table, - metricgroup__add_to_mep_groups_callback, - &groups); + pmu_events_table_for_each_metric(table, + metricgroup__add_to_mep_groups_callback, + &groups); } { struct metricgroup_iter_data data = { .fn = metricgroup__add_to_mep_groups_callback, .data = &groups, }; - pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); + pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); } for (node = rb_first_cached(&groups.entries); node; node = next) { @@ -743,7 +740,7 @@ static int metricgroup__build_event_string(struct strbuf *events, #undef RETURN_IF_NON_ZERO } -int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) +int __weak arch_get_runtimeparam(const struct pmu_metric *pm __maybe_unused) { return 1; } @@ -773,10 +770,10 @@ struct metricgroup_add_iter_data { static bool metricgroup__find_metric(const char *metric, const struct pmu_events_table *table, - struct pmu_event *pe); + struct pmu_metric *pm); static int add_metric(struct list_head *metric_list, - const struct pmu_event *pe, + const struct pmu_metric *pm, const char *modifier, bool metric_no_group, const char *user_requested_cpu_list, @@ -816,10 +813,10 @@ static int resolve_metric(struct list_head *metric_list, size_t bkt; struct to_resolve { /* The metric to resolve. */ - struct pmu_event pe; + struct pmu_metric pm; /* * The key in the IDs map, this may differ from in case, - * etc. from pe->metric_name. + * etc. 
from pm->metric_name. */ const char *key; } *pending = NULL; @@ -830,15 +827,15 @@ static int resolve_metric(struct list_head *metric_list, * the pending array. */ hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { - struct pmu_event pe; + struct pmu_metric pm; - if (metricgroup__find_metric(cur->pkey, table, &pe)) { + if (metricgroup__find_metric(cur->pkey, table, &pm)) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) return -ENOMEM; - memcpy(&pending[pending_cnt].pe, &pe, sizeof(pe)); + memcpy(&pending[pending_cnt].pm, &pm, sizeof(pm)); pending[pending_cnt].key = cur->pkey; pending_cnt++; } @@ -853,7 +850,7 @@ static int resolve_metric(struct list_head *metric_list, * context. */ for (i = 0; i < pending_cnt; i++) { - ret = add_metric(metric_list, &pending[i].pe, modifier, metric_no_group, + ret = add_metric(metric_list, &pending[i].pm, modifier, metric_no_group, user_requested_cpu_list, system_wide, root_metric, visited, table); if (ret) @@ -867,7 +864,7 @@ static int resolve_metric(struct list_head *metric_list, /** * __add_metric - Add a metric to metric_list. * @metric_list: The list the metric is added to. - * @pe: The pmu_event containing the metric to be added. + * @pm: The pmu_metric containing the metric to be added. * @modifier: if non-null event modifiers like "u". * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the @@ -884,7 +881,7 @@ static int resolve_metric(struct list_head *metric_list, * architecture perf is running upon. 
*/ static int __add_metric(struct list_head *metric_list, - const struct pmu_event *pe, + const struct pmu_metric *pm, const char *modifier, bool metric_no_group, int runtime, @@ -898,13 +895,13 @@ static int __add_metric(struct list_head *metric_list, int ret; bool is_root = !root_metric; struct visited_metric visited_node = { - .name = pe->metric_name, + .name = pm->metric_name, .parent = visited, }; for (vm = visited; vm; vm = vm->parent) { - if (!strcmp(pe->metric_name, vm->name)) { - pr_err("failed: recursion detected for %s\n", pe->metric_name); + if (!strcmp(pm->metric_name, vm->name)) { + pr_err("failed: recursion detected for %s\n", pm->metric_name); return -1; } } @@ -914,7 +911,7 @@ static int __add_metric(struct list_head *metric_list, * This metric is the root of a tree and may reference other * metrics that are added recursively. */ - root_metric = metric__new(pe, modifier, metric_no_group, runtime, + root_metric = metric__new(pm, modifier, metric_no_group, runtime, user_requested_cpu_list, system_wide); if (!root_metric) return -ENOMEM; @@ -929,7 +926,7 @@ static int __add_metric(struct list_head *metric_list, */ if (root_metric->metric_refs) { for (; root_metric->metric_refs[cnt].metric_name; cnt++) { - if (!strcmp(pe->metric_name, + if (!strcmp(pm->metric_name, root_metric->metric_refs[cnt].metric_name)) return 0; } @@ -947,8 +944,8 @@ static int __add_metric(struct list_head *metric_list, * need to change them, so there's no need to create * our own copy. */ - root_metric->metric_refs[cnt].metric_name = pe->metric_name; - root_metric->metric_refs[cnt].metric_expr = pe->metric_expr; + root_metric->metric_refs[cnt].metric_name = pm->metric_name; + root_metric->metric_refs[cnt].metric_expr = pm->metric_expr; /* Null terminate array. 
*/ root_metric->metric_refs[cnt+1].metric_name = NULL; @@ -959,7 +956,7 @@ static int __add_metric(struct list_head *metric_list, * For both the parent and referenced metrics, we parse * all the metric's IDs and add it to the root context. */ - if (expr__find_ids(pe->metric_expr, NULL, root_metric->pctx) < 0) { + if (expr__find_ids(pm->metric_expr, NULL, root_metric->pctx) < 0) { /* Broken metric. */ ret = -EINVAL; } else { @@ -981,37 +978,37 @@ static int __add_metric(struct list_head *metric_list, struct metricgroup__find_metric_data { const char *metric; - struct pmu_event *pe; + struct pmu_metric *pm; }; -static int metricgroup__find_metric_callback(const struct pmu_event *pe, +static int metricgroup__find_metric_callback(const struct pmu_metric *pm, const struct pmu_events_table *table __maybe_unused, void *vdata) { struct metricgroup__find_metric_data *data = vdata; - if (!match_metric(pe->metric_name, data->metric)) + if (!match_metric(pm->metric_name, data->metric)) return 0; - memcpy(data->pe, pe, sizeof(*pe)); + memcpy(data->pm, pm, sizeof(*pm)); return 1; } static bool metricgroup__find_metric(const char *metric, const struct pmu_events_table *table, - struct pmu_event *pe) + struct pmu_metric *pm) { struct metricgroup__find_metric_data data = { .metric = metric, - .pe = pe, + .pm = pm, }; - return pmu_events_table_for_each_event(table, metricgroup__find_metric_callback, &data) + return pmu_events_table_for_each_metric(table, metricgroup__find_metric_callback, &data) ? 
true : false; } static int add_metric(struct list_head *metric_list, - const struct pmu_event *pe, + const struct pmu_metric *pm, const char *modifier, bool metric_no_group, const char *user_requested_cpu_list, @@ -1022,16 +1019,16 @@ static int add_metric(struct list_head *metric_list, { int ret = 0; - pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); + pr_debug("metric expr %s for %s\n", pm->metric_expr, pm->metric_name); - if (!strstr(pe->metric_expr, "?")) { - ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0, + if (!strstr(pm->metric_expr, "?")) { + ret = __add_metric(metric_list, pm, modifier, metric_no_group, 0, user_requested_cpu_list, system_wide, root_metric, visited, table); } else { int j, count; - count = arch_get_runtimeparam(pe); + count = arch_get_runtimeparam(pm); /* This loop is added to create multiple * events depend on count value and add @@ -1039,7 +1036,7 @@ static int add_metric(struct list_head *metric_list, */ for (j = 0; j < count && !ret; j++) - ret = __add_metric(metric_list, pe, modifier, metric_no_group, j, + ret = __add_metric(metric_list, pm, modifier, metric_no_group, j, user_requested_cpu_list, system_wide, root_metric, visited, table); } @@ -1047,17 +1044,17 @@ static int add_metric(struct list_head *metric_list, return ret; } -static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, +static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, const struct pmu_events_table *table __maybe_unused, void *data) { struct metricgroup_add_iter_data *d = data; int ret; - if (!match_pe_metric(pe, d->metric_name)) + if (!match_pm_metric(pm, d->metric_name)) return 0; - ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group, + ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group, d->user_requested_cpu_list, d->system_wide, d->root_metric, d->visited, d->table); if (ret) @@ -1107,19 +1104,19 @@ struct metricgroup__add_metric_data { bool 
has_match; }; -static int metricgroup__add_metric_callback(const struct pmu_event *pe, +static int metricgroup__add_metric_callback(const struct pmu_metric *pm, const struct pmu_events_table *table, void *vdata) { struct metricgroup__add_metric_data *data = vdata; int ret = 0; - if (pe->metric_expr && - (match_metric(pe->metric_group, data->metric_name) || - match_metric(pe->metric_name, data->metric_name))) { + if (pm->metric_expr && + (match_metric(pm->metric_group, data->metric_name) || + match_metric(pm->metric_name, data->metric_name))) { data->has_match = true; - ret = add_metric(data->list, pe, data->modifier, data->metric_no_group, + ret = add_metric(data->list, pm, data->modifier, data->metric_no_group, data->user_requested_cpu_list, data->system_wide, /*root_metric=*/NULL, /*visited_metrics=*/NULL, table); } @@ -1166,8 +1163,8 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier * Iterate over all metrics seeing if metric matches either the * name or group. When it does add the metric to the list. */ - ret = pmu_events_table_for_each_event(table, metricgroup__add_metric_callback, - &data); + ret = pmu_events_table_for_each_metric(table, metricgroup__add_metric_callback, + &data); if (ret) goto out; @@ -1189,7 +1186,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier }, }; - pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); + pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); } /* End of pmu events. 
*/ if (!has_match) @@ -1603,16 +1600,16 @@ int metricgroup__parse_groups_test(struct evlist *evlist, &perf_pmu__fake, metric_events, table); } -static int metricgroup__has_metric_callback(const struct pmu_event *pe, +static int metricgroup__has_metric_callback(const struct pmu_metric *pm, const struct pmu_events_table *table __maybe_unused, void *vdata) { const char *metric = vdata; - if (!pe->metric_expr) + if (!pm->metric_expr) return 0; - if (match_metric(pe->metric_name, metric)) + if (match_metric(pm->metric_name, metric)) return 1; return 0; @@ -1625,8 +1622,8 @@ bool metricgroup__has_metric(const char *metric) if (!table) return false; - return pmu_events_table_for_each_event(table, metricgroup__has_metric_callback, - (void *)metric) ? true : false; + return pmu_events_table_for_each_metric(table, metricgroup__has_metric_callback, + (void *)metric) ? true : false; } int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 0013cf582173..b1f186d0f514 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -81,7 +81,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); bool metricgroup__has_metric(const char *metric); -int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused); +int arch_get_runtimeparam(const struct pmu_metric *pm); void metricgroup__rblist_exit(struct rblist *metric_events); int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, From 96d2a74618e35c67b1c2245bb927b7d7fbdae526 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:35 -0800 Subject: [PATCH 081/114] perf pmu-events: Separate the metrics from events for no jevents Separate the event and metric table when building without jevents. 
Add find_core_metrics_table and perf_pmu__find_metrics_table while renaming existing utilities to be event specific, so that users can find the right table for their need. Committer notes: Fix the build on aarch64 with: tools/perf/arch/arm64/util/pmu.c @@ -32,7 +32,7 @@ const struct pmu_events_table *pmu_events_table__find(void) - return perf_pmu__find_table(pmu); + return perf_pmu__find_events_table(pmu); Reviewed-by: John Garry Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/pmu.c | 2 +- tools/perf/pmu-events/empty-pmu-events.c | 88 ++++++++++++++++++------ tools/perf/pmu-events/jevents.py | 7 +- tools/perf/pmu-events/pmu-events.h | 4 +- tools/perf/tests/expand-cgroup.c | 2 +- tools/perf/tests/parse-metric.c | 2 +- tools/perf/util/pmu.c | 4 +- 7 files changed, 80 insertions(+), 29 deletions(-) diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 801bf52e2ea6..b4eaf00ec5a8 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -32,7 +32,7 @@ const struct pmu_events_table *pmu_events_table__find(void) struct perf_pmu *pmu = pmu__find_core_pmu(); if (pmu) - return perf_pmu__find_table(pmu); + return perf_pmu__find_events_table(pmu); return NULL; } diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index 4e39d1a8d6d6..10bd4943ebf8 100644 
--- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -11,7 +11,7 @@ #include #include -static const struct pmu_event pme_test_soc_cpu[] = { +static const struct pmu_event pmu_events__test_soc_cpu[] = { { .name = "l3_cache_rd", .event = "event=0x40", @@ -105,6 +105,14 @@ static const struct pmu_event pme_test_soc_cpu[] = { .desc = "L2 BTB Correction", .topic = "branch", }, + { + .name = 0, + .event = 0, + .desc = 0, + }, +}; + +static const struct pmu_metric pmu_metrics__test_soc_cpu[] = { { .metric_expr = "1 / IPC", .metric_name = "CPI", @@ -170,9 +178,8 @@ static const struct pmu_event pme_test_soc_cpu[] = { .metric_name = "L1D_Cache_Fill_BW", }, { - .name = 0, - .event = 0, - .desc = 0, + .metric_expr = 0, + .metric_name = 0, }, }; @@ -197,7 +204,8 @@ struct pmu_metrics_table { struct pmu_events_map { const char *arch; const char *cpuid; - const struct pmu_events_table table; + const struct pmu_events_table event_table; + const struct pmu_metrics_table metric_table; }; /* @@ -208,12 +216,14 @@ static const struct pmu_events_map pmu_events_map[] = { { .arch = "testarch", .cpuid = "testcpu", - .table = { pme_test_soc_cpu }, + .event_table = { pmu_events__test_soc_cpu }, + .metric_table = { pmu_metrics__test_soc_cpu }, }, { .arch = 0, .cpuid = 0, - .table = { 0 }, + .event_table = { 0 }, + .metric_table = { 0 }, }, }; @@ -259,12 +269,9 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = { int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn, void *data) { - for (const struct pmu_event *pe = &table->entries[0]; pe->name || pe->metric_expr; pe++) { - int ret; + for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) { + int ret = fn(pe, table, data); - if (!pe->name) - continue; - ret = fn(pe, table, data); if (ret) return ret; } @@ -276,19 +283,16 @@ int pmu_events_table_for_each_metric(const struct pmu_events_table *etable, pmu_ { struct pmu_metrics_table 
*table = (struct pmu_metrics_table *)etable; - for (const struct pmu_metric *pm = &table->entries[0]; pm->name || pm->metric_expr; pm++) { - int ret; + for (const struct pmu_metric *pm = &table->entries[0]; pm->metric_expr; pm++) { + int ret = fn(pm, etable, data); - if (!pm->metric_expr) - continue; - ret = fn(pm, etable, data); if (ret) return ret; } return 0; } -const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu) +const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) { const struct pmu_events_table *table = NULL; char *cpuid = perf_pmu__getcpuid(pmu); @@ -308,7 +312,35 @@ const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu) break; if (!strcmp_cpuid_str(map->cpuid, cpuid)) { - table = &map->table; + table = &map->event_table; + break; + } + } + free(cpuid); + return table; +} + +const struct pmu_events_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) +{ + const struct pmu_events_table *table = NULL; + char *cpuid = perf_pmu__getcpuid(pmu); + int i; + + /* on some platforms which uses cpus map, cpuid can be NULL for + * PMUs other than CORE PMUs. 
+ */ + if (!cpuid) + return NULL; + + i = 0; + for (;;) { + const struct pmu_events_map *map = &pmu_events_map[i++]; + + if (!map->cpuid) + break; + + if (!strcmp_cpuid_str(map->cpuid, cpuid)) { + table = (const struct pmu_events_table *)&map->metric_table; break; } } @@ -322,7 +354,18 @@ const struct pmu_events_table *find_core_events_table(const char *arch, const ch tables->arch; tables++) { if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid)) - return &tables->table; + return &tables->event_table; + } + return NULL; +} + +const struct pmu_events_table *find_core_metrics_table(const char *arch, const char *cpuid) +{ + for (const struct pmu_events_map *tables = &pmu_events_map[0]; + tables->arch; + tables++) { + if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid)) + return (const struct pmu_events_table *)&tables->metric_table; } return NULL; } @@ -330,7 +373,7 @@ const struct pmu_events_table *find_core_events_table(const char *arch, const ch int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data) { for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) { - int ret = pmu_events_table_for_each_event(&tables->table, fn, data); + int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data); if (ret) return ret; @@ -343,7 +386,8 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data) for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) { - int ret = pmu_events_table_for_each_metric(&tables->table, fn, data); + int ret = pmu_events_table_for_each_metric( + (const struct pmu_events_table *)&tables->metric_table, fn, data); if (ret) return ret; diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 858787a12302..8df14ab14fcf 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -625,7 +625,7 @@ int pmu_events_table_for_each_metric(const struct pmu_events_table 
*table, return 0; } -const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu) +const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) { const struct pmu_events_table *table = NULL; char *cpuid = perf_pmu__getcpuid(pmu); @@ -663,6 +663,11 @@ const struct pmu_events_table *find_core_events_table(const char *arch, const ch return NULL; } +const struct pmu_events_table *find_core_metrics_table(const char *arch, const char *cpuid) +{ + return (struct pmu_events_table *)find_core_events_table(arch, cpuid); +} + int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data) { for (const struct pmu_events_map *tables = &pmu_events_map[0]; diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 45c0f508af23..e2cd3e61acef 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -63,8 +63,10 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_ev int pmu_events_table_for_each_metric(const struct pmu_events_table *table, pmu_metric_iter_fn fn, void *data); -const struct pmu_events_table *perf_pmu__find_table(struct perf_pmu *pmu); +const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu); +const struct pmu_events_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu); const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid); +const struct pmu_events_table *find_core_metrics_table(const char *arch, const char *cpuid); int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data); int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data); diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index 51fb5f34c1dd..4f3195b84463 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -186,7 +186,7 @@ static int expand_metric_events(void) TEST_ASSERT_VAL("failed to get evlist", evlist); rblist__init(&metric_events); - 
pme_test = find_core_events_table("testarch", "testcpu"); + pme_test = find_core_metrics_table("testarch", "testcpu"); ret = metricgroup__parse_groups_test(evlist, pme_test, metric_str, false, false, &metric_events); if (ret < 0) { diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 21b7ac00d798..6c527cd805fe 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -96,7 +96,7 @@ static int __compute_metric(const char *name, struct value *vals, runtime_stat__init(&st); /* Parse the metric into metric_events list. */ - pme_test = find_core_events_table("testarch", "testcpu"); + pme_test = find_core_metrics_table("testarch", "testcpu"); err = metricgroup__parse_groups_test(evlist, pme_test, name, false, false, &metric_events); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8abf5b8439a7..3a67b17b4a16 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -663,7 +663,7 @@ char *perf_pmu__getcpuid(struct perf_pmu *pmu) __weak const struct pmu_events_table *pmu_events_table__find(void) { - return perf_pmu__find_table(NULL); + return perf_pmu__find_events_table(NULL); } /* @@ -794,7 +794,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { const struct pmu_events_table *table; - table = perf_pmu__find_table(pmu); + table = perf_pmu__find_events_table(pmu); if (!table) return; From d9dc8874d6ce46ccb3a0761e1540927ea07408ea Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:36 -0800 Subject: [PATCH 082/114] perf pmu-events: Remove now unused event and metric variables Previous changes separated the uses of pmu_event and pmu_metric, however, both structures contained all the variables of event and metric. This change removes the event variables from metric and the metric variables from event. Note, this change removes the setting of evsel's metric_name/expr as these fields are no longer part of struct pmu_event. 
The metric remains but is no longer implicitly requested when the event is. This impacts a few Intel uncore events, however, as the ScaleUnit is shared by the event and the metric this utility is questionable. Also the MetricNames look broken (contain spaces) in some cases and when trying to use the functionality with '-e' the metrics fail but regular metrics with '-M' work. For example, on SkylakeX '-M' works: ``` $ perf stat -M LLC_MISSES.PCIE_WRITE -a sleep 1 Performance counter stats for 'system wide': 0 UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 # 57896.0 Bytes LLC_MISSES.PCIE_WRITE (49.84%) 7,174 UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 (49.85%) 0 UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 (50.16%) 63 UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 (50.15%) 1.004576381 seconds time elapsed ``` whilst the event '-e' version is broken even with --group/-g (fwiw, we should also remove -g [1]): ``` $ perf stat -g -e LLC_MISSES.PCIE_WRITE -g -a sleep 1 Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 
event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Add UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 event to groups to get metric expression for LLC_MISSES.PCIE_WRITE Performance counter stats for 'system wide': 27,316 Bytes LLC_MISSES.PCIE_WRITE 1.004505469 seconds time elapsed ``` The code also carries warnings where the user is supposed to select events for metrics [2] but given the lack of use of such a feature, let's clean the code and just remove. 
[1] https://lore.kernel.org/lkml/20220707195610.303254-1-irogers@google.com/ [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/stat-shadow.c?id=01b8957b738f42f96a130079bc951b3cc78c5b8a#n425 Reviewed-by: John Garry Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 20 ++--------------- tools/perf/pmu-events/jevents.py | 20 +++++++++++++---- tools/perf/pmu-events/pmu-events.h | 22 +++++-------------- tools/perf/tests/pmu-events.c | 27 ----------------------- tools/perf/util/parse-events.c | 2 -- tools/perf/util/pmu.c | 35 +++--------------------------- tools/perf/util/pmu.h | 9 -------- tools/perf/util/print-events.c | 32 +++++++-------------------- tools/perf/util/print-events.h | 3 +-- 9 files changed, 36 insertions(+), 134 deletions(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 137d73edb541..791f513ae5b4 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -99,8 +99,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi const char *scale_unit __maybe_unused, bool deprecated, const char *event_type_desc, const char *desc, const char *long_desc, - const char *encoding_desc, - const char *metric_name, const char *metric_expr) + const char *encoding_desc) { struct print_state *print_state = ps; int pos; @@ -159,10 +158,6 @@ static 
void default_print_event(void *ps, const char *pmu_name, const char *topi if (print_state->detailed && encoding_desc) { printf("%*s", 8, ""); wordwrap(encoding_desc, 8, pager_get_columns(), 0); - if (metric_name) - printf(" MetricName: %s", metric_name); - if (metric_expr) - printf(" MetricExpr: %s", metric_expr); putchar('\n'); } } @@ -308,8 +303,7 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic, const char *scale_unit, bool deprecated, const char *event_type_desc, const char *desc, const char *long_desc, - const char *encoding_desc, - const char *metric_name, const char *metric_expr) + const char *encoding_desc) { struct json_print_state *print_state = ps; bool need_sep = false; @@ -366,16 +360,6 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic, encoding_desc); need_sep = true; } - if (metric_name) { - fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", - metric_name); - need_sep = true; - } - if (metric_expr) { - fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", - metric_expr); - need_sep = true; - } printf("%s}", need_sep ? "\n" : ""); strbuf_release(&buf); } diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 8df14ab14fcf..4cdbf34b7298 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -37,6 +37,11 @@ _json_event_attributes = [ 'metric_constraint', 'metric_expr', 'long_desc' ] +# Attributes that are in pmu_metric rather than pmu_event. 
+_json_metric_attributes = [ + 'metric_name', 'metric_group', 'metric_constraint', 'metric_expr', 'desc', + 'long_desc', 'unit', 'compat', 'aggr_mode' +] def removesuffix(s: str, suffix: str) -> str: """Remove the suffix from a string @@ -569,21 +574,28 @@ static void decompress_event(int offset, struct pmu_event *pe) \tconst char *p = &big_c_string[offset]; """) for attr in _json_event_attributes: - _args.output_file.write(f""" + if attr in _json_metric_attributes and 'metric_' in attr: + _args.output_file.write(f'\n\t/* Skip {attr} */\n') + else: + _args.output_file.write(f""" \tpe->{attr} = (*p == '\\0' ? NULL : p); """) if attr == _json_event_attributes[-1]: continue _args.output_file.write('\twhile (*p++);') _args.output_file.write("""} -static void decompress_metric(int offset, struct pmu_metric *pe) + +static void decompress_metric(int offset, struct pmu_metric *pm) { \tconst char *p = &big_c_string[offset]; """) for attr in _json_event_attributes: - _args.output_file.write(f""" -\tpe->{attr} = (*p == '\\0' ? NULL : p); + if attr in _json_metric_attributes: + _args.output_file.write(f""" +\tpm->{attr} = (*p == '\\0' ? 
NULL : p); """) + else: + _args.output_file.write(f'\n\t/* Skip {attr} */\n') if attr == _json_event_attributes[-1]: continue _args.output_file.write('\twhile (*p++);') diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index e2cd3e61acef..dca32979d6a4 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -23,29 +23,19 @@ struct pmu_event { const char *unit; const char *perpkg; const char *aggr_mode; - const char *metric_expr; - const char *metric_name; - const char *metric_group; const char *deprecated; - const char *metric_constraint; }; struct pmu_metric { - const char *name; - const char *compat; - const char *event; - const char *desc; - const char *topic; - const char *long_desc; - const char *pmu; - const char *unit; - const char *perpkg; - const char *aggr_mode; - const char *metric_expr; const char *metric_name; const char *metric_group; - const char *deprecated; + const char *metric_expr; + const char *unit; + const char *compat; + const char *aggr_mode; const char *metric_constraint; + const char *desc; + const char *long_desc; }; struct pmu_events_table; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index e5fb3d5a06c3..c2b3ada57cbc 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -337,36 +337,12 @@ static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event return -1; } - if (!is_same(e1->metric_expr, e2->metric_expr)) { - pr_debug2("testing event e1 %s: mismatched metric_expr, %s vs %s\n", - e1->name, e1->metric_expr, e2->metric_expr); - return -1; - } - - if (!is_same(e1->metric_name, e2->metric_name)) { - pr_debug2("testing event e1 %s: mismatched metric_name, %s vs %s\n", - e1->name, e1->metric_name, e2->metric_name); - return -1; - } - - if (!is_same(e1->metric_group, e2->metric_group)) { - pr_debug2("testing event e1 %s: mismatched metric_group, %s vs %s\n", - e1->name, e1->metric_group, 
e2->metric_group); - return -1; - } - if (!is_same(e1->deprecated, e2->deprecated)) { pr_debug2("testing event e1 %s: mismatched deprecated, %s vs %s\n", e1->name, e1->deprecated, e2->deprecated); return -1; } - if (!is_same(e1->metric_constraint, e2->metric_constraint)) { - pr_debug2("testing event e1 %s: mismatched metric_constant, %s vs %s\n", - e1->name, e1->metric_constraint, e2->metric_constraint); - return -1; - } - return 0; } @@ -432,9 +408,6 @@ static int test__pmu_event_table_core_callback(const struct pmu_event *pe, struct perf_pmu_test_event const **test_event_table; bool found = false; - if (!pe->name) - return 0; - if (pe->pmu) test_event_table = &uncore_events[0]; else diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 21cce83462b3..0336ff27c15f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1570,8 +1570,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; - evsel->metric_expr = info.metric_expr; - evsel->metric_name = info.metric_name; return 0; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 3a67b17b4a16..f8c214d8815f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -280,10 +280,6 @@ static void perf_pmu_update_alias(struct perf_pmu_alias *old, perf_pmu_assign_str(old->name, "long_desc", &old->long_desc, &newalias->long_desc); perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic); - perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr, - &newalias->metric_expr); - perf_pmu_assign_str(old->name, "metric_name", &old->metric_name, - &newalias->metric_name); perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str); old->scale = newalias->scale; old->per_pkg = newalias->per_pkg; @@ -299,8 +295,6 @@ void perf_pmu_free_alias(struct perf_pmu_alias *newalias) zfree(&newalias->long_desc); 
zfree(&newalias->topic); zfree(&newalias->str); - zfree(&newalias->metric_expr); - zfree(&newalias->metric_name); zfree(&newalias->pmu_name); parse_events_terms__purge(&newalias->terms); free(newalias); @@ -337,16 +331,13 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, int num; char newval[256]; char *long_desc = NULL, *topic = NULL, *unit = NULL, *perpkg = NULL, - *metric_expr = NULL, *metric_name = NULL, *deprecated = NULL, - *pmu_name = NULL; + *deprecated = NULL, *pmu_name = NULL; if (pe) { long_desc = (char *)pe->long_desc; topic = (char *)pe->topic; unit = (char *)pe->unit; perpkg = (char *)pe->perpkg; - metric_expr = (char *)pe->metric_expr; - metric_name = (char *)pe->metric_name; deprecated = (char *)pe->deprecated; pmu_name = (char *)pe->pmu; } @@ -401,8 +392,6 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, perf_pmu__parse_snapshot(alias, dir, name); } - alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL; - alias->metric_name = metric_name ? strdup(metric_name): NULL; alias->desc = desc ? strdup(desc) : NULL; alias->long_desc = long_desc ? strdup(long_desc) : desc ? strdup(desc) : NULL; @@ -756,9 +745,6 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe, struct pmu_add_cpu_aliases_map_data *data = vdata; const char *pname = pe->pmu ? 
pe->pmu : data->cpu_name; - if (!pe->name) - return 0; - if (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->name)) goto new_alias; @@ -813,12 +799,6 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, struct pmu_sys_event_iter_data *idata = data; struct perf_pmu *pmu = idata->pmu; - if (!pe->name) { - if (pe->metric_group || pe->metric_name) - return 0; - return -EINVAL; - } - if (!pe->compat || !pe->pmu) return 0; @@ -1400,8 +1380,6 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, info->unit = NULL; info->scale = 0.0; info->snapshot = false; - info->metric_expr = NULL; - info->metric_name = NULL; list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); @@ -1417,8 +1395,6 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, if (alias->per_pkg) info->per_pkg = true; - info->metric_expr = alias->metric_expr; - info->metric_name = alias->metric_name; list_del_init(&term->list); parse_events_term__delete(term); @@ -1634,8 +1610,7 @@ void print_pmu_events(const struct print_callbacks *print_cb, void *print_state) for (j = 0; j < len; j++) { const char *name, *alias = NULL, *scale_unit = NULL, *desc = NULL, *long_desc = NULL, - *encoding_desc = NULL, *topic = NULL, - *metric_name = NULL, *metric_expr = NULL; + *encoding_desc = NULL, *topic = NULL; bool deprecated = false; size_t buf_used; @@ -1673,8 +1648,6 @@ void print_pmu_events(const struct print_callbacks *print_cb, void *print_state) buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, "%s/%s/", aliases[j].pmu->name, aliases[j].event->str) + 1; - metric_name = aliases[j].event->metric_name; - metric_expr = aliases[j].event->metric_expr; deprecated = aliases[j].event->deprecated; } print_cb->print_event(print_state, @@ -1687,9 +1660,7 @@ void print_pmu_events(const struct print_callbacks *print_cb, void *print_state) "Kernel PMU event", desc, long_desc, - encoding_desc, - metric_name, - 
metric_expr); + encoding_desc); } if (printed && pager_in_use()) printf("\n"); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 2bdc560f19c7..64c596a358cc 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -132,8 +132,6 @@ extern struct perf_pmu perf_pmu__fake; struct perf_pmu_info { const char *unit; - const char *metric_expr; - const char *metric_name; double scale; bool per_pkg; bool snapshot; @@ -187,13 +185,6 @@ struct perf_pmu_alias { * default. */ bool deprecated; - /** - * @metric_expr: A metric expression associated with an event. Doing - * this makes little sense due to scale and unit applying to both. - */ - char *metric_expr; - /** @metric_name: A name for the metric. unit applying to both. */ - char *metric_name; /** @pmu_name: The name copied from struct perf_pmu. */ char *pmu_name; }; diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 2646ae18d9f9..62e9ea7dcf40 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -101,9 +101,7 @@ void print_tracepoint_events(const struct print_callbacks *print_cb, void *print "Tracepoint event", /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); } free(dir_path); free(evt_namelist); @@ -195,9 +193,7 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) "SDT event", /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); free(evt_name); } @@ -255,9 +251,7 @@ int print_hwcache_events(const struct print_callbacks *print_cb, void *print_sta event_type_descriptors[PERF_TYPE_HW_CACHE], /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); } strlist__delete(evt_name_list); return 0; @@ -277,9 +271,7 @@ void print_tool_events(const struct print_callbacks 
*print_cb, void *print_state "Tool event", /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); } } @@ -331,9 +323,7 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta event_type_descriptors[type], /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); } strlist__delete(evt_name_list); } @@ -364,9 +354,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) event_type_descriptors[PERF_TYPE_RAW], /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); print_cb->print_event(print_state, /*topic=*/NULL, @@ -378,9 +366,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) event_type_descriptors[PERF_TYPE_RAW], "(see 'man perf-list' on how to encode it)", /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); print_cb->print_event(print_state, /*topic=*/NULL, @@ -392,9 +378,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) event_type_descriptors[PERF_TYPE_BREAKPOINT], /*desc=*/NULL, /*long_desc=*/NULL, - /*encoding_desc=*/NULL, - /*metric_name=*/NULL, - /*metric_expr=*/NULL); + /*encoding_desc=*/NULL); print_tracepoint_events(print_cb, print_state); diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index c237e53c4487..716dcf4b4859 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -16,8 +16,7 @@ struct print_callbacks { const char *scale_unit, bool deprecated, const char *event_type_desc, const char *desc, const char *long_desc, - const char *encoding_desc, - const char *metric_name, const char *metric_expr); + const char *encoding_desc); void (*print_metric)(void *print_state, const 
char *group, const char *name, From 6f8f98ab6c16101b0694ef7e70425ded9d7af30e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:37 -0800 Subject: [PATCH 083/114] perf stat: Remove evsel metric_name/expr Metrics are their own unit and these variables held broken metrics previously and now just hold the value NULL. Remove code that used these variables. Reviewed-by: John Garry Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 1 - tools/perf/util/cgroup.c | 1 - tools/perf/util/evsel.c | 2 - tools/perf/util/evsel.h | 2 - tools/perf/util/python.c | 7 --- tools/perf/util/stat-shadow.c | 112 ---------------------------------- tools/perf/util/stat.h | 1 - 7 files changed, 126 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9f3e4b257516..5d18a5a6f662 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2524,7 +2524,6 @@ int cmd_stat(int argc, const char **argv) &stat_config.metric_events); zfree(&metrics); } - perf_stat__collect_metric_expr(evsel_list); perf_stat__init_shadow_stats(); if (add_default_attributes()) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index cd978c240e0d..bfb13306d82c 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -481,7 +481,6 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, nr_cgroups++; if 
(metric_events) { - perf_stat__collect_metric_expr(tmp_list); if (metricgroup__copy_metric_events(tmp_list, cgrp, metric_events, &orig_metric_events) < 0) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 019e53db03b3..51e8ce6edddc 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -285,8 +285,6 @@ void evsel__init(struct evsel *evsel, evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; - evsel->metric_expr = NULL; - evsel->metric_name = NULL; evsel->metric_events = NULL; evsel->per_pkg_mask = NULL; evsel->collect_stat = false; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d572be41b960..24cb807ef6ce 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -105,8 +105,6 @@ struct evsel { * metric fields are similar, but needs more care as they can have * references to other metric (evsel). */ - const char * metric_expr; - const char * metric_name; struct evsel **metric_events; struct evsel *metric_leader; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 9e5d881b0987..42e8b813d010 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -76,13 +76,6 @@ const char *perf_env__arch(struct perf_env *env __maybe_unused) return NULL; } -/* - * Add this one here not to drag util/stat-shadow.c - */ -void perf_stat__collect_metric_expr(struct evlist *evsel_list) -{ -} - /* * These ones are needed not to drag the PMU bandwagon, jevents generated * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index cadb2df23c87..35ea4813f468 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -346,114 +346,6 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static struct evsel *perf_stat__find_event(struct evlist *evsel_list, - const char 
*name) -{ - struct evsel *c2; - - evlist__for_each_entry (evsel_list, c2) { - if (!strcasecmp(c2->name, name) && !c2->collect_stat) - return c2; - } - return NULL; -} - -/* Mark MetricExpr target events and link events using them to them. */ -void perf_stat__collect_metric_expr(struct evlist *evsel_list) -{ - struct evsel *counter, *leader, **metric_events, *oc; - bool found; - struct expr_parse_ctx *ctx; - struct hashmap_entry *cur; - size_t bkt; - int i; - - ctx = expr__ctx_new(); - if (!ctx) { - pr_debug("expr__ctx_new failed"); - return; - } - evlist__for_each_entry(evsel_list, counter) { - bool invalid = false; - - leader = evsel__leader(counter); - if (!counter->metric_expr) - continue; - - expr__ctx_clear(ctx); - metric_events = counter->metric_events; - if (!metric_events) { - if (expr__find_ids(counter->metric_expr, - counter->name, - ctx) < 0) - continue; - - metric_events = calloc(sizeof(struct evsel *), - hashmap__size(ctx->ids) + 1); - if (!metric_events) { - expr__ctx_free(ctx); - return; - } - counter->metric_events = metric_events; - } - - i = 0; - hashmap__for_each_entry(ctx->ids, cur, bkt) { - const char *metric_name = cur->pkey; - - found = false; - if (leader) { - /* Search in group */ - for_each_group_member (oc, leader) { - if (!strcasecmp(oc->name, - metric_name) && - !oc->collect_stat) { - found = true; - break; - } - } - } - if (!found) { - /* Search ignoring groups */ - oc = perf_stat__find_event(evsel_list, - metric_name); - } - if (!oc) { - /* Deduping one is good enough to handle duplicated PMUs. */ - static char *printed; - - /* - * Adding events automatically would be difficult, because - * it would risk creating groups that are not schedulable. - * perf stat doesn't understand all the scheduling constraints - * of events. So we ask the user instead to add the missing - * events. 
- */ - if (!printed || - strcasecmp(printed, metric_name)) { - fprintf(stderr, - "Add %s event to groups to get metric expression for %s\n", - metric_name, - counter->name); - free(printed); - printed = strdup(metric_name); - } - invalid = true; - continue; - } - metric_events[i++] = oc; - oc->collect_stat = true; - } - metric_events[i] = NULL; - if (invalid) { - free(metric_events); - counter->metric_events = NULL; - counter->metric_expr = NULL; - } - } - expr__ctx_free(ctx); -} - static double runtime_stat_avg(struct runtime_stat *st, enum stat_type type, int map_idx, struct runtime_stat_data *rsd) @@ -1299,10 +1191,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, color = NULL; print_metric(config, ctxp, color, "%8.1f%%", "Core Bound", core_bound * 100.); - } else if (evsel->metric_expr) { - generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, - evsel->name, evsel->metric_name, NULL, 1, - map_idx, out, st); } else if (runtime_stat_n(st, STAT_NSECS, map_idx, &rsd) != 0) { char unit = ' '; char unit_buf[10] = "/sec"; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 499c3bf81333..b1c29156c560 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -257,7 +257,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct perf_stat_output_ctx *out, struct rblist *metric_events, struct runtime_stat *st); -void perf_stat__collect_metric_expr(struct evlist *); int evlist__alloc_stats(struct perf_stat_config *config, struct evlist *evlist, bool alloc_raw); From 9f587cc93fe98e8d1e6527e18685635e0155fd08 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:38 -0800 Subject: [PATCH 084/114] perf jevents: Combine table prefix and suffix writing Combine into a single function to simplify, in a later change, writing metrics separately. 
Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.py | 36 +++++++++++++------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 4cdbf34b7298..5f8d490c7269 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -19,10 +19,10 @@ _sys_event_tables = [] # JsonEvent. Architecture standard events are in json files in the top # f'{_args.starting_dir}/{_args.arch}' directory. _arch_std_events = {} -# Track whether an events table is currently being defined and needs closing. -_close_table = False # Events to write out when the table is closed _pending_events = [] +# Name of table to be written out +_pending_events_tblname = None # Global BigCString shared by all structures. _bcs = None # Order specific JsonEvent attributes will be visited. 
@@ -378,24 +378,13 @@ def preprocess_arch_std_files(archpath: str) -> None: _arch_std_events[event.metric_name.lower()] = event -def print_events_table_prefix(tblname: str) -> None: - """Called when a new events table is started.""" - global _close_table - if _close_table: - raise IOError('Printing table prefix but last table has no suffix') - _args.output_file.write(f'static const struct compact_pmu_event {tblname}[] = {{\n') - _close_table = True - - def add_events_table_entries(item: os.DirEntry, topic: str) -> None: """Add contents of file to _pending_events table.""" - if not _close_table: - raise IOError('Table entries missing prefix') for e in read_json_events(item.path, topic): _pending_events.append(e) -def print_events_table_suffix() -> None: +def print_pending_events() -> None: """Optionally close events table.""" def event_cmp_key(j: JsonEvent) -> Tuple[bool, str, str, str, str]: @@ -407,17 +396,19 @@ def print_events_table_suffix() -> None: return (j.desc is not None, fix_none(j.topic), fix_none(j.name), fix_none(j.pmu), fix_none(j.metric_name)) - global _close_table - if not _close_table: + global _pending_events + if not _pending_events: return - global _pending_events + global _pending_events_tblname + _args.output_file.write( + f'static const struct compact_pmu_event {_pending_events_tblname}[] = {{\n') + for event in sorted(_pending_events, key=event_cmp_key): _args.output_file.write(event.to_c_string()) - _pending_events = [] + _pending_events = [] _args.output_file.write('};\n\n') - _close_table = False def get_topic(topic: str) -> str: if topic.endswith('metrics.json'): @@ -455,12 +446,13 @@ def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None: # model directory, reset topic if item.is_dir() and is_leaf_dir(item.path): - print_events_table_suffix() + print_pending_events() tblname = file_name_to_table_name(parents, item.name) if item.name == 'sys': _sys_event_tables.append(tblname) - print_events_table_prefix(tblname) + 
global _pending_events_tblname + _pending_events_tblname = tblname return # base dir or too deep @@ -809,7 +801,7 @@ struct compact_pmu_event { for arch in archs: arch_path = f'{_args.starting_dir}/{arch}' ftw(arch_path, [], process_one_file) - print_events_table_suffix() + print_pending_events() print_mapping_table(archs) print_system_mapping_table() From f8ea2c1524de42a6bae55cef5713d1229030324b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:39 -0800 Subject: [PATCH 085/114] perf pmu-events: Introduce pmu_metrics_table Add a metrics table that is just a cast from pmu_events_table. This changes the APIs so that event and metric usage of the underlying table is different. For the no jevents case the tables are already separate, later changes will separate the tables for the jevents case. Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-10-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/pmu.c | 9 ++++ tools/perf/pmu-events/empty-pmu-events.c | 21 ++++----- tools/perf/pmu-events/jevents.py | 21 ++++++--- tools/perf/pmu-events/pmu-events.h | 10 +++-- tools/perf/tests/expand-cgroup.c | 2 +- tools/perf/tests/parse-metric.c | 2 +- tools/perf/tests/pmu-events.c | 5 ++- tools/perf/util/metricgroup.c | 54 ++++++++++++------------ tools/perf/util/metricgroup.h | 2 +- tools/perf/util/pmu.c | 5 +++ tools/perf/util/pmu.h | 1 + 11 files changed, 78 insertions(+), 54 
deletions(-) diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index b4eaf00ec5a8..fa143acb4c8d 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -23,6 +23,15 @@ static struct perf_pmu *pmu__find_core_pmu(void) return pmu; } + return NULL; +} + +const struct pmu_metrics_table *pmu_metrics_table__find(void) +{ + struct perf_pmu *pmu = pmu__find_core_pmu(); + + if (pmu) + return perf_pmu__find_metrics_table(pmu); return NULL; } diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index 10bd4943ebf8..a938b74cf487 100644 --- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -278,13 +278,11 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_ev return 0; } -int pmu_events_table_for_each_metric(const struct pmu_events_table *etable, pmu_metric_iter_fn fn, - void *data) +int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, + void *data) { - struct pmu_metrics_table *table = (struct pmu_metrics_table *)etable; - for (const struct pmu_metric *pm = &table->entries[0]; pm->metric_expr; pm++) { - int ret = fn(pm, etable, data); + int ret = fn(pm, table, data); if (ret) return ret; @@ -320,9 +318,9 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) return table; } -const struct pmu_events_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) +const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) { - const struct pmu_events_table *table = NULL; + const struct pmu_metrics_table *table = NULL; char *cpuid = perf_pmu__getcpuid(pmu); int i; @@ -340,7 +338,7 @@ const struct pmu_events_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu break; if (!strcmp_cpuid_str(map->cpuid, cpuid)) { - table = (const struct pmu_events_table *)&map->metric_table; + table = &map->metric_table; break; } 
} @@ -359,13 +357,13 @@ const struct pmu_events_table *find_core_events_table(const char *arch, const ch return NULL; } -const struct pmu_events_table *find_core_metrics_table(const char *arch, const char *cpuid) +const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid) { for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) { if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid)) - return (const struct pmu_events_table *)&tables->metric_table; + return &tables->metric_table; } return NULL; } @@ -386,8 +384,7 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data) for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) { - int ret = pmu_events_table_for_each_metric( - (const struct pmu_events_table *)&tables->metric_table, fn, data); + int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data); if (ret) return ret; diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 5f8d490c7269..d83cc94af51f 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -611,10 +611,12 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table, return 0; } -int pmu_events_table_for_each_metric(const struct pmu_events_table *table, +int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *mtable, pmu_metric_iter_fn fn, void *data) { + const struct pmu_events_table *table = (const struct pmu_events_table *)mtable; + for (size_t i = 0; i < table->length; i++) { struct pmu_metric pm; int ret; @@ -622,7 +624,7 @@ int pmu_events_table_for_each_metric(const struct pmu_events_table *table, decompress_metric(table->entries[i].offset, &pm); if (!pm.metric_expr) continue; - ret = fn(&pm, table, data); + ret = fn(&pm, mtable, data); if (ret) return ret; } @@ -656,6 +658,11 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) 
return table; } +const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) +{ + return (struct pmu_metrics_table *)perf_pmu__find_events_table(pmu); +} + const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid) { for (const struct pmu_events_map *tables = &pmu_events_map[0]; @@ -667,9 +674,9 @@ const struct pmu_events_table *find_core_events_table(const char *arch, const ch return NULL; } -const struct pmu_events_table *find_core_metrics_table(const char *arch, const char *cpuid) +const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid) { - return (struct pmu_events_table *)find_core_events_table(arch, cpuid); + return (struct pmu_metrics_table *)find_core_events_table(arch, cpuid); } int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data) @@ -690,7 +697,8 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data) for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) { - int ret = pmu_events_table_for_each_metric(&tables->table, fn, data); + int ret = pmu_metrics_table_for_each_metric( + (struct pmu_metrics_table *)&tables->table, fn, data); if (ret) return ret; @@ -727,7 +735,8 @@ int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data) for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; tables->name; tables++) { - int ret = pmu_events_table_for_each_metric(&tables->table, fn, data); + int ret = pmu_metrics_table_for_each_metric( + (struct pmu_metrics_table *)&tables->table, fn, data); if (ret) return ret; diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index dca32979d6a4..b7d4a66b8ad2 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -39,28 +39,30 @@ struct pmu_metric { }; struct pmu_events_table; +struct pmu_metrics_table; typedef int (*pmu_event_iter_fn)(const struct pmu_event *pe, const struct pmu_events_table *table, 
void *data); typedef int (*pmu_metric_iter_fn)(const struct pmu_metric *pm, - const struct pmu_events_table *table, + const struct pmu_metrics_table *table, void *data); int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn, void *data); -int pmu_events_table_for_each_metric(const struct pmu_events_table *table, pmu_metric_iter_fn fn, +int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, void *data); const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu); -const struct pmu_events_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu); +const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu); const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid); -const struct pmu_events_table *find_core_metrics_table(const char *arch, const char *cpuid); +const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid); int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data); int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data); const struct pmu_events_table *find_sys_events_table(const char *name); +const struct pmu_metrics_table *find_sys_metrics_table(const char *name); int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data); int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data); diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index 4f3195b84463..672a27f37060 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -180,7 +180,7 @@ static int expand_metric_events(void) struct evlist *evlist; struct rblist metric_events; const char metric_str[] = "CPI"; - const struct pmu_events_table *pme_test; + const struct pmu_metrics_table *pme_test; evlist = evlist__new(); TEST_ASSERT_VAL("failed to get evlist", evlist); diff --git a/tools/perf/tests/parse-metric.c 
b/tools/perf/tests/parse-metric.c index 6c527cd805fe..9fec6040950c 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -72,7 +72,7 @@ static int __compute_metric(const char *name, struct value *vals, struct rblist metric_events = { .nr_entries = 0, }; - const struct pmu_events_table *pme_test; + const struct pmu_metrics_table *pme_test; struct perf_cpu_map *cpus; struct runtime_stat st; struct evlist *evlist; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index c2b3ada57cbc..9f2e385e0991 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -813,7 +813,8 @@ struct metric { struct metric_ref metric_ref; }; -static int test__parsing_callback(const struct pmu_metric *pm, const struct pmu_events_table *table, +static int test__parsing_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table, void *data) { int *failures = data; @@ -995,7 +996,7 @@ out: } static int test__parsing_fake_callback(const struct pmu_metric *pm, - const struct pmu_events_table *table __maybe_unused, + const struct pmu_metrics_table *table __maybe_unused, void *data __maybe_unused) { return metric_parse_fake(pm->metric_name, pm->metric_expr); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 47fd02af66f1..f3559be95541 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -457,7 +457,7 @@ struct metricgroup_iter_data { }; static int metricgroup__sys_event_iter(const struct pmu_metric *pm, - const struct pmu_events_table *table, + const struct pmu_metrics_table *table, void *data) { struct metricgroup_iter_data *d = data; @@ -477,8 +477,8 @@ static int metricgroup__sys_event_iter(const struct pmu_metric *pm, } static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm, - const struct pmu_events_table *table __maybe_unused, - void *vdata) + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) { 
struct rblist *groups = vdata; @@ -488,16 +488,16 @@ static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm, void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) { struct rblist groups; - const struct pmu_events_table *table; + const struct pmu_metrics_table *table; struct rb_node *node, *next; rblist__init(&groups); groups.node_new = mep_new; groups.node_cmp = mep_cmp; groups.node_delete = mep_delete; - table = pmu_events_table__find(); + table = pmu_metrics_table__find(); if (table) { - pmu_events_table_for_each_metric(table, + pmu_metrics_table_for_each_metric(table, metricgroup__add_to_mep_groups_callback, &groups); } @@ -765,11 +765,11 @@ struct metricgroup_add_iter_data { bool system_wide; struct metric *root_metric; const struct visited_metric *visited; - const struct pmu_events_table *table; + const struct pmu_metrics_table *table; }; static bool metricgroup__find_metric(const char *metric, - const struct pmu_events_table *table, + const struct pmu_metrics_table *table, struct pmu_metric *pm); static int add_metric(struct list_head *metric_list, @@ -780,7 +780,7 @@ static int add_metric(struct list_head *metric_list, bool system_wide, struct metric *root_metric, const struct visited_metric *visited, - const struct pmu_events_table *table); + const struct pmu_metrics_table *table); /** * resolve_metric - Locate metrics within the root metric and recursively add @@ -807,7 +807,7 @@ static int resolve_metric(struct list_head *metric_list, bool system_wide, struct metric *root_metric, const struct visited_metric *visited, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { struct hashmap_entry *cur; size_t bkt; @@ -889,7 +889,7 @@ static int __add_metric(struct list_head *metric_list, bool system_wide, struct metric *root_metric, const struct visited_metric *visited, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { const struct visited_metric 
*vm; int ret; @@ -982,7 +982,7 @@ struct metricgroup__find_metric_data { }; static int metricgroup__find_metric_callback(const struct pmu_metric *pm, - const struct pmu_events_table *table __maybe_unused, + const struct pmu_metrics_table *table __maybe_unused, void *vdata) { struct metricgroup__find_metric_data *data = vdata; @@ -995,7 +995,7 @@ static int metricgroup__find_metric_callback(const struct pmu_metric *pm, } static bool metricgroup__find_metric(const char *metric, - const struct pmu_events_table *table, + const struct pmu_metrics_table *table, struct pmu_metric *pm) { struct metricgroup__find_metric_data data = { @@ -1003,7 +1003,7 @@ static bool metricgroup__find_metric(const char *metric, .pm = pm, }; - return pmu_events_table_for_each_metric(table, metricgroup__find_metric_callback, &data) + return pmu_metrics_table_for_each_metric(table, metricgroup__find_metric_callback, &data) ? true : false; } @@ -1015,7 +1015,7 @@ static int add_metric(struct list_head *metric_list, bool system_wide, struct metric *root_metric, const struct visited_metric *visited, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { int ret = 0; @@ -1045,8 +1045,8 @@ static int add_metric(struct list_head *metric_list, } static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, - const struct pmu_events_table *table __maybe_unused, - void *data) + const struct pmu_metrics_table *table __maybe_unused, + void *data) { struct metricgroup_add_iter_data *d = data; int ret; @@ -1105,7 +1105,7 @@ struct metricgroup__add_metric_data { }; static int metricgroup__add_metric_callback(const struct pmu_metric *pm, - const struct pmu_events_table *table, + const struct pmu_metrics_table *table, void *vdata) { struct metricgroup__add_metric_data *data = vdata; @@ -1143,7 +1143,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier const char *user_requested_cpu_list, bool system_wide, struct list_head 
*metric_list, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { LIST_HEAD(list); int ret; @@ -1163,7 +1163,7 @@ static int metricgroup__add_metric(const char *metric_name, const char *modifier * Iterate over all metrics seeing if metric matches either the * name or group. When it does add the metric to the list. */ - ret = pmu_events_table_for_each_metric(table, metricgroup__add_metric_callback, + ret = pmu_metrics_table_for_each_metric(table, metricgroup__add_metric_callback, &data); if (ret) goto out; @@ -1219,7 +1219,7 @@ out: static int metricgroup__add_metric_list(const char *list, bool metric_no_group, const char *user_requested_cpu_list, bool system_wide, struct list_head *metric_list, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { char *list_itr, *list_copy, *metric_name, *modifier; int ret, count = 0; @@ -1429,7 +1429,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, bool system_wide, struct perf_pmu *fake_pmu, struct rblist *metric_events_list, - const struct pmu_events_table *table) + const struct pmu_metrics_table *table) { struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); @@ -1577,7 +1577,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool system_wide, struct rblist *metric_events) { - const struct pmu_events_table *table = pmu_events_table__find(); + const struct pmu_metrics_table *table = pmu_metrics_table__find(); if (!table) return -EINVAL; @@ -1588,7 +1588,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, } int metricgroup__parse_groups_test(struct evlist *evlist, - const struct pmu_events_table *table, + const struct pmu_metrics_table *table, const char *str, bool metric_no_group, bool metric_no_merge, @@ -1601,7 +1601,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, } static int metricgroup__has_metric_callback(const struct pmu_metric *pm, - const struct pmu_events_table *table __maybe_unused, + 
const struct pmu_metrics_table *table __maybe_unused, void *vdata) { const char *metric = vdata; @@ -1617,12 +1617,12 @@ static int metricgroup__has_metric_callback(const struct pmu_metric *pm, bool metricgroup__has_metric(const char *metric) { - const struct pmu_events_table *table = pmu_events_table__find(); + const struct pmu_metrics_table *table = pmu_metrics_table__find(); if (!table) return false; - return pmu_events_table_for_each_metric(table, metricgroup__has_metric_callback, + return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback, (void *)metric) ? true : false; } diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index b1f186d0f514..84030321a057 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -73,7 +73,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool system_wide, struct rblist *metric_events); int metricgroup__parse_groups_test(struct evlist *evlist, - const struct pmu_events_table *table, + const struct pmu_metrics_table *table, const char *str, bool metric_no_group, bool metric_no_merge, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f8c214d8815f..c256b29defad 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -655,6 +655,11 @@ __weak const struct pmu_events_table *pmu_events_table__find(void) return perf_pmu__find_events_table(NULL); } +__weak const struct pmu_metrics_table *pmu_metrics_table__find(void) +{ + return perf_pmu__find_metrics_table(NULL); +} + /* * Suffix must be in form tok_{digits}, or tok{digits}, or same as pmu_name * to be valid. 
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 64c596a358cc..6b770f17eb86 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -231,6 +231,7 @@ void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu, char *perf_pmu__getcpuid(struct perf_pmu *pmu); const struct pmu_events_table *pmu_events_table__find(void); +const struct pmu_metrics_table *pmu_metrics_table__find(void); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); From 62774db2a05dc878c83824afd8e6594ff277b91a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:40 -0800 Subject: [PATCH 086/114] perf jevents: Generate metrics and events as separate tables Turn a perf json event into an event, metric or both. This reduces the number of events needed to scan to find an event or metric. As events no longer need the relatively seldom used metric fields, 4 bytes is saved per event. This reduces the big C string's size by 335kb (14.8%) on x86. Note, for the test PMU architecture pme_test_soc_cpu is renamed pmu_events__test_soc_cpu for consistency with the event vs metric naming convention. 
Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.py | 244 +++++++++++++++++++++++-------- tools/perf/tests/pmu-events.c | 3 +- 2 files changed, 189 insertions(+), 58 deletions(-) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index d83cc94af51f..627ee817f57f 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -13,28 +13,40 @@ import collections # Global command line arguments. _args = None +# List of regular event tables. +_event_tables = [] # List of event tables generated from "/sys" directories. _sys_event_tables = [] +# List of regular metric tables. +_metric_tables = [] +# List of metric tables generated from "/sys" directories. +_sys_metric_tables = [] +# Mapping between sys event table names and sys metric table names. +_sys_event_table_to_metric_table_mapping = {} # Map from an event name to an architecture standard # JsonEvent. Architecture standard events are in json files in the top # f'{_args.starting_dir}/{_args.arch}' directory. 
_arch_std_events = {} # Events to write out when the table is closed _pending_events = [] -# Name of table to be written out +# Name of events table to be written out _pending_events_tblname = None +# Metrics to write out when the table is closed +_pending_metrics = [] +# Name of metrics table to be written out +_pending_metrics_tblname = None # Global BigCString shared by all structures. _bcs = None # Order specific JsonEvent attributes will be visited. _json_event_attributes = [ # cmp_sevent related attributes. - 'name', 'pmu', 'topic', 'desc', 'metric_name', 'metric_group', + 'name', 'pmu', 'topic', 'desc', # Seems useful, put it early. 'event', # Short things in alphabetical order. 'aggr_mode', 'compat', 'deprecated', 'perpkg', 'unit', # Longer things (the last won't be iterated over during decompress). - 'metric_constraint', 'metric_expr', 'long_desc' + 'long_desc' ] # Attributes that are in pmu_metric rather than pmu_event. @@ -52,14 +64,16 @@ def removesuffix(s: str, suffix: str) -> str: return s[0:-len(suffix)] if s.endswith(suffix) else s -def file_name_to_table_name(parents: Sequence[str], dirname: str) -> str: +def file_name_to_table_name(prefix: str, parents: Sequence[str], + dirname: str) -> str: """Generate a C table name from directory names.""" - tblname = 'pme' + tblname = prefix for p in parents: tblname += '_' + p tblname += '_' + dirname return tblname.replace('-', '_') + def c_len(s: str) -> int: """Return the length of s a C string @@ -277,7 +291,7 @@ class JsonEvent: self.metric_constraint = jd.get('MetricConstraint') self.metric_expr = None if 'MetricExpr' in jd: - self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify() + self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify() arch_std = jd.get('ArchStdEvent') if precise and self.desc and '(Precise Event)' not in self.desc: @@ -326,23 +340,24 @@ class JsonEvent: s += f'\t{attr} = {value},\n' return s + '}' - def build_c_string(self) -> str: + def 
build_c_string(self, metric: bool) -> str: s = '' - for attr in _json_event_attributes: + for attr in _json_metric_attributes if metric else _json_event_attributes: x = getattr(self, attr) - if x and attr == 'metric_expr': + if metric and x and attr == 'metric_expr': # Convert parsed metric expressions into a string. Slashes # must be doubled in the file. x = x.ToPerfJson().replace('\\', '\\\\') s += f'{x}\\000' if x else '\\000' return s - def to_c_string(self) -> str: + def to_c_string(self, metric: bool) -> str: """Representation of the event as a C struct initializer.""" - s = self.build_c_string() + s = self.build_c_string(metric) return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n' + @lru_cache(maxsize=None) def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]: """Read json events from the specified file.""" @@ -381,7 +396,10 @@ def preprocess_arch_std_files(archpath: str) -> None: def add_events_table_entries(item: os.DirEntry, topic: str) -> None: """Add contents of file to _pending_events table.""" for e in read_json_events(item.path, topic): - _pending_events.append(e) + if e.name: + _pending_events.append(e) + if e.metric_name: + _pending_metrics.append(e) def print_pending_events() -> None: @@ -401,15 +419,54 @@ def print_pending_events() -> None: return global _pending_events_tblname + if _pending_events_tblname.endswith('_sys'): + global _sys_event_tables + _sys_event_tables.append(_pending_events_tblname) + else: + global event_tables + _event_tables.append(_pending_events_tblname) + _args.output_file.write( f'static const struct compact_pmu_event {_pending_events_tblname}[] = {{\n') for event in sorted(_pending_events, key=event_cmp_key): - _args.output_file.write(event.to_c_string()) + _args.output_file.write(event.to_c_string(metric=False)) _pending_events = [] _args.output_file.write('};\n\n') +def print_pending_metrics() -> None: + """Optionally close metrics table.""" + + def metric_cmp_key(j: JsonEvent) -> Tuple[bool, str, str]: + 
def fix_none(s: Optional[str]) -> str: + if s is None: + return '' + return s + + return (j.desc is not None, fix_none(j.pmu), fix_none(j.metric_name)) + + global _pending_metrics + if not _pending_metrics: + return + + global _pending_metrics_tblname + if _pending_metrics_tblname.endswith('_sys'): + global _sys_metric_tables + _sys_metric_tables.append(_pending_metrics_tblname) + else: + global metric_tables + _metric_tables.append(_pending_metrics_tblname) + + _args.output_file.write( + f'static const struct compact_pmu_event {_pending_metrics_tblname}[] = {{\n') + + for metric in sorted(_pending_metrics, key=metric_cmp_key): + _args.output_file.write(metric.to_c_string(metric=True)) + _pending_metrics = [] + + _args.output_file.write('};\n\n') + def get_topic(topic: str) -> str: if topic.endswith('metrics.json'): return 'metrics' @@ -432,12 +489,13 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None: topic = get_topic(item.name) for event in read_json_events(item.path, topic): - _bcs.add(event.build_c_string()) + if event.name: + _bcs.add(event.build_c_string(metric=False)) + if event.metric_name: + _bcs.add(event.build_c_string(metric=True)) def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None: """Process a JSON file during the main walk.""" - global _sys_event_tables - def is_leaf_dir(path: str) -> bool: for item in os.scandir(path): if item.is_dir(): @@ -447,12 +505,15 @@ def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None: # model directory, reset topic if item.is_dir() and is_leaf_dir(item.path): print_pending_events() + print_pending_metrics() - tblname = file_name_to_table_name(parents, item.name) - if item.name == 'sys': - _sys_event_tables.append(tblname) global _pending_events_tblname - _pending_events_tblname = tblname + _pending_events_tblname = file_name_to_table_name('pmu_events_', parents, item.name) + global _pending_metrics_tblname + _pending_metrics_tblname = 
file_name_to_table_name('pmu_metrics_', parents, item.name) + + if item.name == 'sys': + _sys_event_table_to_metric_table_mapping[_pending_events_tblname] = _pending_metrics_tblname return # base dir or too deep @@ -477,6 +538,12 @@ struct pmu_events_table { size_t length; }; +/* Struct used to make the PMU metric table implementation opaque to callers. */ +struct pmu_metrics_table { + const struct compact_pmu_event *entries; + size_t length; +}; + /* * Map a CPU to its table of PMU events. The CPU is identified by the * cpuid field, which is an arch-specific identifier for the CPU. @@ -488,7 +555,8 @@ struct pmu_events_table { struct pmu_events_map { const char *arch; const char *cpuid; - struct pmu_events_table table; + struct pmu_events_table event_table; + struct pmu_metrics_table metric_table; }; /* @@ -502,9 +570,13 @@ const struct pmu_events_map pmu_events_map[] = { _args.output_file.write("""{ \t.arch = "testarch", \t.cpuid = "testcpu", -\t.table = { -\t.entries = pme_test_soc_cpu, -\t.length = ARRAY_SIZE(pme_test_soc_cpu), +\t.event_table = { +\t\t.entries = pmu_events__test_soc_cpu, +\t\t.length = ARRAY_SIZE(pmu_events__test_soc_cpu), +\t}, +\t.metric_table = { +\t\t.entries = pmu_metrics__test_soc_cpu, +\t\t.length = ARRAY_SIZE(pmu_metrics__test_soc_cpu), \t} }, """) @@ -515,14 +587,29 @@ const struct pmu_events_map pmu_events_map[] = { for row in table: # Skip the first row or any row beginning with #. 
if not first and len(row) > 0 and not row[0].startswith('#'): - tblname = file_name_to_table_name([], row[2].replace('/', '_')) + event_tblname = file_name_to_table_name('pmu_events_', [], row[2].replace('/', '_')) + if event_tblname in _event_tables: + event_size = f'ARRAY_SIZE({event_tblname})' + else: + event_tblname = 'NULL' + event_size = '0' + metric_tblname = file_name_to_table_name('pmu_metrics_', [], row[2].replace('/', '_')) + if metric_tblname in _metric_tables: + metric_size = f'ARRAY_SIZE({metric_tblname})' + else: + metric_tblname = 'NULL' + metric_size = '0' cpuid = row[0].replace('\\', '\\\\') _args.output_file.write(f"""{{ \t.arch = "{arch}", \t.cpuid = "{cpuid}", -\t.table = {{ -\t\t.entries = {tblname}, -\t\t.length = ARRAY_SIZE({tblname}) +\t.event_table = {{ +\t\t.entries = {event_tblname}, +\t\t.length = {event_size} +\t}}, +\t.metric_table = {{ +\t\t.entries = {metric_tblname}, +\t\t.length = {metric_size} \t}} }}, """) @@ -531,7 +618,8 @@ const struct pmu_events_map pmu_events_map[] = { _args.output_file.write("""{ \t.arch = 0, \t.cpuid = 0, -\t.table = { 0, 0 }, +\t.event_table = { 0, 0 }, +\t.metric_table = { 0, 0 }, } }; """) @@ -542,14 +630,36 @@ def print_system_mapping_table() -> None: _args.output_file.write(""" struct pmu_sys_events { \tconst char *name; -\tstruct pmu_events_table table; +\tstruct pmu_events_table event_table; +\tstruct pmu_metrics_table metric_table; }; static const struct pmu_sys_events pmu_sys_event_tables[] = { """) + printed_metric_tables = [] for tblname in _sys_event_tables: _args.output_file.write(f"""\t{{ -\t\t.table = {{ +\t\t.event_table = {{ +\t\t\t.entries = {tblname}, +\t\t\t.length = ARRAY_SIZE({tblname}) +\t\t}},""") + metric_tblname = _sys_event_table_to_metric_table_mapping[tblname] + if metric_tblname in _sys_metric_tables: + _args.output_file.write(f""" +\t\t.metric_table = {{ +\t\t\t.entries = {metric_tblname}, +\t\t\t.length = ARRAY_SIZE({metric_tblname}) +\t\t}},""") + 
printed_metric_tables.append(metric_tblname) + _args.output_file.write(f""" +\t\t.name = \"{tblname}\", +\t}}, +""") + for tblname in _sys_metric_tables: + if tblname in printed_metric_tables: + continue + _args.output_file.write(f"""\t{{ +\t\t.metric_table = {{ \t\t\t.entries = {tblname}, \t\t\t.length = ARRAY_SIZE({tblname}) \t\t}}, @@ -557,7 +667,8 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = { \t}}, """) _args.output_file.write("""\t{ -\t\t.table = { 0, 0 } +\t\t.event_table = { 0, 0 }, +\t\t.metric_table = { 0, 0 }, \t}, }; @@ -566,10 +677,7 @@ static void decompress_event(int offset, struct pmu_event *pe) \tconst char *p = &big_c_string[offset]; """) for attr in _json_event_attributes: - if attr in _json_metric_attributes and 'metric_' in attr: - _args.output_file.write(f'\n\t/* Skip {attr} */\n') - else: - _args.output_file.write(f""" + _args.output_file.write(f""" \tpe->{attr} = (*p == '\\0' ? NULL : p); """) if attr == _json_event_attributes[-1]: @@ -581,14 +689,11 @@ static void decompress_metric(int offset, struct pmu_metric *pm) { \tconst char *p = &big_c_string[offset]; """) - for attr in _json_event_attributes: - if attr in _json_metric_attributes: - _args.output_file.write(f""" + for attr in _json_metric_attributes: + _args.output_file.write(f""" \tpm->{attr} = (*p == '\\0' ? 
NULL : p); """) - else: - _args.output_file.write(f'\n\t/* Skip {attr} */\n') - if attr == _json_event_attributes[-1]: + if attr == _json_metric_attributes[-1]: continue _args.output_file.write('\twhile (*p++);') _args.output_file.write("""} @@ -611,12 +716,10 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table, return 0; } -int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *mtable, +int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, void *data) { - const struct pmu_events_table *table = (const struct pmu_events_table *)mtable; - for (size_t i = 0; i < table->length; i++) { struct pmu_metric pm; int ret; @@ -624,7 +727,7 @@ int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *mtable, decompress_metric(table->entries[i].offset, &pm); if (!pm.metric_expr) continue; - ret = fn(&pm, mtable, data); + ret = fn(&pm, table, data); if (ret) return ret; } @@ -650,7 +753,7 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) break; if (!strcmp_cpuid_str(map->cpuid, cpuid)) { - table = &map->table; + table = &map->event_table; break; } } @@ -660,7 +763,29 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) { - return (struct pmu_metrics_table *)perf_pmu__find_events_table(pmu); + const struct pmu_metrics_table *table = NULL; + char *cpuid = perf_pmu__getcpuid(pmu); + int i; + + /* on some platforms which uses cpus map, cpuid can be NULL for + * PMUs other than CORE PMUs. 
+ */ + if (!cpuid) + return NULL; + + i = 0; + for (;;) { + const struct pmu_events_map *map = &pmu_events_map[i++]; + if (!map->arch) + break; + + if (!strcmp_cpuid_str(map->cpuid, cpuid)) { + table = &map->metric_table; + break; + } + } + free(cpuid); + return table; } const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid) @@ -669,14 +794,20 @@ const struct pmu_events_table *find_core_events_table(const char *arch, const ch tables->arch; tables++) { if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid)) - return &tables->table; + return &tables->event_table; } return NULL; } const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid) { - return (struct pmu_metrics_table *)find_core_events_table(arch, cpuid); + for (const struct pmu_events_map *tables = &pmu_events_map[0]; + tables->arch; + tables++) { + if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid)) + return &tables->metric_table; + } + return NULL; } int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data) @@ -684,7 +815,7 @@ int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data) for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) { - int ret = pmu_events_table_for_each_event(&tables->table, fn, data); + int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data); if (ret) return ret; @@ -697,8 +828,7 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data) for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) { - int ret = pmu_metrics_table_for_each_metric( - (struct pmu_metrics_table *)&tables->table, fn, data); + int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data); if (ret) return ret; @@ -712,7 +842,7 @@ const struct pmu_events_table *find_sys_events_table(const char *name) tables->name; tables++) { if (!strcmp(tables->name, name)) - return &tables->table; + return 
&tables->event_table; } return NULL; } @@ -722,7 +852,7 @@ int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data) for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; tables->name; tables++) { - int ret = pmu_events_table_for_each_event(&tables->table, fn, data); + int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data); if (ret) return ret; @@ -735,8 +865,7 @@ int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data) for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; tables->name; tables++) { - int ret = pmu_metrics_table_for_each_metric( - (struct pmu_metrics_table *)&tables->table, fn, data); + int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data); if (ret) return ret; @@ -811,6 +940,7 @@ struct compact_pmu_event { arch_path = f'{_args.starting_dir}/{arch}' ftw(arch_path, [], process_one_file) print_pending_events() + print_pending_metrics() print_mapping_table(archs) print_system_mapping_table() diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 9f2e385e0991..962c3c0d53ba 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -469,7 +469,8 @@ static int test__pmu_event_table_sys_callback(const struct pmu_event *pe, static int test__pmu_event_table(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - const struct pmu_events_table *sys_event_table = find_sys_events_table("pme_test_soc_sys"); + const struct pmu_events_table *sys_event_table = + find_sys_events_table("pmu_events__test_soc_sys"); const struct pmu_events_table *table = find_core_events_table("testarch", "testcpu"); int map_events = 0, expected_events, err; From 5a09b1fd1b1fadf70cbf02a8daa49046e870a7aa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:41 -0800 Subject: [PATCH 087/114] perf jevents: Add model list option This allows the set of generated jevents events and metrics be limited to a subset of the model names. 
Appropriate if trying to minimize the binary size where only a set of models are possible. Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-12-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/Build | 3 ++- tools/perf/pmu-events/jevents.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 15b9e8fdbffa..a14de24ecb69 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -10,6 +10,7 @@ JEVENTS_PY = pmu-events/jevents.py ifeq ($(JEVENTS_ARCH),) JEVENTS_ARCH=$(SRCARCH) endif +JEVENTS_MODEL ?= all # # Locate/process JSON files in pmu-events/arch/ @@ -23,5 +24,5 @@ $(OUTPUT)pmu-events/pmu-events.c: pmu-events/empty-pmu-events.c else $(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY) pmu-events/metric.py $(call rule_mkdir) - $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) pmu-events/arch $@ + $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@ endif diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 627ee817f57f..2bcd07ce609f 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -599,6 +599,8 @@ const struct pmu_events_map pmu_events_map[] = { else: metric_tblname = 'NULL' metric_size = '0' + if event_size == '0' and metric_size == 
'0': + continue cpuid = row[0].replace('\\', '\\\\') _args.output_file.write(f"""{{ \t.arch = "{arch}", @@ -888,12 +890,24 @@ def main() -> None: action: Callable[[Sequence[str], os.DirEntry], None]) -> None: """Replicate the directory/file walking behavior of C's file tree walk.""" for item in os.scandir(path): + if _args.model != 'all' and item.is_dir(): + # Check if the model matches one in _args.model. + if len(parents) == _args.model.split(',')[0].count('/'): + # We're testing the correct directory. + item_path = '/'.join(parents) + ('/' if len(parents) > 0 else '') + item.name + if 'test' not in item_path and item_path not in _args.model.split(','): + continue action(parents, item) if item.is_dir(): ftw(item.path, parents + [item.name], action) ap = argparse.ArgumentParser() ap.add_argument('arch', help='Architecture name like x86') + ap.add_argument('model', help='''Select a model such as skylake to +reduce the code size. Normally set to "all". For architectures like +ARM64 with an implementor/model, the model must include the implementor +such as "arm/cortex-a34".''', + default='all') ap.add_argument( 'starting_dir', type=dir_path, From 3340a08354ac286e252738436cf0fbf0d9e72449 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:42 -0800 Subject: [PATCH 088/114] perf pmu-events: Fix testing with JEVENTS_ARCH=all The #slots literal will return NAN when not on ARM64 which causes a perf test failure when not on an ARM64 for a JEVENTS_ARCH=all build: .. 10.4: Parsing of PMU event table metrics with fake PMUs : FAILED! .. Add an is_test boolean so that the failure can be avoided when running as a test. 
Fixes: acef233b7ca749fd ("perf pmu: Add #slots literal support for arm64") Reviewed-by: John Garry Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/pmu-events.c | 1 + tools/perf/util/expr.h | 1 + tools/perf/util/expr.l | 8 +++++--- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 962c3c0d53ba..accf44b3d968 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -950,6 +950,7 @@ static int metric_parse_fake(const char *metric_name, const char *str) pr_debug("expr__ctx_new failed"); return TEST_FAIL; } + ctx->sctx.is_test = true; if (expr__find_ids(str, NULL, ctx) < 0) { pr_err("expr__find_ids failed\n"); return -1; diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 029271540fb0..eaa44b24c555 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -9,6 +9,7 @@ struct expr_scanner_ctx { char *user_requested_cpu_list; int runtime; bool system_wide; + bool is_test; }; struct expr_parse_ctx { diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index d47de5f270a8..4fbf353e78e7 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -87,9 +87,11 @@ static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx) YYSTYPE *yylval = expr_get_lval(scanner); yylval->num = 
expr__get_literal(expr_get_text(scanner), sctx); - if (isnan(yylval->num)) - return EXPR_ERROR; - + if (isnan(yylval->num)) { + if (!sctx->is_test) + return EXPR_ERROR; + yylval->num = 1; + } return LITERAL; } %} From d2e3dc829e389d686194d06f0a64adda4158faae Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:43 -0800 Subject: [PATCH 089/114] perf jevents: Correct bad character encoding A character encoding issue added a "3D" character that breaks the metrics test. Fixes: 40769665b63d8c84 ("perf jevents: Parse metrics during conversion") Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-14-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/metric_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py index ced5998bd827..e4c792428277 100644 --- a/tools/perf/pmu-events/metric_test.py +++ b/tools/perf/pmu-events/metric_test.py @@ -89,8 +89,8 @@ class TestMetricExpressions(unittest.TestCase): after = r'min((a + b if c > 1 else c + d), e + f)' self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) - before =3D r'a if b else c if d else e' - after =3D r'(a if b else (c if d else e))' + before = r'a if b else c if d else e' + after = r'(a if b else (c if d else e))' self.assertEqual(ParsePerfJson(before).ToPerfJson(), after) def test_ToPython(self): From 
e30f34053e5bc552829249941120ad042ba27723 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:44 -0800 Subject: [PATCH 090/114] tools build: Add test echo-cmd Add quiet_cmd_test so that: $(Q)$(call echo-cmd,test) will print: TEST This is useful for executing compile-time tests similar to what happens for fortify tests in the kernel's lib directory. Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-15-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.build | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 715092fc6a23..89430338a3d9 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -53,6 +53,7 @@ build-file := $(dir)/Build quiet_cmd_flex = FLEX $@ quiet_cmd_bison = BISON $@ +quiet_cmd_test = TEST $@ # Create directory unless it exists quiet_cmd_mkdir = MKDIR $(dir $@) From b777b3d2555feeaa2bf39d6541ed9198e2d2d728 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 26 Jan 2023 15:36:45 -0800 Subject: [PATCH 091/114] perf jevents: Run metric_test.py at compile-time Add a target that generates a log file for running metric_test.py and make this a dependency on generating pmu-events.c. The log output is displayed if the test fails like (the test was modified to make it fail): ``` TEST /tmp/perf/pmu-events/metric_test.log F...... 
====================================================================== FAIL: test_Brackets (__main__.TestMetricExpressions) ---------------------------------------------------------------------- Traceback (most recent call last): File "tools/perf/pmu-events/metric_test.py", line 33, in test_Brackets self.assertEqual((a * b + c).ToPerfJson(), 'a * b + d') AssertionError: 'a * b + c' != 'a * b + d' - a * b + c ? ^ + a * b + d ? ^ ---------------------------------------------------------------------- Ran 7 tests in 0.004s FAILED (failures=1) make[3]: *** [pmu-events/Build:32: /tmp/perf/pmu-events/metric_test.log] Error 1 ``` However, normal execution will just show the TEST line. This is roughly modeled on fortify testing in the kernel lib directory. Modify metric_test.py so that it is executable. This is necessary when PYTHON isn't specified in the build, the normal case. Use variables to make the paths to files clearer and more consistent. Committer notes: Add pmu-events/metric_test.log to tools/perf/.gitignore and to the 'clean' target on tools/perf/Makefile.perf. 
Reviewed-by: Kajol Jain Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Caleb Biggers Cc: Florian Fischer Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Kang Minchul Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Rob Herring Cc: Sandipan Das Cc: Stephane Eranian Cc: Will Deacon Cc: Xing Zhengjun Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230126233645.200509-16-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + tools/perf/Makefile.perf | 1 + tools/perf/pmu-events/Build | 13 +++++++++++-- tools/perf/pmu-events/metric_test.py | 1 + 4 files changed, 14 insertions(+), 2 deletions(-) mode change 100644 => 100755 tools/perf/pmu-events/metric_test.py diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 05806ecfc33c..f533e76fb480 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -38,6 +38,7 @@ arch/*/include/generated/ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents +pmu-events/metric_test.log feature/ libapi/ libbpf/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index b7d9c4206230..bac9272682b7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1103,6 +1103,7 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $( $(OUTPUT)util/intel-pt-decoder/inat-tables.c \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ + $(OUTPUT)pmu-events/metric_test.log \ $(OUTPUT)$(fadvise_advice_array) \ $(OUTPUT)$(fsconfig_arrays) \ $(OUTPUT)$(fsmount_arrays) \ diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index a14de24ecb69..150765f2baee 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -6,6 +6,11 @@ 
JDIR_TEST = pmu-events/arch/test JSON_TEST = $(shell [ -d $(JDIR_TEST) ] && \ find $(JDIR_TEST) -name '*.json') JEVENTS_PY = pmu-events/jevents.py +METRIC_PY = pmu-events/metric.py +METRIC_TEST_PY = pmu-events/metric_test.py +EMPTY_PMU_EVENTS_C = pmu-events/empty-pmu-events.c +PMU_EVENTS_C = $(OUTPUT)pmu-events/pmu-events.c +METRIC_TEST_LOG = $(OUTPUT)pmu-events/metric_test.log ifeq ($(JEVENTS_ARCH),) JEVENTS_ARCH=$(SRCARCH) @@ -18,11 +23,15 @@ JEVENTS_MODEL ?= all # ifeq ($(NO_JEVENTS),1) -$(OUTPUT)pmu-events/pmu-events.c: pmu-events/empty-pmu-events.c +$(PMU_EVENTS_C): $(EMPTY_PMU_EVENTS_C) $(call rule_mkdir) $(Q)$(call echo-cmd,gen)cp $< $@ else -$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY) pmu-events/metric.py +$(METRIC_TEST_LOG): $(METRIC_TEST_PY) $(METRIC_PY) + $(call rule_mkdir) + $(Q)$(call echo-cmd,test)$(PYTHON) $< 2> $@ || (cat $@ && false) + +$(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG) $(call rule_mkdir) $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@ endif diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py old mode 100644 new mode 100755 index e4c792428277..40a3c7d8b2bc --- a/tools/perf/pmu-events/metric_test.py +++ b/tools/perf/pmu-events/metric_test.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) import unittest from metric import Constant From 7105311c2d3bce8f52653dbfe87de475a251892b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 3 Feb 2023 10:24:01 -0600 Subject: [PATCH 092/114] perf arm-spe: Add raw decoding for SPEv1.2 previous branch address Arm SPEv1.2 adds a new optional address packet type: previous branch target. The recorded address is the target virtual address of the most recently taken branch in program order. Add support for decoding the address packet in raw dumps. 
Reviewed-by: Leo Yan Signed-off-by: Rob Herring Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230203162401.132931-1-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 4 +++- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 2f311189c6e8..fed4741f372e 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -422,16 +422,18 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet, int ch, pat; u64 payload = packet->payload; int err = 0; + static const char *idx_name[] = {"PC", "TGT", "VA", "PA", "PBT"}; switch (idx) { case SPE_ADDR_PKT_HDR_INDEX_INS: case SPE_ADDR_PKT_HDR_INDEX_BRANCH: + case SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH: ns = !!SPE_ADDR_PKT_GET_NS(payload); el = SPE_ADDR_PKT_GET_EL(payload); payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload); arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s 0x%llx el%d ns=%d", - (idx == 1) ? 
"TGT" : "PC", payload, el, ns); + idx_name[idx], payload, el, ns); break; case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT: arm_spe_pkt_out_string(&err, &buf, &buf_len, diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 9b970e7bf1e2..f75ed3a8a050 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -65,6 +65,7 @@ struct arm_spe_pkt { #define SPE_ADDR_PKT_HDR_INDEX_BRANCH 0x1 #define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT 0x2 #define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS 0x3 +#define SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH 0x4 /* Address packet payload */ #define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT 56 From 492fef218a6606c53bbb979a65b8f827c5ea02ce Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 2 Feb 2023 18:13:22 -0800 Subject: [PATCH 093/114] perf lock contention: Factor out lock_contention_get_name() The lock_contention_get_name() returns a name for the lock stat entry based on the current aggregation mode. As it's called sequentially in a single thread, it can return the address of a static buffer for symbol and offset of the caller. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Juri Lelli Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_lock_contention.c | 115 +++++++++++++++----------- 1 file changed, 65 insertions(+), 50 deletions(-) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 4902ac331f41..286e52ba9f91 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -163,9 +163,70 @@ int lock_contention_stop(void) return 0; } +static const char *lock_contention_get_name(struct lock_contention *con, + struct contention_key *key, + u64 *stack_trace) +{ + int idx = 0; + u64 addr; + const char *name = ""; + static char name_buf[KSYM_NAME_LEN]; + struct symbol *sym; + struct map *kmap; + struct machine *machine = con->machine; + + if (con->aggr_mode == LOCK_AGGR_TASK) { + struct contention_task_data task; + int pid = key->aggr_key; + int task_fd = bpf_map__fd(skel->maps.task_data); + + /* do not update idle comm which contains CPU number */ + if (pid) { + struct thread *t = __machine__findnew_thread(machine, /*pid=*/-1, pid); + + if (t == NULL) + return name; + if (!bpf_map_lookup_elem(task_fd, &pid, &task) && + thread__set_comm(t, task.comm, /*timestamp=*/0)) + name = task.comm; + } + return name; + } + + if (con->aggr_mode == LOCK_AGGR_ADDR) { + sym = machine__find_kernel_symbol(machine, key->aggr_key, &kmap); + if (sym) + name = sym->name; + return name; + } + + /* LOCK_AGGR_CALLER: skip lock internal functions */ + while (machine__is_lock_function(machine, stack_trace[idx]) && + idx < con->max_stack - 1) + idx++; + + addr = stack_trace[idx]; + sym = machine__find_kernel_symbol(machine, addr, &kmap); + + if (sym) { + unsigned long offset; + + offset = kmap->map_ip(kmap, addr) - sym->start; + + if (offset 
== 0) + return sym->name; + + snprintf(name_buf, sizeof(name_buf), "%s+%#lx", sym->name, offset); + } else { + snprintf(name_buf, sizeof(name_buf), "%#lx", (unsigned long)addr); + } + + return name_buf; +} + int lock_contention_read(struct lock_contention *con) { - int fd, stack, task_fd, err = 0; + int fd, stack, err = 0; struct contention_key *prev_key, key; struct contention_data data = {}; struct lock_stat *st = NULL; @@ -175,7 +236,6 @@ int lock_contention_read(struct lock_contention *con) fd = bpf_map__fd(skel->maps.lock_stat); stack = bpf_map__fd(skel->maps.stacks); - task_fd = bpf_map__fd(skel->maps.task_data); con->lost = skel->bss->lost; @@ -195,9 +255,6 @@ int lock_contention_read(struct lock_contention *con) prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { - struct map *kmap; - struct symbol *sym; - int idx = 0; s32 stack_id; /* to handle errors in the loop body */ @@ -219,61 +276,19 @@ int lock_contention_read(struct lock_contention *con) st->flags = data.flags; st->addr = key.aggr_key; - if (con->aggr_mode == LOCK_AGGR_TASK) { - struct contention_task_data task; - struct thread *t; - int pid = key.aggr_key; - - /* do not update idle comm which contains CPU number */ - if (st->addr) { - bpf_map_lookup_elem(task_fd, &pid, &task); - t = __machine__findnew_thread(machine, /*pid=*/-1, pid); - thread__set_comm(t, task.comm, /*timestamp=*/0); - } - goto next; - } - - if (con->aggr_mode == LOCK_AGGR_ADDR) { - sym = machine__find_kernel_symbol(machine, st->addr, &kmap); - if (sym) - st->name = strdup(sym->name); - goto next; - } - stack_id = key.aggr_key; bpf_map_lookup_elem(stack, &stack_id, stack_trace); - /* skip lock internal functions */ - while (machine__is_lock_function(machine, stack_trace[idx]) && - idx < con->max_stack - 1) - idx++; - - st->addr = stack_trace[idx]; - sym = machine__find_kernel_symbol(machine, st->addr, &kmap); - - if (sym) { - unsigned long offset; - int ret = 0; - - offset = kmap->map_ip(kmap, st->addr) - 
sym->start; - - if (offset) - ret = asprintf(&st->name, "%s+%#lx", sym->name, offset); - else - st->name = strdup(sym->name); - - if (ret < 0 || st->name == NULL) - break; - } else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) { + st->name = strdup(lock_contention_get_name(con, &key, stack_trace)); + if (st->name == NULL) break; - } if (con->save_callstack) { st->callstack = memdup(stack_trace, stack_size); if (st->callstack == NULL) break; } -next: + hlist_add_head(&st->hash_entry, con->result); prev_key = &key; From 16cad1d3597d32e470a4115f11c5e61cce6cd81b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 2 Feb 2023 18:13:23 -0800 Subject: [PATCH 094/114] perf lock contention: Use lock_stat_find{,new} This is preparation work to support complex keys of BPF maps. Currently the key is a single value chosen according to the aggregation mode, like stack_id or pid. But we want to use a combination of those keys. Then lock_contention_read() should still aggregate the result based on the key that was requested by the user. The other key info will be used for filtering. So instead of always creating a lock_stat entry, check if it's already there using lock_stat_find() first.
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Juri Lelli Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 4 +-- tools/perf/util/Build | 5 +++- tools/perf/util/bpf_lock_contention.c | 41 ++++++++++++++++----------- tools/perf/util/lock-contention.h | 3 ++ 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 216a9a252bf4..0593c6e636c6 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -465,7 +465,7 @@ static struct lock_stat *pop_from_result(void) return container_of(node, struct lock_stat, rb); } -static struct lock_stat *lock_stat_find(u64 addr) +struct lock_stat *lock_stat_find(u64 addr) { struct hlist_head *entry = lockhashentry(addr); struct lock_stat *ret; @@ -477,7 +477,7 @@ static struct lock_stat *lock_stat_find(u64 addr) return NULL; } -static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags) +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags) { struct hlist_head *entry = lockhashentry(addr); struct lock_stat *ret, *new; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 79b9498886a2..918b501f9bd8 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -154,7 +154,10 @@ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o + +ifeq ($(CONFIG_LIBTRACEEVENT),y) + perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o +endif ifeq ($(CONFIG_LIBTRACEEVENT),y) perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 
286e52ba9f91..ead2898ba377 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -256,12 +256,34 @@ int lock_contention_read(struct lock_contention *con) prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { s32 stack_id; + const char *name; /* to handle errors in the loop body */ err = -1; bpf_map_lookup_elem(fd, &key, &data); - st = zalloc(sizeof(*st)); + + if (con->save_callstack) { + stack_id = key.aggr_key; + bpf_map_lookup_elem(stack, &stack_id, stack_trace); + } + + st = lock_stat_find(key.aggr_key); + if (st != NULL) { + st->wait_time_total += data.total_time; + if (st->wait_time_max < data.max_time) + st->wait_time_max = data.max_time; + if (st->wait_time_min > data.min_time) + st->wait_time_min = data.min_time; + + st->nr_contended += data.count; + if (st->nr_contended) + st->avg_wait_time = st->wait_time_total / st->nr_contended; + goto next; + } + + name = lock_contention_get_name(con, &key, stack_trace); + st = lock_stat_findnew(key.aggr_key, name, data.flags); if (st == NULL) break; @@ -274,14 +296,6 @@ int lock_contention_read(struct lock_contention *con) st->avg_wait_time = data.total_time / data.count; st->flags = data.flags; - st->addr = key.aggr_key; - - stack_id = key.aggr_key; - bpf_map_lookup_elem(stack, &stack_id, stack_trace); - - st->name = strdup(lock_contention_get_name(con, &key, stack_trace)); - if (st->name == NULL) - break; if (con->save_callstack) { st->callstack = memdup(stack_trace, stack_size); @@ -289,19 +303,14 @@ int lock_contention_read(struct lock_contention *con) break; } - hlist_add_head(&st->hash_entry, con->result); +next: prev_key = &key; - /* we're fine now, reset the values */ - st = NULL; + /* we're fine now, reset the error */ err = 0; } free(stack_trace); - if (st) { - free(st->name); - free(st); - } return err; } diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index 17e594d57a61..39d5bfc77f4e 100644 --- 
a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -65,6 +65,9 @@ struct lock_stat { */ #define MAX_LOCK_DEPTH 48 +struct lock_stat *lock_stat_find(u64 addr); +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags); + /* * struct lock_seq_stat: * Place to put on state of one lock sequence From ebab291641bed48f62c608e3bf29071c435c2d9b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 2 Feb 2023 18:13:24 -0800 Subject: [PATCH 095/114] perf lock contention: Support filters for different aggregation It'd be useful to filter by keys other than the current aggregation mode. For example, users may want to see callstacks for specific locks only. Or they may want tasks from a certain callstack. The tracepoints already collect the information, but the condition needs to be checked again when processing the event. And BPF needs to change to allow the key combinations. The lock contentions on 'rcu_state' spinlock can be monitored: $ sudo perf lock con -abv -L rcu_state sleep 1 ...
contended total wait max wait avg wait type caller 4 151.39 us 62.57 us 37.85 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 0xffffffff81d4a259 cpuidle_enter+0x29 1 30.21 us 30.21 us 30.21 us spinlock rcu_core+0xcb 0xffffffff81fd1666 _raw_spin_lock_irqsave+0x46 0xffffffff8172d76b rcu_core+0xcb 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc00c4 sysvec_apic_timer_interrupt+0x54 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 1 28.84 us 28.84 us 28.84 us spinlock rcu_accelerate_cbs_unlocked+0x40 0xffffffff81fd1c60 _raw_spin_lock+0x30 0xffffffff81728cf0 rcu_accelerate_cbs_unlocked+0x40 0xffffffff8172da82 rcu_core+0x3e2 0xffffffff822000eb __softirqentry_text_start+0xeb 0xffffffff816a0ba9 __irq_exit_rcu+0xc9 0xffffffff81fc0112 sysvec_apic_timer_interrupt+0xa2 0xffffffff82000e46 asm_sysvec_apic_timer_interrupt+0x16 0xffffffff81d49f78 cpuidle_enter_state+0xd8 ... 
To see tasks calling 'rcu_core' function: $ sudo perf lock con -abt -S rcu_core sleep 1 contended total wait max wait avg wait pid comm 19 23.46 us 2.21 us 1.23 us 0 swapper 2 18.37 us 17.01 us 9.19 us 2061859 ThreadPoolForeg 3 5.76 us 1.97 us 1.92 us 3909 pipewire-pulse 1 2.26 us 2.26 us 2.26 us 1809271 MediaSu~isor #2 1 1.97 us 1.97 us 1.97 us 1514882 Chrome_ChildIOT 1 987 ns 987 ns 987 ns 3740 pipewire-pulse Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Juri Lelli Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230203021324.143540-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 75 +++++++++++-------- tools/perf/util/bpf_lock_contention.c | 44 +++++++---- .../perf/util/bpf_skel/lock_contention.bpf.c | 15 ++-- tools/perf/util/bpf_skel/lock_data.h | 4 +- tools/perf/util/lock-contention.h | 2 + 5 files changed, 89 insertions(+), 51 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 0593c6e636c6..0d11f301fd72 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -509,6 +509,34 @@ alloc_failed: return NULL; } +bool match_callstack_filter(struct machine *machine, u64 *callstack) +{ + struct map *kmap; + struct symbol *sym; + u64 ip; + + if (list_empty(&callstack_filters)) + return true; + + for (int i = 0; i < max_stack_depth; i++) { + struct callstack_filter *filter; + + if (!callstack || !callstack[i]) + break; + + ip = callstack[i]; + sym = machine__find_kernel_symbol(machine, ip, &kmap); + if (sym == NULL) + continue; + + list_for_each_entry(filter, &callstack_filters, list) { + if (strstr(sym->name, filter->name)) + return true; + } + } + return false; +} + struct trace_lock_handler { /* it's used on CONFIG_LOCKDEP */ int (*acquire_event)(struct evsel *evsel, @@ -1070,12 +1098,6 @@ static int report_lock_contention_begin_event(struct evsel *evsel, ls = 
lock_stat_findnew(key, name, flags); if (!ls) return -ENOMEM; - - if (aggr_mode == LOCK_AGGR_CALLER && needs_callstack()) { - ls->callstack = get_callstack(sample, max_stack_depth); - if (ls->callstack == NULL) - return -ENOMEM; - } } if (filters.nr_types) { @@ -1106,6 +1128,22 @@ static int report_lock_contention_begin_event(struct evsel *evsel, return 0; } + if (needs_callstack()) { + u64 *callstack = get_callstack(sample, max_stack_depth); + if (callstack == NULL) + return -ENOMEM; + + if (!match_callstack_filter(machine, callstack)) { + free(callstack); + return 0; + } + + if (ls->callstack == NULL) + ls->callstack = callstack; + else + free(callstack); + } + ts = thread_stat_findnew(sample->tid); if (!ts) return -ENOMEM; @@ -1606,31 +1644,6 @@ static void print_contention_result(struct lock_contention *con) if (!st->wait_time_total) continue; - if (aggr_mode == LOCK_AGGR_CALLER && !list_empty(&callstack_filters)) { - struct map *kmap; - struct symbol *sym; - u64 ip; - - for (int i = 0; i < max_stack_depth; i++) { - struct callstack_filter *filter; - - if (!st->callstack || !st->callstack[i]) - break; - - ip = st->callstack[i]; - sym = machine__find_kernel_symbol(con->machine, ip, &kmap); - if (sym == NULL) - continue; - - list_for_each_entry(filter, &callstack_filters, list) { - if (strstr(sym->name, filter->name)) - goto found; - } - } - continue; - } - -found: list_for_each_entry(key, &lock_keys, list) { key->print(key, st); pr_info(" "); diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index ead2898ba377..72cf81114982 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -34,13 +34,15 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries); bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries); - if (con->aggr_mode == LOCK_AGGR_TASK) { + if (con->aggr_mode == LOCK_AGGR_TASK) 
bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries); - bpf_map__set_max_entries(skel->maps.stacks, 1); - } else { + else bpf_map__set_max_entries(skel->maps.task_data, 1); + + if (con->save_callstack) bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); - } + else + bpf_map__set_max_entries(skel->maps.stacks, 1); if (target__has_cpu(target)) ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); @@ -146,6 +148,7 @@ int lock_contention_prepare(struct lock_contention *con) /* these don't work well if in the rodata section */ skel->bss->stack_skip = con->stack_skip; skel->bss->aggr_mode = con->aggr_mode; + skel->bss->needs_callstack = con->save_callstack; lock_contention_bpf__attach(skel); return 0; @@ -177,7 +180,7 @@ static const char *lock_contention_get_name(struct lock_contention *con, if (con->aggr_mode == LOCK_AGGR_TASK) { struct contention_task_data task; - int pid = key->aggr_key; + int pid = key->pid; int task_fd = bpf_map__fd(skel->maps.task_data); /* do not update idle comm which contains CPU number */ @@ -194,7 +197,7 @@ static const char *lock_contention_get_name(struct lock_contention *con, } if (con->aggr_mode == LOCK_AGGR_ADDR) { - sym = machine__find_kernel_symbol(machine, key->aggr_key, &kmap); + sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap); if (sym) name = sym->name; return name; @@ -255,20 +258,35 @@ int lock_contention_read(struct lock_contention *con) prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { - s32 stack_id; + s64 ls_key; const char *name; /* to handle errors in the loop body */ err = -1; bpf_map_lookup_elem(fd, &key, &data); - if (con->save_callstack) { - stack_id = key.aggr_key; - bpf_map_lookup_elem(stack, &stack_id, stack_trace); + bpf_map_lookup_elem(stack, &key.stack_id, stack_trace); + + if (!match_callstack_filter(machine, stack_trace)) + goto next; } - st = lock_stat_find(key.aggr_key); + switch (con->aggr_mode) { + case LOCK_AGGR_CALLER: + ls_key = 
key.stack_id; + break; + case LOCK_AGGR_TASK: + ls_key = key.pid; + break; + case LOCK_AGGR_ADDR: + ls_key = key.lock_addr; + break; + default: + goto next; + } + + st = lock_stat_find(ls_key); if (st != NULL) { st->wait_time_total += data.total_time; if (st->wait_time_max < data.max_time) @@ -283,7 +301,7 @@ int lock_contention_read(struct lock_contention *con) } name = lock_contention_get_name(con, &key, stack_trace); - st = lock_stat_findnew(key.aggr_key, name, data.flags); + st = lock_stat_findnew(ls_key, name, data.flags); if (st == NULL) break; @@ -295,8 +313,6 @@ int lock_contention_read(struct lock_contention *con) if (data.count) st->avg_wait_time = data.total_time / data.count; - st->flags = data.flags; - if (con->save_callstack) { st->callstack = memdup(stack_trace, stack_size); if (st->callstack == NULL) diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index ad0ca5d50557..7ce276ed987e 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -82,6 +82,7 @@ int has_cpu; int has_task; int has_type; int has_addr; +int needs_callstack; int stack_skip; /* determine the key of lock stat */ @@ -173,7 +174,7 @@ int contention_begin(u64 *ctx) pelem->lock = (__u64)ctx[0]; pelem->flags = (__u32)ctx[1]; - if (aggr_mode == LOCK_AGGR_CALLER) { + if (needs_callstack) { pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip); if (pelem->stack_id < 0) @@ -188,7 +189,7 @@ int contention_end(u64 *ctx) { __u32 pid; struct tstamp_data *pelem; - struct contention_key key; + struct contention_key key = {}; struct contention_data *data; __u64 duration; @@ -204,14 +205,18 @@ int contention_end(u64 *ctx) switch (aggr_mode) { case LOCK_AGGR_CALLER: - key.aggr_key = pelem->stack_id; + key.stack_id = pelem->stack_id; break; case LOCK_AGGR_TASK: - key.aggr_key = pid; + key.pid = pid; update_task_data(pid); + if (needs_callstack) + 
key.stack_id = pelem->stack_id; break; case LOCK_AGGR_ADDR: - key.aggr_key = pelem->lock; + key.lock_addr = pelem->lock; + if (needs_callstack) + key.stack_id = pelem->stack_id; break; default: /* should not happen */ diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h index ce71cf1a7e1e..3d35fd4407ac 100644 --- a/tools/perf/util/bpf_skel/lock_data.h +++ b/tools/perf/util/bpf_skel/lock_data.h @@ -4,7 +4,9 @@ #define UTIL_BPF_SKEL_LOCK_DATA_H struct contention_key { - u64 aggr_key; /* can be stack_id, pid or lock addr */ + u32 stack_id; + u32 pid; + u64 lock_addr; }; #define TASK_COMM_LEN 16 diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index 39d5bfc77f4e..e5fc036108ec 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -68,6 +68,8 @@ struct lock_stat { struct lock_stat *lock_stat_find(u64 addr); struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags); +bool match_callstack_filter(struct machine *machine, u64 *callstack); + /* * struct lock_seq_stat: * Place to put on state of one lock sequence From d7d213e04cf83318681f24870f1144e50d5c91bb Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:48 -0800 Subject: [PATCH 096/114] perf report: Support Retire Latency The Retire Latency field is added in the var3_w of the PERF_SAMPLE_WEIGHT_STRUCT. The Retire Latency reports pipeline stall of this instruction compared to the previous instruction in cycles. That's quite useful to display the information with perf mem report. The p_stage_cyc for Power is also from the var3_w. Union the p_stage_cyc and retire_lat to share the code. Implement X86 specific codes to display the X86 specific header. Add a new sort key retire_lat for the Retire Latency. 
Reviewed-by: Andi Kleen Signed-off-by: Kan Liang Cc: Ian Rogers Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20230104201349.1451191-8-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 ++ tools/perf/arch/x86/util/event.c | 20 ++++++++++++++++++++ tools/perf/util/sample.h | 5 ++++- tools/perf/util/sort.c | 2 ++ tools/perf/util/sort.h | 2 ++ 5 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9b0c0dbf9a77..c242e8da6b1a 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -115,6 +115,8 @@ OPTIONS - p_stage_cyc: On powerpc, this presents the number of cycles spent in a pipeline stage. And currently supported only on powerpc. - addr: (Full) virtual address of the sampled instruction + - retire_lat: On X86, this reports pipeline stall of this instruction compared + to the previous instruction in cycles. And currently supported only on X86 By default, comm, dso and symbol keys are used. (i.e. 
--sort comm,dso,symbol) diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index a3acefe6d0c6..37b3feb53e8d 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -89,6 +89,7 @@ void arch_perf_parse_sample_weight(struct perf_sample *data, else { data->weight = weight.var1_dw; data->ins_lat = weight.var2_w; + data->retire_lat = weight.var3_w; } } @@ -102,3 +103,22 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data, *array |= ((u64)data->ins_lat << 32); } } + +const char *arch_perf_header_entry(const char *se_header) +{ + if (!strcmp(se_header, "Local Pipeline Stage Cycle")) + return "Local Retire Latency"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Retire Latency"; + + return se_header; +} + +int arch_support_sort_key(const char *sort_key) +{ + if (!strcmp(sort_key, "p_stage_cyc")) + return 1; + if (!strcmp(sort_key, "local_p_stage_cyc")) + return 1; + return 0; +} diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index 60ec79d4eea4..33b08e0ac746 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -92,7 +92,10 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; - u16 p_stage_cyc; + union { + u16 p_stage_cyc; + u16 retire_lat; + }; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index d7d0f997873a..4a648231fe72 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2133,6 +2133,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc), DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc), DIM(SORT_ADDR, "addr", sort_addr), + DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc), + DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc), }; #undef DIM diff --git 
a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 921715e6aec4..9a91d0df2833 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -237,6 +237,8 @@ enum sort_type { SORT_LOCAL_PIPELINE_STAGE_CYC, SORT_GLOBAL_PIPELINE_STAGE_CYC, SORT_ADDR, + SORT_LOCAL_RETIRE_LAT, + SORT_GLOBAL_RETIRE_LAT, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, From 17f248aa8664ff5b3643491136283e73b5c18166 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 4 Jan 2023 12:13:49 -0800 Subject: [PATCH 097/114] perf script: Support Retire Latency The Retire Latency field is added in the var3_w of the PERF_SAMPLE_WEIGHT_STRUCT. The Retire Latency reports the number of elapsed core clocks between the retirement of the instruction indicated by the Instruction Pointer field of the PEBS record and the retirement of the prior instruction. That's quite useful to display the information with perf script. Add a new field retire_lat for the Retire Latency information. Reviewed-by: Andi Kleen Signed-off-by: Kan Liang Cc: Ian Rogers Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20230104201349.1451191-9-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 339b441015eb..a792214d1af8 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -132,6 +132,7 @@ enum perf_output_field { PERF_OUTPUT_MACHINE_PID = 1ULL << 37, PERF_OUTPUT_VCPU = 1ULL << 38, PERF_OUTPUT_CGROUP = 1ULL << 39, + PERF_OUTPUT_RETIRE_LAT = 1ULL << 40, }; struct perf_script { @@ -203,6 +204,7 @@ struct output_option { {.str = "machine_pid", .field = PERF_OUTPUT_MACHINE_PID}, {.str = "vcpu", .field = PERF_OUTPUT_VCPU}, {.str = "cgroup", .field = PERF_OUTPUT_CGROUP}, + {.str = "retire_lat", .field = PERF_OUTPUT_RETIRE_LAT}, }; enum { @@ -278,7 +280,7 @@ static struct { 
PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE | - PERF_OUTPUT_INS_LAT, + PERF_OUTPUT_INS_LAT | PERF_OUTPUT_RETIRE_LAT, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -551,6 +553,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; } + if (PRINT_FIELD(RETIRE_LAT) && + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_RETIRE_LAT)) + return -EINVAL; + return 0; } @@ -2188,6 +2194,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(INS_LAT)) fprintf(fp, "%16" PRIu16, sample->ins_lat); + if (PRINT_FIELD(RETIRE_LAT)) + fprintf(fp, "%16" PRIu16, sample->retire_lat); + if (PRINT_FIELD(IP)) { struct callchain_cursor *cursor = NULL; @@ -3877,7 +3886,7 @@ int cmd_script(int argc, const char **argv) "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," "brstackinsnlen,brstackoff,callindent,insn,insnlen,synth," "phys_addr,metric,misc,srccode,ipc,tod,data_page_size," - "code_page_size,ins_lat,machine_pid,vcpu,cgroup", + "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), From ee739f132f716f28c9fbe70a230e35085c197dd5 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Tue, 31 Jan 2023 19:20:01 +0530 Subject: [PATCH 098/114] perf test bpf: Check for libtraceevent support The "bpf" tests fails in environment with missing libtraceevent support as below: # ./perf test 36 36: BPF filter : 36.1: Basic BPF filtering : FAILED! 36.2: BPF pinning : FAILED! 36.3: BPF prologue generation : FAILED! The environment has clang but missing the libtraceevent devel. Hence perf is compiled without libtraceevent support. 
Detailed logs: ./perf test -v "Basic BPF filtering" Failed to add BPF event syscalls:sys_enter_epoll_pwait bpf: tracepoint call back failed, stop iterate Failed to add events selected by BPF The bpf tests try to add a probe event, which fails at "parse_events_add_tracepoint" function due to missing libtraceevent. Add check for "HAVE_LIBTRACEEVENT" in the "tests/bpf.c" before proceeding with the test. With the change, # ./perf test 36 36: BPF filter : 36.1: Basic BPF filtering : Skip (not compiled in or missing libtraceevent support) 36.2: BPF pinning : Skip (not compiled in or missing libtraceevent support) 36.3: BPF prologue generation : Skip (not compiled in or missing libtraceevent support) Signed-off-by: Athira Jajeev Tested-by: Arnaldo Carvalho de Melo Tested-by: Disha Goel Cc: Andi Kleen Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230131135001.54578-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 17c023823713..4af39528f611 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -23,7 +23,7 @@ #define NR_ITERS 111 #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test" -#ifdef HAVE_LIBBPF_SUPPORT +#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT) #include #include @@ -330,10 +330,10 @@ static int test__bpf(int i) static int test__basic_bpf_test(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { -#ifdef HAVE_LIBBPF_SUPPORT +#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT) return test__bpf(0); #else - pr_debug("Skip BPF test because BPF support is not compiled\n"); + pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n"); return 
TEST_SKIP; #endif } @@ -341,10 +341,10 @@ static int test__basic_bpf_test(struct test_suite *test __maybe_unused, static int test__bpf_pinning(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { -#ifdef HAVE_LIBBPF_SUPPORT +#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT) return test__bpf(1); #else - pr_debug("Skip BPF test because BPF support is not compiled\n"); + pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n"); return TEST_SKIP; #endif } @@ -352,17 +352,17 @@ static int test__bpf_pinning(struct test_suite *test __maybe_unused, static int test__bpf_prologue_test(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { -#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE) +#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE) && defined(HAVE_LIBTRACEEVENT) return test__bpf(2); #else - pr_debug("Skip BPF test because BPF support is not compiled\n"); + pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n"); return TEST_SKIP; #endif } static struct test_case bpf_tests[] = { -#ifdef HAVE_LIBBPF_SUPPORT +#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT) TEST_CASE("Basic BPF filtering", basic_bpf_test), TEST_CASE_REASON("BPF pinning", bpf_pinning, "clang isn't installed or environment missing BPF support"), @@ -373,9 +373,9 @@ static struct test_case bpf_tests[] = { TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), #endif #else - TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in"), - TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in"), - TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), + TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in or missing libtraceevent support"), + TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in or missing libtraceevent support"), + TEST_CASE_REASON("BPF prologue 
generation", bpf_prologue_test, "not compiled in or missing libtraceevent support"), #endif { .name = NULL, } }; From e65f91b20c3d170a1e8b1b6b40cd96bea6343194 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 2 Feb 2023 11:22:09 -0800 Subject: [PATCH 099/114] perf test x86: Support the retire_lat (Retire Latency) sample_type check Add test for the new field for Retire Latency in the X86 specific test. Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20230202192209.1795329-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/sample-parsing.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c index 690c7c07e90d..a061e8619267 100644 --- a/tools/perf/arch/x86/tests/sample-parsing.c +++ b/tools/perf/arch/x86/tests/sample-parsing.c @@ -27,8 +27,10 @@ static bool samples_same(const struct perf_sample *s1, const struct perf_sample *s2, u64 type) { - if (type & PERF_SAMPLE_WEIGHT_STRUCT) + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { COMP(ins_lat); + COMP(retire_lat); + } return true; } @@ -48,6 +50,7 @@ static int do_test(u64 sample_type) struct perf_sample sample = { .weight = 101, .ins_lat = 102, + .retire_lat = 103, }; struct perf_sample sample_out; size_t i, sz, bufsz; From 957ed139d760fb5257e5a587f78a339e23d9b741 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 6 Feb 2023 08:20:59 -0800 Subject: [PATCH 100/114] perf event x86: Add retire_lat when synthesizing PERF_SAMPLE_WEIGHT_STRUCT In arch_perf_synthesize_sample_weight(), the retire_lat was mistakenly missed, add it. 
perf test -v "x86 sample parsing" 74: x86 Sample parsing : --- start --- test child forked, pid 72526 Samples differ at 'retire_lat' parsing failed for sample_type 0x1000000 test child finished with -1 ---- end ---- x86 Sample parsing: FAILED! Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20230206162100.3329395-1-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/event.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index 37b3feb53e8d..e4288d09f3a0 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -101,6 +101,7 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data, if (type & PERF_SAMPLE_WEIGHT_STRUCT) { *array &= 0xffffffff; *array |= ((u64)data->ins_lat << 32); + *array |= ((u64)data->retire_lat << 48); } } From 4e846311a9be53999d9c52bba4ce76939d2b0e64 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 6 Feb 2023 08:21:00 -0800 Subject: [PATCH 101/114] perf script: Fix missing Retire Latency fields option documentation The 'perf script' documentation is missing the fields option for Retire Latency. Add it. 
Signed-off-by: Kan Liang Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20230206162100.3329395-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index a2ebadc9d948..777a0d8ba7d1 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -134,7 +134,7 @@ OPTIONS srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat, - machine_pid, vcpu, cgroup. + machine_pid, vcpu, cgroup, retire_lat. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace From 67ef66bad42b32237a0ddc6bdb5cc2653c354fec Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 5 Jan 2023 17:47:41 +0530 Subject: [PATCH 102/114] perf probe: Update the exit error codes in function try_to_find_probe_trace_event try_to_find_probe_trace_events() uses return error code as ENOENT in two places. First place is after open_debuginfo() when opening debuginfo fails and secondly, after when not finding the probe point. This function is invoked during BPF load and there are other exit points in this code path which returns ENOENT. This makes it difficult to understand the exact reason for exit. 
This patch changes the exit code from ENOENT to: - ENODATA when it fails to find debuginfo - ENODEV when it fails to find probe point Signed-off-by: Athira Rajeev Cc: Andi Kleen Cc: Disha Goel Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230105121742.92249-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0c24bc7afbca..881d94f65a6b 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -917,7 +917,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf); if (!dinfo) { if (need_dwarf) - return -ENOENT; + return -ENODATA; pr_debug("Could not open debuginfo. Try to use symbols.\n"); return 0; } @@ -956,7 +956,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, if (ntevs == 0) { /* No error but failed to find probe point. */ pr_warning("Probe point '%s' not found.\n", synthesize_perf_probe_point(&pev->point)); - return -ENOENT; + return -ENODEV; } else if (ntevs < 0) { /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); From 34266f904abd45731bdade2e92d0536c092ee9bc Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 5 Jan 2023 17:47:42 +0530 Subject: [PATCH 103/114] perf test bpf: Skip test if kernel-debuginfo is not present Perf BPF filter test fails in environment where "kernel-debuginfo" is not installed. Test failure logs: <<>> 42: BPF filter : 42.1: Basic BPF filtering : Ok 42.2: BPF pinning : Ok 42.3: BPF prologue generation : FAILED! <<>> Enabling verbose option provided debug logs, which says debuginfo needs to be installed. 
Snippet of verbose logs: <<>> 42.3: BPF prologue generation : --- start --- test child forked, pid 28218 <<>> Rebuild with CONFIG_DEBUG_INFO=y, or install an appropriate debuginfo package. bpf_probe: failed to convert perf probe events Failed to add events selected by BPF test child finished with -1 ---- end ---- BPF filter subtest 3: FAILED! <<>> Here the subtest "BPF prologue generation" failed and the logs show debuginfo is needed. After installing kernel-debuginfo package, testcase passes. The "BPF prologue generation" subtest failed because do_test() returns TEST_FAIL without checking the error type returned by parse_events_load_bpf_obj(). parse_events_load_bpf_obj() can also return error of type -ENODATA in case the kernel-debuginfo package is not installed. Fix this by adding check for -ENODATA error. Test result after the patch changes: Test failure logs: <<>> 42: BPF filter : 42.1: Basic BPF filtering : Ok 42.2: BPF pinning : Ok 42.3: BPF prologue generation : Skip (clang/debuginfo isn't installed or environment missing BPF support) <<>> Fixes: ba1fae431e74bb42 ("perf test: Add 'perf test BPF'") Signed-off-by: Athira Rajeev Cc: Andi Kleen Cc: Disha Goel Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: Wang Nan Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/linux-perf-users/Y7bIk77mdE4j8Jyi@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 4af39528f611..ae9223f27cfb 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -126,6 +126,10 @@ static int do_test(struct bpf_object *obj, int (*func)(void), err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL); parse_events_error__exit(&parse_error); + if (err == -ENODATA) { + pr_debug("Failed to add events selected by 
BPF, debuginfo package not installed\n"); + return TEST_SKIP; + } if (err || list_empty(&parse_state.list)) { pr_debug("Failed to add events selected by BPF\n"); return TEST_FAIL; @@ -368,7 +372,7 @@ static struct test_case bpf_tests[] = { "clang isn't installed or environment missing BPF support"), #ifdef HAVE_BPF_PROLOGUE TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, - "clang isn't installed or environment missing BPF support"), + "clang/debuginfo isn't installed or environment missing BPF support"), #else TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"), #endif From 55e391852e713f85af4e724443f929b3ce5b5dbe Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 6 Feb 2023 16:24:01 -0800 Subject: [PATCH 104/114] perf lock contention: Fix to save callstack for the default modified The previous change missed to set the con->save_callstack for the LOCK_AGGR_CALLER mode resulting in no caller information. Fixes: ebab291641bed48f ("perf lock contention: Support filters for different aggregation") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Waiman Long Cc: Will Deacon Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230207002403.63590-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 0d11f301fd72..a4b5c481129c 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1806,6 +1806,9 @@ static int __cmd_contention(int argc, const char **argv) con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : show_lock_addrs ? 
LOCK_AGGR_ADDR : LOCK_AGGR_CALLER; + if (con.aggr_mode == LOCK_AGGR_CALLER) + con.save_callstack = true; + /* for lock function check */ symbol_conf.sort_by_name = true; symbol_conf.allow_aliases = true; From 3477f079fe70b3c97a619788d89ac357e207f302 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 6 Feb 2023 16:24:02 -0800 Subject: [PATCH 105/114] perf lock contention: Add -o/--lock-owner option When there're many lock contentions in the system, people sometimes want to know who caused the contention, IOW who's the owner of the locks. The -o/--lock-owner option tries to follow the lock owners for the contended mutexes and rwsems from BPF, and then attributes the contention time to the owner instead of the waiter. It's a best effort approach to get the owner info at the time of the contention and doesn't guarantee to have the precise tracking of owners if it's changing over time. Currently it only handles mutex and rwsem that have owner field in their struct and it basically points to a task_struct that owns the lock at the moment. Technically its type is atomic_long_t and it comes with some LSB bits used for other meanings. So it needs to clear them when casting it to a pointer to task_struct. Also the atomic_long_t is a typedef of the atomic 32 or 64 bit types depending on arch which is a wrapper struct for the counter value. I'm not aware of proper ways to access those kernel atomic types from BPF so I just read the internal counter value directly. Please let me know if there's a better way. When -o/--lock-owner option is used, it goes to the task aggregation mode like -t/--threads option does. However it cannot get the owner for other lock types like spinlock and sometimes even for mutex. 
$ sudo ./perf lock con -abo -- ./perf bench sched pipe # Running 'sched/pipe' benchmark: # Executed 1000000 pipe operations between two processes Total time: 4.766 [sec] 4.766540 usecs/op 209795 ops/sec contended total wait max wait avg wait pid owner 403 565.32 us 26.81 us 1.40 us -1 Unknown 4 27.99 us 8.57 us 7.00 us 1583145 sched-pipe 1 8.25 us 8.25 us 8.25 us 1583144 sched-pipe 1 2.03 us 2.03 us 2.03 us 5068 chrome As you can see, the owner is unknown in most cases. But if we filter only for the mutex locks, it's more likely to get the owners. $ sudo ./perf lock con -abo -Y mutex -- ./perf bench sched pipe # Running 'sched/pipe' benchmark: # Executed 1000000 pipe operations between two processes Total time: 4.910 [sec] 4.910435 usecs/op 203647 ops/sec contended total wait max wait avg wait pid owner 2 15.50 us 8.29 us 7.75 us 1582852 sched-pipe 7 7.20 us 2.47 us 1.03 us -1 Unknown 1 6.74 us 6.74 us 6.74 us 1582851 sched-pipe Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: Waiman Long Cc: Will Deacon Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230207002403.63590-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-lock.txt | 5 ++ tools/perf/builtin-lock.c | 49 ++++++++++++--- tools/perf/util/bpf_lock_contention.c | 1 + .../perf/util/bpf_skel/lock_contention.bpf.c | 60 +++++++++++++++++-- tools/perf/util/lock-contention.h | 1 + 5 files changed, 102 insertions(+), 14 deletions(-) diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 11b8901d8d13..37aae194a2a1 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -172,6 +172,11 @@ CONTENTION OPTIONS --lock-addr:: Show lock contention stat by address +-o:: +--lock-owner:: + Show lock contention stat by owners. 
Implies --threads and + requires --use-bpf. + -Y:: --type-filter=:: Show lock contention only for given lock types (comma separated list). diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index a4b5c481129c..054997edd98b 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -58,6 +58,7 @@ static struct rb_root thread_stats; static bool combine_locks; static bool show_thread_stats; static bool show_lock_addrs; +static bool show_lock_owner; static bool use_bpf; static unsigned long bpf_map_entries = 10240; static int max_stack_depth = CONTENTION_STACK_DEPTH; @@ -1616,7 +1617,8 @@ static void print_contention_result(struct lock_contention *con) switch (aggr_mode) { case LOCK_AGGR_TASK: - pr_info(" %10s %s\n\n", "pid", "comm"); + pr_info(" %10s %s\n\n", "pid", + show_lock_owner ? "owner" : "comm"); break; case LOCK_AGGR_CALLER: pr_info(" %10s %s\n\n", "type", "caller"); @@ -1656,7 +1658,8 @@ static void print_contention_result(struct lock_contention *con) case LOCK_AGGR_TASK: pid = st->addr; t = perf_session__findnew(session, pid); - pr_info(" %10d %s\n", pid, thread__comm_str(t)); + pr_info(" %10d %s\n", + pid, pid == -1 ? 
"Unknown" : thread__comm_str(t)); break; case LOCK_AGGR_ADDR: pr_info(" %016llx %s\n", (unsigned long long)st->addr, @@ -1768,6 +1771,37 @@ static void sighandler(int sig __maybe_unused) { } +static int check_lock_contention_options(const struct option *options, + const char * const *usage) + +{ + if (show_thread_stats && show_lock_addrs) { + pr_err("Cannot use thread and addr mode together\n"); + parse_options_usage(usage, options, "threads", 0); + parse_options_usage(NULL, options, "lock-addr", 0); + return -1; + } + + if (show_lock_owner && !use_bpf) { + pr_err("Lock owners are available only with BPF\n"); + parse_options_usage(usage, options, "lock-owner", 0); + parse_options_usage(NULL, options, "use-bpf", 0); + return -1; + } + + if (show_lock_owner && show_lock_addrs) { + pr_err("Cannot use owner and addr mode together\n"); + parse_options_usage(usage, options, "lock-owner", 0); + parse_options_usage(NULL, options, "lock-addr", 0); + return -1; + } + + if (show_lock_owner) + show_thread_stats = true; + + return 0; +} + static int __cmd_contention(int argc, const char **argv) { int err = -EINVAL; @@ -1793,6 +1827,7 @@ static int __cmd_contention(int argc, const char **argv) .stack_skip = stack_skip, .filters = &filters, .save_callstack = needs_callstack(), + .owner = show_lock_owner, }; session = perf_session__new(use_bpf ? 
NULL : &data, &eops); @@ -2272,6 +2307,7 @@ int cmd_lock(int argc, const char **argv) "Filter specific address/symbol of locks", parse_lock_addr), OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES", "Filter specific function in the callstack", parse_call_stack), + OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"), OPT_PARENT(lock_options) }; @@ -2342,14 +2378,9 @@ int cmd_lock(int argc, const char **argv) contention_usage, 0); } - if (show_thread_stats && show_lock_addrs) { - pr_err("Cannot use thread and addr mode together\n"); - parse_options_usage(contention_usage, contention_options, - "threads", 0); - parse_options_usage(NULL, contention_options, - "lock-addr", 0); + if (check_lock_contention_options(contention_options, + contention_usage) < 0) return -1; - } rc = __cmd_contention(argc, argv); } else { diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 72cf81114982..fadcacb9d501 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -149,6 +149,7 @@ int lock_contention_prepare(struct lock_contention *con) skel->bss->stack_skip = con->stack_skip; skel->bss->aggr_mode = con->aggr_mode; skel->bss->needs_callstack = con->save_callstack; + skel->bss->lock_owner = con->owner; lock_contention_bpf__attach(skel); return 0; diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 7ce276ed987e..c5556606134e 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -10,6 +10,14 @@ /* default buffer size */ #define MAX_ENTRIES 10240 +/* lock contention flags from include/trace/events/lock.h */ +#define LCB_F_SPIN (1U << 0) +#define LCB_F_READ (1U << 1) +#define LCB_F_WRITE (1U << 2) +#define LCB_F_RT (1U << 3) +#define LCB_F_PERCPU (1U << 4) +#define LCB_F_MUTEX (1U << 5) + struct tstamp_data { __u64 timestamp; __u64 lock; @@ -84,6 
+92,7 @@ int has_type; int has_addr; int needs_callstack; int stack_skip; +int lock_owner; /* determine the key of lock stat */ int aggr_mode; @@ -132,17 +141,24 @@ static inline int can_record(u64 *ctx) return 1; } -static inline void update_task_data(__u32 pid) +static inline int update_task_data(struct task_struct *task) { struct contention_task_data *p; + int pid, err; + + err = bpf_core_read(&pid, sizeof(pid), &task->pid); + if (err) + return -1; p = bpf_map_lookup_elem(&task_data, &pid); if (p == NULL) { - struct contention_task_data data; + struct contention_task_data data = {}; - bpf_get_current_comm(data.comm, sizeof(data.comm)); + BPF_CORE_READ_STR_INTO(&data.comm, task, comm); bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST); } + + return 0; } SEC("tp_btf/contention_begin") @@ -179,6 +195,38 @@ int contention_begin(u64 *ctx) BPF_F_FAST_STACK_CMP | stack_skip); if (pelem->stack_id < 0) lost++; + } else if (aggr_mode == LOCK_AGGR_TASK) { + struct task_struct *task; + + if (lock_owner) { + if (pelem->flags & LCB_F_MUTEX) { + struct mutex *lock = (void *)pelem->lock; + unsigned long owner = BPF_CORE_READ(lock, owner.counter); + + task = (void *)(owner & ~7UL); + } else if (pelem->flags == LCB_F_READ || pelem->flags == LCB_F_WRITE) { + struct rw_semaphore *lock = (void *)pelem->lock; + unsigned long owner = BPF_CORE_READ(lock, owner.counter); + + task = (void *)(owner & ~7UL); + } else { + task = NULL; + } + + /* The flags is not used anymore. Pass the owner pid. 
*/ + if (task) + pelem->flags = BPF_CORE_READ(task, pid); + else + pelem->flags = -1U; + + } else { + task = bpf_get_current_task_btf(); + } + + if (task) { + if (update_task_data(task) < 0 && lock_owner) + pelem->flags = -1U; + } } return 0; @@ -208,8 +256,10 @@ int contention_end(u64 *ctx) key.stack_id = pelem->stack_id; break; case LOCK_AGGR_TASK: - key.pid = pid; - update_task_data(pid); + if (lock_owner) + key.pid = pelem->flags; + else + key.pid = pid; if (needs_callstack) key.stack_id = pelem->stack_id; break; diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index e5fc036108ec..040b618b2215 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -133,6 +133,7 @@ struct lock_contention { int max_stack; int stack_skip; int aggr_mode; + int owner; bool save_callstack; }; From 1bece1351c653c3d36bf761513e21ac8428449b4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 6 Feb 2023 16:24:03 -0800 Subject: [PATCH 106/114] perf lock contention: Support old rw_semaphore type The old kernel has a different type of the owner field in rwsem. We can check it using bpf_core_type_matches() builtin in clang but it also needs its own version check since it's available on recent versions. 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Hao Luo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: Waiman Long Cc: Will Deacon Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230207002403.63590-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/bpf_skel/lock_contention.bpf.c | 57 ++++++++++++++----- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index c5556606134e..e6007eaeda1a 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -84,6 +84,14 @@ struct { __uint(max_entries, 1); } addr_filter SEC(".maps"); +struct rw_semaphore___old { + struct task_struct *owner; +} __attribute__((preserve_access_index)); + +struct rw_semaphore___new { + atomic_long_t owner; +} __attribute__((preserve_access_index)); + /* control flags */ int enabled; int has_cpu; @@ -161,6 +169,41 @@ static inline int update_task_data(struct task_struct *task) return 0; } +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +static inline struct task_struct *get_lock_owner(__u64 lock, __u32 flags) +{ + struct task_struct *task; + __u64 owner = 0; + + if (flags & LCB_F_MUTEX) { + struct mutex *mutex = (void *)lock; + owner = BPF_CORE_READ(mutex, owner.counter); + } else if (flags == LCB_F_READ || flags == LCB_F_WRITE) { +#if __has_builtin(bpf_core_type_matches) + if (bpf_core_type_matches(struct rw_semaphore___old)) { + struct rw_semaphore___old *rwsem = (void *)lock; + owner = (unsigned long)BPF_CORE_READ(rwsem, owner); + } else if (bpf_core_type_matches(struct rw_semaphore___new)) { + struct rw_semaphore___new *rwsem = (void *)lock; + owner = BPF_CORE_READ(rwsem, owner.counter); + } +#else + /* assume new struct */ + struct rw_semaphore *rwsem = (void *)lock; + owner = BPF_CORE_READ(rwsem, owner.counter); 
+#endif + } + + if (!owner) + return NULL; + + task = (void *)(owner & ~7UL); + return task; +} + SEC("tp_btf/contention_begin") int contention_begin(u64 *ctx) { @@ -199,19 +242,7 @@ int contention_begin(u64 *ctx) struct task_struct *task; if (lock_owner) { - if (pelem->flags & LCB_F_MUTEX) { - struct mutex *lock = (void *)pelem->lock; - unsigned long owner = BPF_CORE_READ(lock, owner.counter); - - task = (void *)(owner & ~7UL); - } else if (pelem->flags == LCB_F_READ || pelem->flags == LCB_F_WRITE) { - struct rw_semaphore *lock = (void *)pelem->lock; - unsigned long owner = BPF_CORE_READ(lock, owner.counter); - - task = (void *)(owner & ~7UL); - } else { - task = NULL; - } + task = get_lock_owner(pelem->lock, pelem->flags); /* The flags is not used anymore. Pass the owner pid. */ if (task) From ffd1240e8f0814262ceb957dbe961f6e0aef1e7a Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 7 Feb 2023 11:50:57 +0800 Subject: [PATCH 107/114] perf tools: Fix auto-complete on aarch64 On aarch64 CPU related events are not under event_source/devices/cpu/events, they're under event_source/devices/armv8_pmuv3_0/events on my machine. Using current auto-complete script will generate below error: [root@localhost bin]# perf stat -e ls: cannot access '/sys/bus/event_source/devices/cpu/events': No such file or directory Fix this by not testing /sys/bus/event_source/devices/cpu/events on aarch64 machine. 
Fixes: 74cd5815d9af6e6c ("perf tool: Improve bash command line auto-complete for multiple events with comma") Reviewed-by: James Clark Signed-off-by: Yicong Yang Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Cc: prime.zeng@hisilicon.com Link: https://lore.kernel.org/r/20230207035057.43394-1-yangyicong@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index fdf75d45efff..978249d7868c 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -165,7 +165,12 @@ __perf_main () local cur1=${COMP_WORDS[COMP_CWORD]} local raw_evts=$($cmd list --raw-dump) - local arr s tmp result + local arr s tmp result cpu_evts + + # aarch64 doesn't have /sys/bus/event_source/devices/cpu/events + if [[ `uname -m` != aarch64 ]]; then + cpu_evts=$(ls /sys/bus/event_source/devices/cpu/events) + fi if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then OLD_IFS="$IFS" @@ -183,9 +188,9 @@ __perf_main () fi done - evts=${result}" "$(ls /sys/bus/event_source/devices/cpu/events) + evts=${result}" "${cpu_evts} else - evts=${raw_evts}" "$(ls /sys/bus/event_source/devices/cpu/events) + evts=${raw_evts}" "${cpu_evts} fi if [[ "$cur1" == , ]]; then From 6a5558f1166473f741de33c32ffb161d7f7732cb Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 7 Feb 2023 15:04:47 +0100 Subject: [PATCH 108/114] perf tools: Fix perf tool build error in util/pfm.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I have downloaded linux-next and build the perf tool using # make LIBPFM4=1 to have libpfm4 support built into perf. The build fails: # make LIBPFM4=1 .... 
INSTALL libbpf_headers CC util/pfm.o util/pfm.c: In function ‘print_libpfm_event’: util/pfm.c:189:9: error: too many arguments to function ‘print_cb->print_event’ 189 | print_cb->print_event(print_state, | ^~~~~~~~ util/pfm.c:220:25: error: too many arguments to function ‘print_cb->print_event’ 220 | print_cb->print_event(print_state, The build error is caused by commit d9dc8874d6ce46cc ("perf pmu-events: Remove now unused event and metric variables") which changes the function prototype of struct print_callbacks { ... void (*print_event)(...); --> last two parameters removed. }; but does not adjust the usage of this function prototype in util/pfm.c. In file util/pfm.c function print_event() is still invoked with 13 parameters instead of 11. The compile fails. When I adjust the file util/pfm.c as in this patch, the build works fine. Please check this patch for correctness, I have just fixed the compile issue. Fixes: d9dc8874d6ce46cc ("perf pmu-events: Remove now unused event and metric variables") Signed-off-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Tested-by: Ian Rogers Cc: Heiko Carstens Cc: Ian Rogers Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Cc: egorenar@linux.ibm.com Cc: linux-kernel-next@vger.kernel.org Link: https://lore.kernel.org/r/20230207140447.1827741-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pfm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index ac3227ba769c..b59ba825ddc9 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -193,8 +193,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", info->desc, /*long_desc=*/NULL, - /*encoding_desc=*/buf->buf, - /*metric_name=*/NULL, /*metric_expr=*/NULL); + /*encoding_desc=*/buf->buf); pfm_for_each_event_attr(j, info) { pfm_event_attr_info_t ainfo; @@ -224,8 +223,7 @@
print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", ainfo.desc, /*long_desc=*/NULL, - /*encoding_desc=*/buf->buf, - /*metric_name=*/NULL, /*metric_expr=*/NULL); + /*encoding_desc=*/buf->buf); } } } From 37f322cd58d81a9d46456531281c908de9ef6e42 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 8 Feb 2023 22:44:47 -0800 Subject: [PATCH 109/114] perf stat: Avoid merging/aggregating metric counts twice The added perf_stat_merge_counters combines uncore counters. When metrics are enabled, the counts are merged into a metric_leader via the stat-shadow saved_value logic. As the leader now is passed an aggregated count, it leads to all counters being added together twice and counts appearing approximately doubled in metrics. This change disables the saved_value merging of counts for evsels that are merged. It is recommended that later changes remove the saved_value entirely as the two layers of aggregation in the code is confusing. 
Fixes: 942c5593393d9418 ("perf stat: Add perf_stat_merge_counters()") Reported-by: Perry Taylor Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Eduard Zingerman Cc: Florian Fischer Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20230209064447.83733-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 35ea4813f468..806b32156459 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -311,7 +311,7 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; - } else if (counter->metric_leader) { + } else if (counter->metric_leader && !counter->merged_stat) { v = saved_value_lookup(counter->metric_leader, map_idx, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; From 91621be65d6812cd74b2ea09573ff9ee0cbf5666 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Wed, 15 Feb 2023 12:23:24 +0000 Subject: [PATCH 110/114] perf record: Fix segfault with --overwrite and --max-size When --overwrite and --max-size options of perf record are used together, a segmentation fault occurs. The following is an example: # perf record -e sched:sched* --overwrite --max-size 1K -a -- sleep 1 [ perf record: Woken up 1 times to write data ] perf: Segmentation fault Obtained 12 stack frames. 
./perf/perf(+0x197673) [0x55f99710b673] /lib/x86_64-linux-gnu/libc.so.6(+0x3ef0f) [0x7fa45f3cff0f] ./perf/perf(+0x8eb40) [0x55f997002b40] ./perf/perf(+0x1f6882) [0x55f99716a882] ./perf/perf(+0x794c2) [0x55f996fed4c2] ./perf/perf(+0x7b7c7) [0x55f996fef7c7] ./perf/perf(+0x9074b) [0x55f99700474b] ./perf/perf(+0x12e23c) [0x55f9970a223c] ./perf/perf(+0x12e54a) [0x55f9970a254a] ./perf/perf(+0x7db60) [0x55f996ff1b60] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe6) [0x7fa45f3b2c86] ./perf/perf(+0x7dfe9) [0x55f996ff1fe9] Segmentation fault (core dumped) backtrace of the core file is as follows: (gdb) bt #0 record__bytes_written (rec=0x55f99755a200 ) at builtin-record.c:234 #1 record__output_max_size_exceeded (rec=0x55f99755a200 ) at builtin-record.c:242 #2 record__write (map=0x0, size=12816, bf=0x55f9978da2e0, rec=0x55f99755a200 ) at builtin-record.c:263 #3 process_synthesized_event (tool=tool@entry=0x55f99755a200 , event=event@entry=0x55f9978da2e0, sample=sample@entry=0x0, machine=machine@entry=0x55f997893658) at builtin-record.c:618 #4 0x000055f99716a883 in __perf_event__synthesize_id_index (tool=tool@entry=0x55f99755a200 , process=process@entry=0x55f997002aa0 , evlist=0x55f9978928b0, machine=machine@entry=0x55f997893658, from=from@entry=0) at util/synthetic-events.c:1895 #5 0x000055f99716a91f in perf_event__synthesize_id_index (tool=tool@entry=0x55f99755a200 , process=process@entry=0x55f997002aa0 , evlist=, machine=machine@entry=0x55f997893658) at util/synthetic-events.c:1905 #6 0x000055f996fed4c3 in record__synthesize (tail=tail@entry=true, rec=0x55f99755a200 ) at builtin-record.c:1997 #7 0x000055f996fef7c8 in __cmd_record (argc=argc@entry=2, argv=argv@entry=0x7ffc67551260, rec=0x55f99755a200 ) at builtin-record.c:2802 #8 0x000055f99700474c in cmd_record (argc=, argv=0x7ffc67551260) at builtin-record.c:4258 #9 0x000055f9970a223d in run_builtin (p=0x55f997564d88 , argc=10, argv=0x7ffc67551260) at perf.c:330 #10 0x000055f9970a254b in handle_internal_command 
(argc=10, argv=0x7ffc67551260) at perf.c:384 #11 0x000055f996ff1b61 in run_argv (argcp=, argv=) at perf.c:428 #12 main (argc=, argv=0x7ffc67551260) at perf.c:562 The reason is that record__bytes_written accesses the freed memory rec->thread_data, The process is as follows: __cmd_record -> record__free_thread_data -> zfree(&rec->thread_data) // free rec->thread_data -> record__synthesize -> perf_event__synthesize_id_index -> process_synthesized_event -> record__write -> record__bytes_written // access rec->thread_data We add a member variable "thread_bytes_written" in the struct "record" to save the data size written by the threads. Fixes: 6d57581659f72299 ("perf record: Add support for limit perf output file size") Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jiwei Sun Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/CAM9d7ci_TRrqBQVQNW8=GwakUr7SsZpYxaaty-S4bxF8zJWyqw@mail.gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 29dcd454b8e2..8374117e66f6 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -154,6 +154,7 @@ struct record { struct perf_tool tool; struct record_opts opts; u64 bytes_written; + u64 thread_bytes_written; struct perf_data data; struct auxtrace_record *itr; struct evlist *evlist; @@ -226,14 +227,7 @@ static bool switch_output_time(struct record *rec) static u64 record__bytes_written(struct record *rec) { - int t; - u64 bytes_written = rec->bytes_written; - struct record_thread *thread_data = rec->thread_data; - - for (t = 0; t < rec->nr_threads; t++) - bytes_written += thread_data[t].bytes_written; - - return bytes_written; + return rec->bytes_written + rec->thread_bytes_written; } static bool record__output_max_size_exceeded(struct record *rec) @@ -255,10 +249,12 
@@ static int record__write(struct record *rec, struct mmap *map __maybe_unused, return -1; } - if (map && map->file) + if (map && map->file) { thread->bytes_written += size; - else + rec->thread_bytes_written += size; + } else { rec->bytes_written += size; + } if (record__output_max_size_exceeded(rec) && !done) { fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB)," From 1470a108a60e8c0c4d19da10117c9b98f0078654 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 14 Feb 2023 15:58:23 +0800 Subject: [PATCH 111/114] perf c2c: Add report option to show false sharing in adjacent cachelines Many platforms have feature of adjacent cachelines prefetch, when it is enabled, for data in RAM of 2 cachelines (2N and 2N+1) granularity, if one is fetched to cache, the other one could likely be fetched too, which sort of extends the cacheline size to double, thus the false sharing could happen in adjacent cachelines. 0Day has captured performance changes related to this [1], and some commercial software explicitly makes its hot global variables 128 bytes aligned (2 cache lines) to avoid this kind of extended false sharing. So add an option "--double-cl" for 'perf c2c report' to show false sharing in double cache line granularity, which acts just like the cacheline size is doubled. There is no change to c2c record. The hardware events of shared cacheline are still per cacheline, and this option just changes the granularity of how events are grouped and displayed.
In the 'perf c2c report' output below (will-it-scale's 'pagefault2' case on old kernel): ---------------------------------------------------------------------- 26 31 2 0 0 0 0xffff888103ec6000 ---------------------------------------------------------------------- 35.48% 50.00% 0.00% 0.00% 0.00% 0x10 0 1 0xffffffff8133148b 1153 66 971 3748 74 [k] get_mem_cgroup_from_mm 6.45% 0.00% 0.00% 0.00% 0.00% 0x10 0 1 0xffffffff813396e4 570 0 1531 879 75 [k] mem_cgroup_charge 25.81% 50.00% 0.00% 0.00% 0.00% 0x54 0 1 0xffffffff81331472 949 70 593 3359 74 [k] get_mem_cgroup_from_mm 19.35% 0.00% 0.00% 0.00% 0.00% 0x54 0 1 0xffffffff81339686 1352 0 1073 1022 74 [k] mem_cgroup_charge 9.68% 0.00% 0.00% 0.00% 0.00% 0x54 0 1 0xffffffff813396d6 1401 0 863 768 74 [k] mem_cgroup_charge 3.23% 0.00% 0.00% 0.00% 0.00% 0x54 0 1 0xffffffff81333106 618 0 804 11 9 [k] uncharge_batch The offsets 0x10 and 0x54 used to be displayed in 2 groups, and now they are listed together to give users a hint of extended false sharing. [1]. https://lore.kernel.org/lkml/20201102091543.GM31092@shao2-debian/ Committer notes: Link: https://lore.kernel.org/r/Y+wvVNWqXb70l4uy@feng-clx Removed -a, leaving just as --double-cl, as this probably is not used so frequently and perhaps will be even auto-detected if we manage to record the MSR where this is configured.
Reviewed-by: Andi Kleen Reviewed-by: Leo Yan Signed-off-by: Feng Tang Tested-by: Leo Yan Acked-by: Joe Mario Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Tim Chen Cc: Xing Zhengjun Link: https://lore.kernel.org/r/20230214075823.246414-1-feng.tang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 6 ++++++ tools/perf/builtin-c2c.c | 21 ++++++++++++--------- tools/perf/util/cacheline.h | 25 ++++++++++++++++++++----- tools/perf/util/sort.c | 13 ++++++++++--- tools/perf/util/sort.h | 1 + 5 files changed, 49 insertions(+), 17 deletions(-) diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 4e8c263e1721..856f0dfb8e5a 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -130,6 +130,12 @@ REPORT OPTIONS The known limitations include exception handing such as setjmp/longjmp will have calls/returns not match. +--double-cl:: + Group the detection of shared cacheline events into double cacheline + granularity. Some architectures have an Adjacent Cacheline Prefetch + feature, which causes cacheline sharing to behave like the cacheline + size is doubled. 
+ C2C RECORD ---------- The perf c2c record command setup options related to HITM cacheline analysis diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 52d94c7dd836..56974eae0638 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -524,7 +524,7 @@ static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, char buf[20]; if (he->mem_info) - addr = cl_address(he->mem_info->daddr.addr); + addr = cl_address(he->mem_info->daddr.addr, chk_double_cl); return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr)); } @@ -562,7 +562,7 @@ static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, char buf[20]; if (he->mem_info) - addr = cl_offset(he->mem_info->daddr.al_addr); + addr = cl_offset(he->mem_info->daddr.al_addr, chk_double_cl); return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr)); } @@ -574,9 +574,10 @@ offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused, uint64_t l = 0, r = 0; if (left->mem_info) - l = cl_offset(left->mem_info->daddr.addr); + l = cl_offset(left->mem_info->daddr.addr, chk_double_cl); + if (right->mem_info) - r = cl_offset(right->mem_info->daddr.addr); + r = cl_offset(right->mem_info->daddr.addr, chk_double_cl); return (int64_t)(r - l); } @@ -2590,7 +2591,7 @@ perf_c2c_cacheline_browser__title(struct hist_browser *browser, he = cl_browser->he; if (he->mem_info) - addr = cl_address(he->mem_info->daddr.addr); + addr = cl_address(he->mem_info->daddr.addr, chk_double_cl); scnprintf(bf, size, "Cacheline 0x%lx", addr); return 0; @@ -2788,15 +2789,16 @@ static int ui_quirks(void) if (!c2c.use_stdio) { dim_offset.width = 5; dim_offset.header = header_offset_tui; - nodestr = "CL"; + nodestr = chk_double_cl ? "Double-CL" : "CL"; } dim_percent_costly_snoop.header = percent_costly_snoop_header[c2c.display]; /* Fix the zero line for dcacheline column. 
*/ - buf = fill_line("Cacheline", dim_dcacheline.width + - dim_dcacheline_node.width + - dim_dcacheline_count.width + 4); + buf = fill_line(chk_double_cl ? "Double-Cacheline" : "Cacheline", + dim_dcacheline.width + + dim_dcacheline_node.width + + dim_dcacheline_count.width + 4); if (!buf) return -ENOMEM; @@ -3037,6 +3039,7 @@ static int perf_c2c__report(int argc, const char **argv) OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr, "Enable LBR callgraph stitching approach"), + OPT_BOOLEAN(0, "double-cl", &chk_double_cl, "Detect adjacent cacheline false sharing"), OPT_PARENT(c2c_options), OPT_END() }; diff --git a/tools/perf/util/cacheline.h b/tools/perf/util/cacheline.h index dec8c0fb1f4a..fe6d5b60a031 100644 --- a/tools/perf/util/cacheline.h +++ b/tools/perf/util/cacheline.h @@ -6,16 +6,31 @@ int __pure cacheline_size(void); -static inline u64 cl_address(u64 address) + +/* + * Some architectures have 'Adjacent Cacheline Prefetch' feature, + * which performs like the cacheline size being doubled. 
+ */ +static inline u64 cl_address(u64 address, bool double_cl) { + u64 size = cacheline_size(); + + if (double_cl) + size *= 2; + /* return the cacheline of the address */ - return (address & ~(cacheline_size() - 1)); + return (address & ~(size - 1)); } -static inline u64 cl_offset(u64 address) +static inline u64 cl_offset(u64 address, bool double_cl) { - /* return the cacheline of the address */ - return (address & (cacheline_size() - 1)); + u64 size = cacheline_size(); + + if (double_cl) + size *= 2; + + /* return the offset inside cacheline */ + return (address & (size - 1)); } #endif // PERF_CACHELINE_H diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4a648231fe72..093a0c8b2e3d 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -53,6 +53,13 @@ enum sort_mode sort__mode = SORT_MODE__NORMAL; static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"}; static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"}; +/* + * Some architectures have Adjacent Cacheline Prefetch feature, which + * behaves like the cacheline size is doubled. Enable this flag to + * check things in double cacheline granularity. 
+ */ +bool chk_double_cl; + /* * Replaces all occurrences of a char used with the: * @@ -1500,8 +1507,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) addr: /* al_addr does all the right addr - start + offset calculations */ - l = cl_address(left->mem_info->daddr.al_addr); - r = cl_address(right->mem_info->daddr.al_addr); + l = cl_address(left->mem_info->daddr.al_addr, chk_double_cl); + r = cl_address(right->mem_info->daddr.al_addr, chk_double_cl); if (l > r) return -1; if (l < r) return 1; @@ -1520,7 +1527,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if (he->mem_info) { struct map *map = he->mem_info->daddr.ms.map; - addr = cl_address(he->mem_info->daddr.al_addr); + addr = cl_address(he->mem_info->daddr.al_addr, chk_double_cl); ms = &he->mem_info->daddr.ms; /* print [s] for shared data mmaps */ diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 9a91d0df2833..d79a100e5999 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -35,6 +35,7 @@ extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern struct sort_entry sort_srcline; extern const char default_mem_sort_order[]; +extern bool chk_double_cl; struct res_sample { u64 time; From 7e55b95651d88e60368087c243525a0d97d43d3d Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Tue, 22 Mar 2022 09:24:52 +0100 Subject: [PATCH 112/114] perf intel-pt: Synthesize cycle events There is no good reason why we cannot synthesize "cycle" events from Intel PT just as we can synthesize "instruction" events, in particular when CYC packets are available. This enables using PT to getting much more accurate cycle profiles than regular sampling (record -e cycles) when the work last for very short periods (<10 ms). Thus, add support for this, based off of the existing IPC calculation framework. The new option to --itrace is "y" (for cYcles), as c was taken for calls. 
Cycle and instruction events can be synthesized together, and are by default. The only real caveat is that CYC packets are only emitted whenever some other packet is, which in practice is when a branch instruction is encountered (and not even all branches). Thus, even at no subsampling (e.g. --itrace=y0ns), it is impossible to get more accuracy than a single basic block, and all cycles spent executing that block will get attributed to the branch instruction that ends the packet. Thus, one cannot know whether the cycles came from e.g. a specific load, a mispredicted branch, or something else. When subsampling (which is the default), the cycle events will get smeared out even more, but will still be generally useful to attribute cycle counts to functions. Reviewed-by: Adrian Hunter Signed-off-by: Steinar H. Gunderson Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220322082452.1429091-1-sesse@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/itrace.txt | 3 +- tools/perf/Documentation/perf-intel-pt.txt | 36 ++++++++---- tools/perf/util/auxtrace.c | 9 ++- tools/perf/util/auxtrace.h | 7 ++- tools/perf/util/intel-pt.c | 67 ++++++++++++++++++++-- 5 files changed, 101 insertions(+), 21 deletions(-) diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 0916bbfe64cb..a97f95825b14 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -1,4 +1,5 @@ i synthesize instructions events + y synthesize cycles events b synthesize branches events (branch misses for Arm SPE) c synthesize branches events (calls only) r synthesize branches events (returns only) @@ -25,7 +26,7 @@ A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) - The default is all events i.e. the same as --itrace=ibxwpe, + The default is all events i.e. 
the same as --itrace=iybxwpe, except for perf script where it is --itrace=ce In addition, the period (default 100000, except for perf script where it is 1) diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 9d485a9cdb19..4c90cc176f81 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -101,12 +101,12 @@ data is available you can use the 'perf script' tool with all itrace sampling options, which will list all the samples. perf record -e intel_pt//u ls - perf script --itrace=ibxwpe + perf script --itrace=iybxwpe An interesting field that is not printed by default is 'flags' which can be displayed as follows: - perf script --itrace=ibxwpe -F+flags + perf script --itrace=iybxwpe -F+flags The flags are "bcrosyiABExghDt" which stand for branch, call, return, conditional, system, asynchronous, interrupt, transaction abort, trace begin, trace end, @@ -147,16 +147,17 @@ displayed as follows: There are two ways that instructions-per-cycle (IPC) can be calculated depending on the recording. -If the 'cyc' config term (see config terms section below) was used, then IPC is -calculated using the cycle count from CYC packets, otherwise MTC packets are -used - refer to the 'mtc' config term. When MTC is used, however, the values -are less accurate because the timing is less accurate. +If the 'cyc' config term (see config terms section below) was used, then IPC +and cycle events are calculated using the cycle count from CYC packets, otherwise +MTC packets are used - refer to the 'mtc' config term. When MTC is used, however, +the values are less accurate because the timing is less accurate. Because Intel PT does not update the cycle count on every branch or instruction, the values will often be zero. 
When there are values, they will be the number of instructions and number of cycles since the last update, and thus represent -the average IPC since the last IPC for that event type. Note IPC for "branches" -events is calculated separately from IPC for "instructions" events. +the average IPC cycle count since the last IPC for that event type. +Note IPC for "branches" events is calculated separately from IPC for "instructions" +events. Even with the 'cyc' config term, it is possible to produce IPC information for every change of timestamp, but at the expense of accuracy. That is selected by @@ -900,11 +901,12 @@ Having no option is the same as which, in turn, is the same as - --itrace=cepwx + --itrace=cepwxy The letters are: i synthesize "instructions" events + y synthesize "cycles" events b synthesize "branches" events x synthesize "transactions" events w synthesize "ptwrite" events @@ -927,6 +929,16 @@ The letters are: "Instructions" events look like they were recorded by "perf record -e instructions". +"Cycles" events look like they were recorded by "perf record -e cycles" +(ie., the default). Note that even with CYC packets enabled and no sampling, +these are not fully accurate, since CYC packets are not emitted for each +instruction, only when some other event (like an indirect branch, or a +TNT packet representing multiple branches) happens causes a packet to +be emitted. Thus, it is more effective for attributing cycles to functions +(and possibly basic blocks) than to individual instructions, although it +is not even perfect for functions (although it becomes better if the noretcomp +option is active). + "Branches" events look like they were recorded by "perf record -e branches". "c" and "r" can be combined to get calls and returns. @@ -934,9 +946,9 @@ and "r" can be combined to get calls and returns. 'flags' field can be used in perf script to determine whether the event is a transaction start, commit or abort. 
-Note that "instructions", "branches" and "transactions" events depend on code -flow packets which can be disabled by using the config term "branch=0". Refer -to the config terms section above. +Note that "instructions", "cycles", "branches" and "transactions" events +depend on code flow packets which can be disabled by using the config term +"branch=0". Refer to the config terms section above. "ptwrite" events record the payload of the ptwrite instruction and whether "fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index d4b04fa07a11..498ff7f24463 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1394,6 +1394,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, synth_opts->calls = true; } else { synth_opts->instructions = true; + synth_opts->cycles = true; synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; } @@ -1482,7 +1483,11 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, for (p = str; *p;) { switch (*p++) { case 'i': - synth_opts->instructions = true; + case 'y': + if (p[-1] == 'y') + synth_opts->cycles = true; + else + synth_opts->instructions = true; while (*p == ' ' || *p == ',') p += 1; if (isdigit(*p)) { @@ -1641,7 +1646,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, } } out: - if (synth_opts->instructions) { + if (synth_opts->instructions || synth_opts->cycles) { if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 2cf63d377831..29eb82dff574 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -71,6 +71,9 @@ enum itrace_period_type { * @inject: indicates the event (not just the sample) must be fully synthesized * because 'perf inject' will write it out * @instructions: whether 
to synthesize 'instructions' events + * @cycles: whether to synthesize 'cycles' events + * (not fully accurate, since CYC packets are only emitted + * together with other events, such as branches) * @branches: whether to synthesize 'branches' events * (branch misses only for Arm SPE) * @transactions: whether to synthesize events for transactions @@ -119,6 +122,7 @@ struct itrace_synth_opts { bool default_no_sample; bool inject; bool instructions; + bool cycles; bool branches; bool transactions; bool ptwrites; @@ -643,6 +647,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, #define ITRACE_HELP \ " i[period]: synthesize instructions events\n" \ +" y[period]: synthesize cycles events (same period as i)\n" \ " b: synthesize branches events (branch misses for Arm SPE)\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ @@ -674,7 +679,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " A: approximate IPC\n" \ " Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ -" concatenate multiple options. Default is ibxwpe or cewp\n" +" concatenate multiple options. 
Default is iybxwpe or cewp\n" static inline void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts, diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index b8b29756fbf1..955c1b9dc6a4 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -98,6 +99,10 @@ struct intel_pt { u64 instructions_sample_type; u64 instructions_id; + bool sample_cycles; + u64 cycles_sample_type; + u64 cycles_id; + bool sample_branches; u32 branches_filter; u64 branches_sample_type; @@ -214,6 +219,8 @@ struct intel_pt_queue { u64 ipc_cyc_cnt; u64 last_in_insn_cnt; u64 last_in_cyc_cnt; + u64 last_cy_insn_cnt; + u64 last_cy_cyc_cnt; u64 last_br_insn_cnt; u64 last_br_cyc_cnt; unsigned int cbr_seen; @@ -1319,7 +1326,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (pt->filts.cnt > 0) params.pgd_ip = intel_pt_pgd_ip; - if (pt->synth_opts.instructions) { + if (pt->synth_opts.instructions || pt->synth_opts.cycles) { if (pt->synth_opts.period) { switch (pt->synth_opts.period_type) { case PERF_ITRACE_PERIOD_INSTRUCTIONS: @@ -1830,6 +1837,33 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) pt->instructions_sample_type); } +static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + union perf_event *event = ptq->event_buf; + struct perf_sample sample = { .ip = 0, }; + u64 period = 0; + + if (ptq->sample_ipc) + period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt; + + if (!period || intel_pt_skip_event(pt)) + return 0; + + intel_pt_prep_sample(pt, ptq, event, &sample); + + sample.id = ptq->pt->cycles_id; + sample.stream_id = ptq->pt->cycles_id; + sample.period = period; + + sample.cyc_cnt = period; + sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_cy_insn_cnt; + ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt; + ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt; + + return intel_pt_deliver_synth_event(pt, 
event, &sample, pt->cycles_sample_type); +} + static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; @@ -2598,10 +2632,17 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) } } - if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { - err = intel_pt_synth_instruction_sample(ptq); - if (err) - return err; + if (state->type & INTEL_PT_INSTRUCTION) { + if (pt->sample_instructions) { + err = intel_pt_synth_instruction_sample(ptq); + if (err) + return err; + } + if (pt->sample_cycles) { + err = intel_pt_synth_cycle_sample(ptq); + if (err) + return err; + } } if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) { @@ -3731,6 +3772,22 @@ static int intel_pt_synth_events(struct intel_pt *pt, id += 1; } + if (pt->synth_opts.cycles) { + attr.config = PERF_COUNT_HW_CPU_CYCLES; + if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) + attr.sample_period = + intel_pt_ns_to_ticks(pt, pt->synth_opts.period); + else + attr.sample_period = pt->synth_opts.period; + err = intel_pt_synth_event(session, "cycles", &attr, id); + if (err) + return err; + pt->sample_cycles = true; + pt->cycles_sample_type = attr.sample_type; + pt->cycles_id = id; + id += 1; + } + attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD; attr.sample_period = 1; From cf26e043c2a9213805d7ea9e8cf3e1d7166a62a4 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 16 Feb 2023 11:42:40 +0530 Subject: [PATCH 113/114] perf vendor events power10: Add JSON metric events to present CPI stall cycles in powerpc Power10 Performance Monitoring Unit (PMU) provides events to understand stall cycles of different pipeline stages. These events along with completed instructions provides useful metrics for application tuning. Patch implements the JSON changes to collect counter statistics to present the high level CPI stall breakdown metrics. 
New metric group is named as "CPI_STALL_RATIO" and this new metric group presents these stall metrics: - DISPATCHED_CPI ( Dispatch stall cycles per insn ) - ISSUE_STALL_CPI ( Issue stall cycles per insn ) - EXECUTION_STALL_CPI ( Execution stall cycles per insn ) - COMPLETION_STALL_CPI ( Completion stall cycles per insn ) To avoid multiplexing of events, PM_RUN_INST_CMPL event has been modified to use PMC5 (performance monitoring counter 5) instead of PMC4. This change is needed, since completion stall event is using PMC4. Usage example: ./perf stat --metric-no-group -M CPI_STALL_RATIO Performance counter stats for 'workload': 63,056,817,982 PM_CMPL_STALL # 0.28 COMPLETION_STALL_CPI 1,743,988,038,896 PM_ISSUE_STALL # 7.73 ISSUE_STALL_CPI 225,597,495,030 PM_RUN_INST_CMPL # 6.18 DISPATCHED_CPI # 37.48 EXECUTION_STALL_CPI 1,393,916,546,654 PM_DISP_STALL_CYC 8,455,376,836,463 PM_EXEC_STALL "--metric-no-group" is used for forcing PM_RUN_INST_CMPL to be scheduled in all groups for more accuracy. Signed-off-by: Athira Rajeev Acked-by: Ian Rogers Cc: Andi Kleen Cc: Disha Goel Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230216061240.18067-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/powerpc/power10/metrics.json | 8 ++++---- tools/perf/pmu-events/arch/powerpc/power10/others.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json index b57526fa44f2..6f53583a0c62 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json @@ -15,7 +15,7 @@ { "BriefDescription": "Average cycles per completed instruction when dispatch was stalled for any reason", "MetricExpr": 
"PM_DISP_STALL_CYC / PM_RUN_INST_CMPL", - "MetricGroup": "CPI", + "MetricGroup": "CPI;CPI_STALL_RATIO", "MetricName": "DISPATCHED_CPI" }, { @@ -147,13 +147,13 @@ { "BriefDescription": "Average cycles per completed instruction when the NTC instruction has been dispatched but not issued for any reason", "MetricExpr": "PM_ISSUE_STALL / PM_RUN_INST_CMPL", - "MetricGroup": "CPI", + "MetricGroup": "CPI;CPI_STALL_RATIO", "MetricName": "ISSUE_STALL_CPI" }, { "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting to be finished in one of the execution units", "MetricExpr": "PM_EXEC_STALL / PM_RUN_INST_CMPL", - "MetricGroup": "CPI", + "MetricGroup": "CPI;CPI_STALL_RATIO", "MetricName": "EXECUTION_STALL_CPI" }, { @@ -309,7 +309,7 @@ { "BriefDescription": "Average cycles per completed instruction when the NTC instruction cannot complete because the thread was blocked", "MetricExpr": "PM_CMPL_STALL / PM_RUN_INST_CMPL", - "MetricGroup": "CPI", + "MetricGroup": "CPI;CPI_STALL_RATIO", "MetricName": "COMPLETION_STALL_CPI" }, { diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json index 7d0de1a2860b..a771e4b6bec5 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/others.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json @@ -265,7 +265,7 @@ "BriefDescription": "Load Missed L1, counted at finish time." }, { - "EventCode": "0x400FA", + "EventCode": "0x500FA", "EventName": "PM_RUN_INST_CMPL", "BriefDescription": "Completed PowerPC instructions gated by the run latch." } From f9fa0778ee7349a9aa3d2ea10e9f2ab843a0b44e Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 15 Feb 2023 15:08:27 +0530 Subject: [PATCH 114/114] perf tests stat_all_metrics: Change true workload to sleep workload for system wide check Testcase stat_all_metrics.sh fails in powerpc: 98: perf all metrics test : FAILED! 
Logs with verbose: [command]# ./perf test 98 -vv 98: perf all metrics test : --- start --- test child forked, pid 13262 Testing BRU_STALL_CPI Testing COMPLETION_STALL_CPI ---- Testing TOTAL_LOCAL_NODE_PUMPS_P23 Metric 'TOTAL_LOCAL_NODE_PUMPS_P23' not printed in: Error: Invalid event (hv_24x7/PM_PB_LNS_PUMP23,chip=3/) in per-thread mode, enable system wide with '-a'. Testing TOTAL_LOCAL_NODE_PUMPS_RETRIES_P01 Metric 'TOTAL_LOCAL_NODE_PUMPS_RETRIES_P01' not printed in: Error: Invalid event (hv_24x7/PM_PB_RTY_LNS_PUMP01,chip=3/) in per-thread mode, enable system wide with '-a'. ---- Based on the above logs, we can see that some of the hv-24x7 metric events fail, and the logs suggest running the metric event with the -a option. This change happened after the commit a4b8cfcabb1d90ec ("perf stat: Delay metric parsing"), which delayed the metric parsing phase, so the perf tool now identifies whether the target is system-wide or not before the metric parsing phase. With this change, perf_event_open will fail with workload monitoring for uncore events, as expected. The perf all metrics test case fails because some of the hv-24x7 metric events may need a bigger workload with system-wide monitoring to get the data. Fix this issue by changing the workload used for the system-wide check from "true" to "sleep 0.01".
Result with the patch changes in powerpc: 98: perf all metrics test : Ok Fixes: a4b8cfcabb1d90ec ("perf stat: Delay metric parsing") Suggested-by: Ian Rogers Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Tested-by: Disha Goel Tested-by: Ian Rogers Cc: Madhavan Srinivasan Cc: Nageswara R Sastry Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20230215093827.124921-1-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat_all_metrics.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh index 6e79349e42be..22e9cb294b40 100755 --- a/tools/perf/tests/shell/stat_all_metrics.sh +++ b/tools/perf/tests/shell/stat_all_metrics.sh @@ -11,7 +11,7 @@ for m in $(perf list --raw-dump metrics); do continue fi # Failed so try system wide. - result=$(perf stat -M "$m" -a true 2>&1) + result=$(perf stat -M "$m" -a sleep 0.01 2>&1) if [[ "$result" =~ "${m:0:50}" ]] then continue