perf pmu: Make the loading of formats lazy
The sysfs format files are loaded eagerly in a PMU. Add a flag so that we create the format but only load the contents when necessary. Reduce the size of the value in struct perf_pmu_format and avoid holes so there is no additional space requirement. For "perf stat -e cycles true" this reduces the number of openat calls from 648 to 573 (about 12%). The benchmark pmu scan speed is improved by roughly 5%. Before: $ perf bench internals pmu-scan Computing performance of sysfs PMU event scan for 100 times Average core PMU scanning took: 1061.100 usec (+- 9.965 usec) Average PMU scanning took: 4725.300 usec (+- 260.599 usec) After: $ perf bench internals pmu-scan Computing performance of sysfs PMU event scan for 100 times Average core PMU scanning took: 989.170 usec (+- 6.873 usec) Average PMU scanning took: 4520.960 usec (+- 251.272 usec) Committer testing: On a AMD Ryzen 5950x: Before: $ perf bench internals pmu-scan -i1000 # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 563.466 usec (+- 1.008 usec) Average PMU scanning took: 1619.174 usec (+- 23.627 usec) $ perf stat -r5 perf bench internals pmu-scan -i1000 # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 583.401 usec (+- 2.098 usec) Average PMU scanning took: 1677.352 usec (+- 24.636 usec) # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 553.254 usec (+- 0.825 usec) Average PMU scanning took: 1635.655 usec (+- 24.312 usec) # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 557.733 usec (+- 0.980 usec) Average PMU scanning took: 1600.659 usec (+- 23.344 usec) # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 554.906 usec (+- 0.774 usec) Average PMU scanning took: 1595.338 usec (+- 23.288 usec) # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 551.798 usec (+- 0.967 usec) Average PMU scanning took: 1623.213 usec (+- 23.998 usec) Performance counter stats for 'perf bench internals pmu-scan -i1000' (5 runs): 3276.82 msec task-clock:u # 0.990 CPUs utilized ( +- 0.82% ) 0 context-switches:u # 0.000 /sec 0 cpu-migrations:u # 0.000 /sec 1008 page-faults:u # 307.615 /sec ( +- 0.04% ) 12049614778 cycles:u # 3.677 GHz ( +- 0.07% ) (83.34%) 117507478 stalled-cycles-frontend:u # 0.98% frontend cycles idle ( +- 0.33% ) (83.32%) 27106761 stalled-cycles-backend:u # 0.22% backend cycles idle ( +- 9.55% ) (83.36%) 33294953848 instructions:u # 2.76 insn per cycle # 0.00 stalled cycles per insn ( +- 0.03% ) (83.31%) 6849825049 branches:u # 2.090 G/sec ( +- 0.03% ) (83.37%) 71533903 branch-misses:u # 1.04% of all branches ( +- 0.20% ) (83.30%) 3.3088 +- 0.0302 seconds time elapsed ( +- 0.91% ) $ After: $ perf stat -r5 perf bench internals pmu-scan -i1000 # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 550.702 usec (+- 0.958 usec) Average PMU scanning took: 1566.577 usec (+- 22.747 usec) # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 548.315 usec (+- 0.555 usec) Average PMU scanning took: 1565.499 usec (+- 22.760 usec) # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 548.073 usec (+- 0.555 usec) Average PMU scanning took: 1586.097 usec (+- 23.299 usec) # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 561.184 usec (+- 2.709 usec) Average PMU scanning took: 1567.153 usec (+- 22.548 usec) # Running 'internals/pmu-scan' benchmark: Computing performance of sysfs PMU event scan for 1000 times Average core PMU scanning took: 546.987 usec (+- 0.553 usec) Average PMU scanning took: 1562.814 usec (+- 22.729 usec) Performance counter stats for 'perf bench internals pmu-scan -i1000' (5 runs): 3170.86 msec task-clock:u # 0.992 CPUs utilized ( +- 0.22% ) 0 context-switches:u # 0.000 /sec 0 cpu-migrations:u # 0.000 /sec 1010 page-faults:u # 318.526 /sec ( +- 0.04% ) 11890047674 cycles:u # 3.750 GHz ( +- 0.14% ) (83.27%) 119090499 stalled-cycles-frontend:u # 1.00% frontend cycles idle ( +- 0.46% ) (83.40%) 32502449 stalled-cycles-backend:u # 0.27% backend cycles idle ( +- 8.32% ) (83.30%) 33119141261 instructions:u # 2.79 insn per cycle # 0.00 stalled cycles per insn ( +- 0.01% ) (83.37%) 6812816561 branches:u # 2.149 G/sec ( +- 0.01% ) (83.29%) 70157855 branch-misses:u # 1.03% of all branches ( +- 0.28% ) (83.38%) 3.19710 +- 0.00826 seconds time elapsed ( +- 0.26% ) $ Signed-off-by: Ian Rogers <irogers@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Gaosheng Cui <cuigaosheng1@huawei.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jing Zhang <renyu.zj@linux.alibaba.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Garry <john.g.garry@oracle.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Rob Herring <robh@kernel.org> Link: https://lore.kernel.org/r/20230824041330.266337-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
9e1f16939b
commit
5040264121
@ -171,7 +171,7 @@ static int test__pmu(struct test_suite *test __maybe_unused, int subtest __maybe
|
||||
}
|
||||
|
||||
pmu->name = strdup("perf-pmu-test");
|
||||
ret = perf_pmu__format_parse(pmu, fd);
|
||||
ret = perf_pmu__format_parse(pmu, fd, /*eager_load=*/true);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
|
@ -40,6 +40,10 @@ struct perf_pmu perf_pmu__fake;
|
||||
* value=PERF_PMU_FORMAT_VALUE_CONFIG and bits 0 to 7 will be set.
|
||||
*/
|
||||
struct perf_pmu_format {
|
||||
/** @list: Element on list within struct perf_pmu. */
|
||||
struct list_head list;
|
||||
/** @bits: Which config bits are set by this format value. */
|
||||
DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
|
||||
/** @name: The modifier/file name. */
|
||||
char *name;
|
||||
/**
|
||||
@ -47,18 +51,79 @@ struct perf_pmu_format {
|
||||
* are from PERF_PMU_FORMAT_VALUE_CONFIG to
|
||||
* PERF_PMU_FORMAT_VALUE_CONFIG_END.
|
||||
*/
|
||||
int value;
|
||||
/** @bits: Which config bits are set by this format value. */
|
||||
DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
|
||||
/** @list: Element on list within struct perf_pmu. */
|
||||
struct list_head list;
|
||||
u16 value;
|
||||
/** @loaded: Has the contents been loaded/parsed. */
|
||||
bool loaded;
|
||||
};
|
||||
|
||||
static struct perf_pmu_format *perf_pmu__new_format(struct list_head *list, char *name)
|
||||
{
|
||||
struct perf_pmu_format *format;
|
||||
|
||||
format = zalloc(sizeof(*format));
|
||||
if (!format)
|
||||
return NULL;
|
||||
|
||||
format->name = strdup(name);
|
||||
if (!format->name) {
|
||||
free(format);
|
||||
return NULL;
|
||||
}
|
||||
list_add_tail(&format->list, list);
|
||||
return format;
|
||||
}
|
||||
|
||||
/* Called at the end of parsing a format. */
|
||||
void perf_pmu_format__set_value(void *vformat, int config, unsigned long *bits)
|
||||
{
|
||||
struct perf_pmu_format *format = vformat;
|
||||
|
||||
format->value = config;
|
||||
memcpy(format->bits, bits, sizeof(format->bits));
|
||||
}
|
||||
|
||||
static void __perf_pmu_format__load(struct perf_pmu_format *format, FILE *file)
|
||||
{
|
||||
void *scanner;
|
||||
int ret;
|
||||
|
||||
ret = perf_pmu_lex_init(&scanner);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
perf_pmu_set_in(file, scanner);
|
||||
ret = perf_pmu_parse(format, scanner);
|
||||
perf_pmu_lex_destroy(scanner);
|
||||
format->loaded = true;
|
||||
}
|
||||
|
||||
static void perf_pmu_format__load(struct perf_pmu *pmu, struct perf_pmu_format *format)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
FILE *file = NULL;
|
||||
|
||||
if (format->loaded)
|
||||
return;
|
||||
|
||||
if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, "format"))
|
||||
return;
|
||||
|
||||
assert(strlen(path) + strlen(format->name) + 2 < sizeof(path));
|
||||
strcat(path, "/");
|
||||
strcat(path, format->name);
|
||||
|
||||
file = fopen(path, "r");
|
||||
if (!file)
|
||||
return;
|
||||
__perf_pmu_format__load(format, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse & process all the sysfs attributes located under
|
||||
* the directory specified in 'dir' parameter.
|
||||
*/
|
||||
int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd)
|
||||
int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load)
|
||||
{
|
||||
struct dirent *evt_ent;
|
||||
DIR *format_dir;
|
||||
@ -68,37 +133,35 @@ int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd)
|
||||
if (!format_dir)
|
||||
return -EINVAL;
|
||||
|
||||
while (!ret && (evt_ent = readdir(format_dir))) {
|
||||
while ((evt_ent = readdir(format_dir)) != NULL) {
|
||||
struct perf_pmu_format *format;
|
||||
char *name = evt_ent->d_name;
|
||||
int fd;
|
||||
void *scanner;
|
||||
FILE *file;
|
||||
|
||||
if (!strcmp(name, ".") || !strcmp(name, ".."))
|
||||
continue;
|
||||
|
||||
|
||||
ret = -EINVAL;
|
||||
fd = openat(dirfd, name, O_RDONLY);
|
||||
if (fd < 0)
|
||||
break;
|
||||
|
||||
file = fdopen(fd, "r");
|
||||
if (!file) {
|
||||
close(fd);
|
||||
format = perf_pmu__new_format(&pmu->format, name);
|
||||
if (!format) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = perf_pmu_lex_init(&scanner);
|
||||
if (ret) {
|
||||
if (eager_load) {
|
||||
FILE *file;
|
||||
int fd = openat(dirfd, name, O_RDONLY);
|
||||
|
||||
if (fd < 0) {
|
||||
ret = -errno;
|
||||
break;
|
||||
}
|
||||
file = fdopen(fd, "r");
|
||||
if (!file) {
|
||||
close(fd);
|
||||
break;
|
||||
}
|
||||
__perf_pmu_format__load(format, file);
|
||||
fclose(file);
|
||||
break;
|
||||
}
|
||||
|
||||
perf_pmu_set_in(file, scanner);
|
||||
ret = perf_pmu_parse(&pmu->format, name, scanner);
|
||||
perf_pmu_lex_destroy(scanner);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
closedir(format_dir);
|
||||
@ -119,7 +182,7 @@ static int pmu_format(struct perf_pmu *pmu, int dirfd, const char *name)
|
||||
return 0;
|
||||
|
||||
/* it'll close the fd */
|
||||
if (perf_pmu__format_parse(pmu, fd))
|
||||
if (perf_pmu__format_parse(pmu, fd, /*eager_load=*/false))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
@ -962,13 +1025,15 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
|
||||
if (pmu == &perf_pmu__fake)
|
||||
return;
|
||||
|
||||
list_for_each_entry(format, &pmu->format, list)
|
||||
list_for_each_entry(format, &pmu->format, list) {
|
||||
perf_pmu_format__load(pmu, format);
|
||||
if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) {
|
||||
pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'"
|
||||
"which is not supported by this version of perf!\n",
|
||||
pmu->name, format->name, format->value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool evsel__is_aux_event(const struct evsel *evsel)
|
||||
@ -1041,6 +1106,7 @@ int perf_pmu__format_type(struct perf_pmu *pmu, const char *name)
|
||||
if (!format)
|
||||
return -1;
|
||||
|
||||
perf_pmu_format__load(pmu, format);
|
||||
return format->value;
|
||||
}
|
||||
|
||||
@ -1177,7 +1243,7 @@ static int pmu_config_term(struct perf_pmu *pmu,
|
||||
free(pmu_term);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
perf_pmu_format__load(pmu, format);
|
||||
switch (format->value) {
|
||||
case PERF_PMU_FORMAT_VALUE_CONFIG:
|
||||
vp = &attr->config;
|
||||
@ -1403,24 +1469,6 @@ struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, const char *ev
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int perf_pmu__new_format(struct list_head *list, char *name,
|
||||
int config, unsigned long *bits)
|
||||
{
|
||||
struct perf_pmu_format *format;
|
||||
|
||||
format = zalloc(sizeof(*format));
|
||||
if (!format)
|
||||
return -ENOMEM;
|
||||
|
||||
format->name = strdup(name);
|
||||
format->value = config;
|
||||
memcpy(format->bits, bits, sizeof(format->bits));
|
||||
|
||||
list_add_tail(&format->list, list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_pmu__del_formats(struct list_head *formats)
|
||||
{
|
||||
struct perf_pmu_format *fmt, *tmp;
|
||||
|
@ -227,9 +227,8 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
|
||||
struct perf_pmu_info *info);
|
||||
struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, const char *event);
|
||||
|
||||
int perf_pmu__new_format(struct list_head *list, char *name,
|
||||
int config, unsigned long *bits);
|
||||
int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd);
|
||||
int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load);
|
||||
void perf_pmu_format__set_value(void *format, int config, unsigned long *bits);
|
||||
bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name);
|
||||
|
||||
bool is_pmu_core(const char *name);
|
||||
|
@ -1,6 +1,5 @@
|
||||
%define api.pure full
|
||||
%parse-param {struct list_head *format}
|
||||
%parse-param {char *name}
|
||||
%parse-param {void *format}
|
||||
%parse-param {void *scanner}
|
||||
%lex-param {void* scanner}
|
||||
|
||||
@ -21,7 +20,7 @@ do { \
|
||||
YYABORT; \
|
||||
} while (0)
|
||||
|
||||
static void perf_pmu_error(struct list_head *list, char *name, void *scanner, char const *msg);
|
||||
static void perf_pmu_error(void *format, void *scanner, const char *msg);
|
||||
|
||||
static void perf_pmu__set_format(unsigned long *bits, long from, long to)
|
||||
{
|
||||
@ -59,16 +58,12 @@ format_term
|
||||
format_term:
|
||||
PP_CONFIG ':' bits
|
||||
{
|
||||
ABORT_ON(perf_pmu__new_format(format, name,
|
||||
PERF_PMU_FORMAT_VALUE_CONFIG,
|
||||
$3));
|
||||
perf_pmu_format__set_value(format, PERF_PMU_FORMAT_VALUE_CONFIG, $3);
|
||||
}
|
||||
|
|
||||
PP_CONFIG PP_VALUE ':' bits
|
||||
{
|
||||
ABORT_ON(perf_pmu__new_format(format, name,
|
||||
$2,
|
||||
$4));
|
||||
perf_pmu_format__set_value(format, $2, $4);
|
||||
}
|
||||
|
||||
bits:
|
||||
@ -95,9 +90,8 @@ PP_VALUE
|
||||
|
||||
%%
|
||||
|
||||
static void perf_pmu_error(struct list_head *list __maybe_unused,
|
||||
char *name __maybe_unused,
|
||||
void *scanner __maybe_unused,
|
||||
char const *msg __maybe_unused)
|
||||
static void perf_pmu_error(void *format __maybe_unused,
|
||||
void *scanner __maybe_unused,
|
||||
const char *msg __maybe_unused)
|
||||
{
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user