perf tools fixes for v6.0: 5th batch
- Fail the 'perf test record' entry on error, fixing a regression where the test failed only when setup steps such as allocating memory failed, and not when the actual things being tested failed. - Fixup disabling of -Wdeprecated-declarations for the python scripting engine, the previous attempt had a brown paper bag thinko. - Fix branch stack sampling test to include sanity check for branch filter on PowerPC. - Update is_ignored_symbol function to match the kernel ignored list, fixing running the 'perf test' entry that compares resolving symbols from kallsyms to resolving from vmlinux. - Augment the data source type with ARM's neoverse_spe list, the previous code was limited in its search resolving the data source. - Fix some clang 15 variable set but unused cases. - Get a perf cgroup more portably in BPF as the __builtin_preserve_enum_value builtin is not available in older versions of clang. In those cases we can forgo BPF's CO-RE (Compile Once, Run Everywhere). - More Fixes for Intel's hybrid CPU model. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQR2GiIUctdOfX2qHhGyPKLppCJ+JwUCYzY3aAAKCRCyPKLppCJ+ J+3XAQDGalZmlY6Y0aUrCKj6+utDy7bUy+xamDaD+6gjJgkVNAD/R9YrI7roaGb3 rx3gRJu46CD1abzaI1rMZo35DSYICgE= =n5Gs -----END PGP SIGNATURE----- Merge tag 'perf-tools-fixes-for-v6.0-2022-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux Pull perf tools fixes from Arnaldo Carvalho de Melo: - Fail the 'perf test record' entry on error, fixing a regression where the test failed only when setup steps such as allocating memory failed, and not when the actual things being tested failed. - Fixup disabling of -Wdeprecated-declarations for the python scripting engine, the previous attempt had a brown paper bag thinko. - Fix branch stack sampling test to include sanity check for branch filter on PowerPC. - Update is_ignored_symbol function to match the kernel ignored list, fixing running the 'perf test' entry that compares resolving symbols from kallsyms to resolving from vmlinux. 
- Augment the data source type with ARM's neoverse_spe list, the previous code was limited in its search resolving the data source. - Fix some clang 15 variable set but unused cases. - Get a perf cgroup more portably in BPF as the __builtin_preserve_enum_value builtin is not available in older versions of clang. In those cases we can forgo BPF's CO-RE (Compile Once, Run Everywhere). - More Fixes for Intel's hybrid CPU model. * tag 'perf-tools-fixes-for-v6.0-2022-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: perf build: Fixup disabling of -Wdeprecated-declarations for the python scripting engine perf tests mmap-basic: Remove unused variable to address clang 15 warning perf parse-events: Ignore clang 15 warning about variable set but unused in bison produced code perf tests record: Fail the test if the 'errs' counter is not zero perf test: Fix test case 87 ("perf record tests") for hybrid systems perf arm-spe: augment the data source type with neoverse_spe list perf tests vmlinux-kallsyms: Update is_ignored_symbol function to match the kernel ignored list perf tests powerpc: Fix branch stack sampling test to include sanity check for branch filter perf parse-events: Remove "not supported" hybrid cache events perf print-events: Fix "perf list" can not display the PMU prefix for some hybrid cache events perf tools: Get a perf cgroup more portably in BPF
This commit is contained in:
commit
c816f2e981
@ -114,8 +114,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
|
||||
|
||||
for (i = 0; i < nsyscalls; ++i)
|
||||
for (j = 0; j < expected_nr_events[i]; ++j) {
|
||||
int foo = syscalls[i]();
|
||||
++foo;
|
||||
syscalls[i]();
|
||||
}
|
||||
|
||||
md = &evlist->mmap[0];
|
||||
|
@ -332,7 +332,7 @@ out_delete_evlist:
|
||||
out:
|
||||
if (err == -EACCES)
|
||||
return TEST_SKIP;
|
||||
if (err < 0)
|
||||
if (err < 0 || errs != 0)
|
||||
return TEST_FAIL;
|
||||
return TEST_OK;
|
||||
}
|
||||
|
@ -61,7 +61,7 @@ test_register_capture() {
|
||||
echo "Register capture test [Skipped missing registers]"
|
||||
return
|
||||
fi
|
||||
if ! perf record -o - --intr-regs=di,r8,dx,cx -e cpu/br_inst_retired.near_call/p \
|
||||
if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \
|
||||
-c 1000 --per-thread true 2> /dev/null \
|
||||
| perf script -F ip,sym,iregs -i - 2> /dev/null \
|
||||
| egrep -q "DI:"
|
||||
|
@ -12,7 +12,8 @@ if ! [ -x "$(command -v cc)" ]; then
|
||||
fi
|
||||
|
||||
# skip the test if the hardware doesn't support branch stack sampling
|
||||
perf record -b -o- -B true > /dev/null 2>&1 || exit 2
|
||||
# and if the architecture doesn't support filter types: any,save_type,u
|
||||
perf record -b -o- -B --branch-filter any,save_type,u true > /dev/null 2>&1 || exit 2
|
||||
|
||||
TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX)
|
||||
|
||||
|
@ -43,10 +43,11 @@ static bool is_ignored_symbol(const char *name, char type)
|
||||
/* Symbol names that begin with the following are ignored.*/
|
||||
static const char * const ignored_prefixes[] = {
|
||||
"$", /* local symbols for ARM, MIPS, etc. */
|
||||
".LASANPC", /* s390 kasan local symbols */
|
||||
".L", /* local labels, .LBB,.Ltmpxxx,.L__unnamed_xx,.LASANPC, etc. */
|
||||
"__crc_", /* modversions */
|
||||
"__efistub_", /* arm64 EFI stub namespace */
|
||||
"__kvm_nvhe_", /* arm64 non-VHE KVM namespace */
|
||||
"__kvm_nvhe_$", /* arm64 local symbols in non-VHE KVM namespace */
|
||||
"__kvm_nvhe_.L", /* arm64 local symbols in non-VHE KVM namespace */
|
||||
"__AArch64ADRPThunk_", /* arm64 lld */
|
||||
"__ARMV5PILongThunk_", /* arm lld */
|
||||
"__ARMV7PILongThunk_",
|
||||
|
@ -269,7 +269,7 @@ CFLAGS_expr-flex.o += $(flex_flags)
|
||||
bison_flags := -DYYENABLE_NLS=0
|
||||
BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
|
||||
ifeq ($(BISON_GE_35),1)
|
||||
bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum
|
||||
bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option
|
||||
else
|
||||
bison_flags += -w
|
||||
endif
|
||||
|
@ -498,7 +498,7 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
|
||||
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
|
||||
{
|
||||
union perf_mem_data_src data_src = { 0 };
|
||||
bool is_neoverse = is_midr_in_range(midr, neoverse_spe);
|
||||
bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
|
||||
|
||||
if (record->op == ARM_SPE_LD)
|
||||
data_src.mem_op = PERF_MEM_OP_LOAD;
|
||||
|
@ -48,6 +48,7 @@ const volatile __u32 num_cpus = 1;
|
||||
|
||||
int enabled = 0;
|
||||
int use_cgroup_v2 = 0;
|
||||
int perf_subsys_id = -1;
|
||||
|
||||
static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
|
||||
{
|
||||
@ -58,7 +59,15 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
|
||||
int level;
|
||||
int cnt;
|
||||
|
||||
cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup);
|
||||
if (perf_subsys_id == -1) {
|
||||
#if __has_builtin(__builtin_preserve_enum_value)
|
||||
perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
|
||||
perf_event_cgrp_id);
|
||||
#else
|
||||
perf_subsys_id = perf_event_cgrp_id;
|
||||
#endif
|
||||
}
|
||||
cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup);
|
||||
level = BPF_CORE_READ(cgrp, level);
|
||||
|
||||
for (cnt = 0; i < MAX_LEVELS; i++) {
|
||||
|
@ -94,6 +94,8 @@ const volatile bool has_prev_state = false;
|
||||
const volatile bool needs_cgroup = false;
|
||||
const volatile bool uses_cgroup_v1 = false;
|
||||
|
||||
int perf_subsys_id = -1;
|
||||
|
||||
/*
|
||||
* Old kernel used to call it task_struct->state and now it's '__state'.
|
||||
* Use BPF CO-RE "ignored suffix rule" to deal with it like below:
|
||||
@ -119,11 +121,19 @@ static inline __u64 get_cgroup_id(struct task_struct *t)
|
||||
{
|
||||
struct cgroup *cgrp;
|
||||
|
||||
if (uses_cgroup_v1)
|
||||
cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup);
|
||||
else
|
||||
cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp);
|
||||
if (!uses_cgroup_v1)
|
||||
return BPF_CORE_READ(t, cgroups, dfl_cgrp, kn, id);
|
||||
|
||||
if (perf_subsys_id == -1) {
|
||||
#if __has_builtin(__builtin_preserve_enum_value)
|
||||
perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
|
||||
perf_event_cgrp_id);
|
||||
#else
|
||||
perf_subsys_id = perf_event_cgrp_id;
|
||||
#endif
|
||||
}
|
||||
|
||||
cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_subsys_id], cgroup);
|
||||
return BPF_CORE_READ(cgrp, kn, id);
|
||||
}
|
||||
|
||||
|
@ -33,7 +33,8 @@ static void config_hybrid_attr(struct perf_event_attr *attr,
|
||||
* If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
|
||||
*/
|
||||
attr->type = type;
|
||||
attr->config = attr->config | ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT);
|
||||
attr->config = (attr->config & PERF_HW_EVENT_MASK) |
|
||||
((__u64)pmu_type << PERF_PMU_TYPE_SHIFT);
|
||||
}
|
||||
|
||||
static int create_event_hybrid(__u32 config_type, int *idx,
|
||||
@ -48,13 +49,25 @@ static int create_event_hybrid(__u32 config_type, int *idx,
|
||||
__u64 config = attr->config;
|
||||
|
||||
config_hybrid_attr(attr, config_type, pmu->type);
|
||||
|
||||
/*
|
||||
* Some hybrid hardware cache events are only available on one CPU
|
||||
* PMU. For example, the 'L1-dcache-load-misses' is only available
|
||||
* on cpu_core, while the 'L1-icache-loads' is only available on
|
||||
* cpu_atom. We need to remove "not supported" hybrid cache events.
|
||||
*/
|
||||
if (attr->type == PERF_TYPE_HW_CACHE
|
||||
&& !is_event_supported(attr->type, attr->config))
|
||||
return 0;
|
||||
|
||||
evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id,
|
||||
pmu, config_terms);
|
||||
if (evsel)
|
||||
if (evsel) {
|
||||
evsel->pmu_name = strdup(pmu->name);
|
||||
else
|
||||
if (!evsel->pmu_name)
|
||||
return -ENOMEM;
|
||||
} else
|
||||
return -ENOMEM;
|
||||
|
||||
attr->type = type;
|
||||
attr->config = config;
|
||||
return 0;
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "util/parse-events-hybrid.h"
|
||||
#include "util/pmu-hybrid.h"
|
||||
#include "tracepoint.h"
|
||||
#include "thread_map.h"
|
||||
|
||||
#define MAX_NAME_LEN 100
|
||||
|
||||
@ -157,6 +158,44 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
|
||||
#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
|
||||
#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
|
||||
|
||||
bool is_event_supported(u8 type, u64 config)
|
||||
{
|
||||
bool ret = true;
|
||||
int open_return;
|
||||
struct evsel *evsel;
|
||||
struct perf_event_attr attr = {
|
||||
.type = type,
|
||||
.config = config,
|
||||
.disabled = 1,
|
||||
};
|
||||
struct perf_thread_map *tmap = thread_map__new_by_tid(0);
|
||||
|
||||
if (tmap == NULL)
|
||||
return false;
|
||||
|
||||
evsel = evsel__new(&attr);
|
||||
if (evsel) {
|
||||
open_return = evsel__open(evsel, NULL, tmap);
|
||||
ret = open_return >= 0;
|
||||
|
||||
if (open_return == -EACCES) {
|
||||
/*
|
||||
* This happens if the paranoid value
|
||||
* /proc/sys/kernel/perf_event_paranoid is set to 2
|
||||
* Re-run with exclude_kernel set; we don't do that
|
||||
* by default as some ARM machines do not support it.
|
||||
*
|
||||
*/
|
||||
evsel->core.attr.exclude_kernel = 1;
|
||||
ret = evsel__open(evsel, NULL, tmap) >= 0;
|
||||
}
|
||||
evsel__delete(evsel);
|
||||
}
|
||||
|
||||
perf_thread_map__put(tmap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const char *event_type(int type)
|
||||
{
|
||||
switch (type) {
|
||||
|
@ -19,6 +19,7 @@ struct option;
|
||||
struct perf_pmu;
|
||||
|
||||
bool have_tracepoints(struct list_head *evlist);
|
||||
bool is_event_supported(u8 type, u64 config);
|
||||
|
||||
const char *event_type(int type);
|
||||
|
||||
|
@ -22,7 +22,6 @@
|
||||
#include "probe-file.h"
|
||||
#include "string2.h"
|
||||
#include "strlist.h"
|
||||
#include "thread_map.h"
|
||||
#include "tracepoint.h"
|
||||
#include "pfm.h"
|
||||
#include "pmu-hybrid.h"
|
||||
@ -239,44 +238,6 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob,
|
||||
strlist__delete(sdtlist);
|
||||
}
|
||||
|
||||
static bool is_event_supported(u8 type, unsigned int config)
|
||||
{
|
||||
bool ret = true;
|
||||
int open_return;
|
||||
struct evsel *evsel;
|
||||
struct perf_event_attr attr = {
|
||||
.type = type,
|
||||
.config = config,
|
||||
.disabled = 1,
|
||||
};
|
||||
struct perf_thread_map *tmap = thread_map__new_by_tid(0);
|
||||
|
||||
if (tmap == NULL)
|
||||
return false;
|
||||
|
||||
evsel = evsel__new(&attr);
|
||||
if (evsel) {
|
||||
open_return = evsel__open(evsel, NULL, tmap);
|
||||
ret = open_return >= 0;
|
||||
|
||||
if (open_return == -EACCES) {
|
||||
/*
|
||||
* This happens if the paranoid value
|
||||
* /proc/sys/kernel/perf_event_paranoid is set to 2
|
||||
* Re-run with exclude_kernel set; we don't do that
|
||||
* by default as some ARM machines do not support it.
|
||||
*
|
||||
*/
|
||||
evsel->core.attr.exclude_kernel = 1;
|
||||
ret = evsel__open(evsel, NULL, tmap) >= 0;
|
||||
}
|
||||
evsel__delete(evsel);
|
||||
}
|
||||
|
||||
perf_thread_map__put(tmap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int print_hwcache_events(const char *event_glob, bool name_only)
|
||||
{
|
||||
unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0;
|
||||
|
@ -3,4 +3,4 @@ perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
|
||||
|
||||
CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
|
||||
|
||||
CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-error=deprecated-declarations
|
||||
CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations
|
||||
|
Loading…
Reference in New Issue
Block a user