Performance events changes for v6.7 are:

 - Add AMD Unified Memory Controller (UMC) events introduced with Zen 4
 - Simplify & clean up the uncore management code
 - Fall back from RDPMC to RDMSR on certain uncore PMUs
 - Improve per-package and cstate event reading
 - Extend the Intel ref-cycles event to GP counters
 - Fix Intel MTL event constraints
 - Improve the Intel hybrid CPU handling code
 - Micro-optimize the RAPL code
 - Optimize perf_cgroup_switch()
 - Improve large AUX area error handling
 - Misc fixes and cleanups

Signed-off-by: Ingo Molnar <mingo@kernel.org>

-----BEGIN PGP SIGNATURE-----

iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmU89YsRHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1iQqQ/9EF9mG4te5By4qN+B7jADCmE71xG5ViKz
sp4Thl86SHxhwFuiHn8dMUixrp+qbcemi5yTbQ9TF8cKl4s3Ju2CihU8jaauUp0a
iS5W0IliMqLD1pxQoXAPLuPVInVYgrNOCbR4l6l7D6ervh5Z6PVEf7SVeAP3L5wo
QV/V3NKkrYeNQL+FoKhCH8Vhxw0HxUmKJO7UhW6yuCt7BAok9Es18h3OVnn+7es4
BB7VI/JvdmXf2ioKhTPnDXJjC+vh5vnwiBoTcdQ2W9ADhWUvfL4ozxOXT6z7oC3A
nwBOdXf8w8Rqnqqd8hduop1QUrusMxlEVgOMCk27qHx97uWgPceZWdoxDXGHBiRK
fqJAwXERf9wp5/M57NDlPwyf/43Hocdx2CdLkQBpfD78/k/sB5hW0KxnzY0FUI9x
jBRQyWD05IDJATBaMHz+VbrexS+Itvjp2QvSiSm9zislYD4zA9fQ3lAgFhEpcUbA
ZA/nN4t+CbiGEAsJEuBPlvSC1ahUwVP/0nz3PFlVWFDqAx0mXgVNKBe083A9yh7I
dVisVY6KPAVDzyOc1LqzU8WFXNFnIkIIaLrb6fRHJVEM8MDfpLPS/a+7AHdRcDP4
yq6fjVVjyP7e9lSQLYBUP3/3uiVnWQj92l6V6CrcgDMX5rDOb0VN+BQrmhPR6fWY
WEim6WZrZj4=
=OLsv
-----END PGP SIGNATURE-----

Merge tag 'perf-core-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance event updates from Ingo Molnar:

 - Add AMD Unified Memory Controller (UMC) events introduced with Zen 4
 - Simplify & clean up the uncore management code
 - Fall back from RDPMC to RDMSR on certain uncore PMUs
 - Improve per-package and cstate event reading
 - Extend the Intel ref-cycles event to GP counters
 - Fix Intel MTL event constraints
 - Improve the Intel hybrid CPU handling code
 - Micro-optimize the RAPL code
 - Optimize perf_cgroup_switch()
 - Improve large AUX area error handling
 - Misc fixes and cleanups

* tag 'perf-core-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
  perf/x86/amd/uncore: Pass through error code for initialization failures, instead of -ENODEV
  perf/x86/amd/uncore: Fix uninitialized return value in amd_uncore_init()
  x86/cpu: Fix the AMD Fam 17h, Fam 19h, Zen2 and Zen4 MSR enumerations
  perf: Optimize perf_cgroup_switch()
  perf/x86/amd/uncore: Add memory controller support
  perf/x86/amd/uncore: Add group exclusivity
  perf/x86/amd/uncore: Use rdmsr if rdpmc is unavailable
  perf/x86/amd/uncore: Move discovery and registration
  perf/x86/amd/uncore: Refactor uncore management
  perf/core: Allow reading package events from perf_event_read_local
  perf/x86/cstate: Allow reading the package statistics from local CPU
  perf/x86/intel/pt: Fix kernel-doc comments
  perf/x86/rapl: Annotate 'struct rapl_pmus' with __counted_by
  perf/core: Rename perf_proc_update_handler() -> perf_event_max_sample_rate_handler(), for readability
  perf/x86/rapl: Fix "Using plain integer as NULL pointer" Sparse warning
  perf/x86/rapl: Use local64_try_cmpxchg in rapl_event_update()
  perf/x86/rapl: Stop doing cpu_relax() in the local64_cmpxchg() loop in rapl_event_update()
  perf/core: Bail out early if the request AUX area is out of bound
  perf/x86/intel: Extend the ref-cycles event to GP counters
  perf/x86/intel: Fix broken fixed event constraints extension
  ...
commit bceb7accb7
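One item in the changelog above extends the Intel ref-cycles event to general-purpose counters, so PERF_COUNT_HW_REF_CPU_CYCLES is no longer tied to the fixed counter alone. As a hedged illustration of the userspace side of that event (a generic perf_event_open() request, not code from this merge):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_REF_CPU_CYCLES;

	/* Count the current task on any CPU; may need a permissive
	 * perf_event_paranoid setting. */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("ref-cycles: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}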
@@ -1887,9 +1887,9 @@ ssize_t events_hybrid_sysfs_show(struct device *dev,
str = pmu_attr->event_str;
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
if (!(x86_pmu.hybrid_pmu[i].cpu_type & pmu_attr->pmu_type))
if (!(x86_pmu.hybrid_pmu[i].pmu_type & pmu_attr->pmu_type))
continue;
if (x86_pmu.hybrid_pmu[i].cpu_type & pmu->cpu_type) {
if (x86_pmu.hybrid_pmu[i].pmu_type & pmu->pmu_type) {
next_str = strchr(str, ';');
if (next_str)
return snprintf(page, next_str - str + 1, "%s", str);

@@ -2169,7 +2169,7 @@ static int __init init_hw_perf_events(void)
hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE;
err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name,
(hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
(hybrid_pmu->pmu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
if (err)
break;
}
@@ -211,6 +211,14 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_grt_event_constraints[] __read_mostly = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
EVENT_CONSTRAINT_END
};
static struct event_constraint intel_skl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */

@@ -299,7 +307,7 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
static struct extra_reg intel_glc_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),

@@ -309,11 +317,12 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
static struct event_constraint intel_spr_event_constraints[] = {
static struct event_constraint intel_glc_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),

@@ -349,7 +358,7 @@ static struct event_constraint intel_spr_event_constraints[] = {
EVENT_CONSTRAINT_END
};
static struct extra_reg intel_gnr_extra_regs[] __read_mostly = {
static struct extra_reg intel_rwc_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),

@@ -473,7 +482,7 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
static __initconst const u64 spr_hw_cache_event_ids
static __initconst const u64 glc_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =

@@ -552,7 +561,7 @@ static __initconst const u64 spr_hw_cache_event_ids
},
};
static __initconst const u64 spr_hw_cache_extra_regs
static __initconst const u64 glc_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =

@@ -2556,16 +2565,6 @@ static int icl_set_topdown_event_period(struct perf_event *event)
return 0;
}
static int adl_set_topdown_event_period(struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
if (pmu->cpu_type != hybrid_big)
return 0;
return icl_set_topdown_event_period(event);
}
DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period);
static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)

@@ -2708,16 +2707,6 @@ static u64 icl_update_topdown_event(struct perf_event *event)
x86_pmu.num_topdown_events - 1);
}
static u64 adl_update_topdown_event(struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
if (pmu->cpu_type != hybrid_big)
return 0;
return icl_update_topdown_event(event);
}
DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
static void intel_pmu_read_topdown_event(struct perf_event *event)

@@ -3869,7 +3858,7 @@ static inline bool require_mem_loads_aux_event(struct perf_event *event)
return false;
if (is_hybrid())
return hybrid_pmu(event->pmu)->cpu_type == hybrid_big;
return hybrid_pmu(event->pmu)->pmu_type == hybrid_big;
return true;
}

@@ -4273,7 +4262,7 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
}
static struct event_constraint *
spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
glc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
struct event_constraint *c;

@@ -4361,9 +4350,9 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
if (pmu->cpu_type == hybrid_big)
return spr_get_event_constraints(cpuc, idx, event);
else if (pmu->cpu_type == hybrid_small)
if (pmu->pmu_type == hybrid_big)
return glc_get_event_constraints(cpuc, idx, event);
else if (pmu->pmu_type == hybrid_small)
return tnt_get_event_constraints(cpuc, idx, event);
WARN_ON(1);

@@ -4409,7 +4398,7 @@ rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
struct event_constraint *c;
c = spr_get_event_constraints(cpuc, idx, event);
c = glc_get_event_constraints(cpuc, idx, event);
/* The Retire Latency is not supported by the fixed counter 0. */
if (event->attr.precise_ip &&

@@ -4433,9 +4422,9 @@ mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
if (pmu->cpu_type == hybrid_big)
if (pmu->pmu_type == hybrid_big)
return rwc_get_event_constraints(cpuc, idx, event);
if (pmu->cpu_type == hybrid_small)
if (pmu->pmu_type == hybrid_small)
return cmt_get_event_constraints(cpuc, idx, event);
WARN_ON(1);

@@ -4446,18 +4435,18 @@ static int adl_hw_config(struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
if (pmu->cpu_type == hybrid_big)
if (pmu->pmu_type == hybrid_big)
return hsw_hw_config(event);
else if (pmu->cpu_type == hybrid_small)
else if (pmu->pmu_type == hybrid_small)
return intel_pmu_hw_config(event);
WARN_ON(1);
return -EOPNOTSUPP;
}
static u8 adl_get_hybrid_cpu_type(void)
static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void)
{
return hybrid_big;
return HYBRID_INTEL_CORE;
}
/*

@@ -4490,7 +4479,7 @@ static void nhm_limit_period(struct perf_event *event, s64 *left)
*left = max(*left, 32LL);
}
static void spr_limit_period(struct perf_event *event, s64 *left)
static void glc_limit_period(struct perf_event *event, s64 *left)
{
if (event->attr.precise_ip == 3)
*left = max(*left, 128LL);

@@ -4618,6 +4607,23 @@ static void intel_pmu_check_num_counters(int *num_counters,
int *num_counters_fixed,
u64 *intel_ctrl, u64 fixed_mask);
static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints,
int num_counters,
int num_counters_fixed,
u64 intel_ctrl);
static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
static inline bool intel_pmu_broken_perf_cap(void)
{
/* The Perf Metric (Bit 15) is always cleared */
if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) ||
(boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L))
return true;
return false;
}
static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
{
unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);

@@ -4628,27 +4634,83 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
&eax, &ebx, &ecx, &edx);
pmu->num_counters = fls(eax);
pmu->num_counters_fixed = fls(ebx);
intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
&pmu->intel_ctrl, ebx);
}
if (!intel_pmu_broken_perf_cap()) {
/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
}
}
static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
{
intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
&pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1);
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
0, pmu->num_counters, 0, 0);
if (pmu->intel_cap.perf_metrics)
pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
else
pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
if (pmu->intel_cap.pebs_output_pt_available)
pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
else
pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT;
intel_pmu_check_event_constraints(pmu->event_constraints,
pmu->num_counters,
pmu->num_counters_fixed,
pmu->intel_ctrl);
intel_pmu_check_extra_regs(pmu->extra_regs);
}
static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
{
u8 cpu_type = get_this_hybrid_cpu_type();
int i;
/*
* This is running on a CPU model that is known to have hybrid
* configurations. But the CPU told us it is not hybrid, shame
* on it. There should be a fixup function provided for these
* troublesome CPUs (->get_hybrid_cpu_type).
*/
if (cpu_type == HYBRID_INTEL_NONE) {
if (x86_pmu.get_hybrid_cpu_type)
cpu_type = x86_pmu.get_hybrid_cpu_type();
else
return NULL;
}
/*
* This essentially just maps between the 'hybrid_cpu_type'
* and 'hybrid_pmu_type' enums:
*/
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
if (cpu_type == HYBRID_INTEL_CORE &&
pmu_type == hybrid_big)
return &x86_pmu.hybrid_pmu[i];
if (cpu_type == HYBRID_INTEL_ATOM &&
pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i];
}
return NULL;
}
static bool init_hybrid_pmu(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
u8 cpu_type = get_this_hybrid_cpu_type();
struct x86_hybrid_pmu *pmu = NULL;
int i;
struct x86_hybrid_pmu *pmu = find_hybrid_pmu_for_cpu();
if (!cpu_type && x86_pmu.get_hybrid_cpu_type)
cpu_type = x86_pmu.get_hybrid_cpu_type();
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
if (x86_pmu.hybrid_pmu[i].cpu_type == cpu_type) {
pmu = &x86_pmu.hybrid_pmu[i];
break;
}
}
if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) {
cpuc->pmu = NULL;
return false;

@@ -4661,6 +4723,8 @@ static bool init_hybrid_pmu(int cpu)
if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
update_pmu_cap(pmu);
intel_pmu_check_hybrid_pmus(pmu);
if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
return false;

@@ -5337,14 +5401,14 @@ static struct attribute *icl_tsx_events_attrs[] = {
EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2");
EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82");
static struct attribute *spr_events_attrs[] = {
static struct attribute *glc_events_attrs[] = {
EVENT_PTR(mem_ld_hsw),
EVENT_PTR(mem_st_spr),
EVENT_PTR(mem_ld_aux),
NULL,
};
static struct attribute *spr_td_events_attrs[] = {
static struct attribute *glc_td_events_attrs[] = {
EVENT_PTR(slots),
EVENT_PTR(td_retiring),
EVENT_PTR(td_bad_spec),

@@ -5357,7 +5421,7 @@ static struct attribute *spr_td_events_attrs[] = {
NULL,
};
static struct attribute *spr_tsx_events_attrs[] = {
static struct attribute *glc_tsx_events_attrs[] = {
EVENT_PTR(tx_start),
EVENT_PTR(tx_abort),
EVENT_PTR(tx_commit),

@@ -5699,7 +5763,7 @@ static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr)
struct perf_pmu_events_hybrid_attr *pmu_attr =
container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr);
return pmu->cpu_type & pmu_attr->pmu_type;
return pmu->pmu_type & pmu_attr->pmu_type;
}
static umode_t hybrid_events_is_visible(struct kobject *kobj,

@@ -5736,7 +5800,7 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj,
container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr);
int cpu = hybrid_find_supported_cpu(pmu);
return (cpu >= 0) && (pmu->cpu_type & pmu_attr->pmu_type) ? attr->mode : 0;
return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0;
}
static struct attribute_group hybrid_group_events_td = {

@@ -5880,40 +5944,105 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs)
}
}
static void intel_pmu_check_hybrid_pmus(u64 fixed_mask)
static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = {
{ hybrid_small, "cpu_atom" },
{ hybrid_big, "cpu_core" },
};
static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
{
unsigned long pmus_mask = pmus;
struct x86_hybrid_pmu *pmu;
int i;
int idx = 0, bit;
for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
pmu = &x86_pmu.hybrid_pmu[i];
x86_pmu.num_hybrid_pmus = hweight_long(pmus_mask);
x86_pmu.hybrid_pmu = kcalloc(x86_pmu.num_hybrid_pmus,
sizeof(struct x86_hybrid_pmu),
GFP_KERNEL);
if (!x86_pmu.hybrid_pmu)
return -ENOMEM;
intel_pmu_check_num_counters(&pmu->num_counters,
&pmu->num_counters_fixed,
&pmu->intel_ctrl,
fixed_mask);
static_branch_enable(&perf_is_hybrid);
x86_pmu.filter = intel_pmu_filter;
if (pmu->intel_cap.perf_metrics) {
pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS;
for_each_set_bit(bit, &pmus_mask, ARRAY_SIZE(intel_hybrid_pmu_type_map)) {
pmu = &x86_pmu.hybrid_pmu[idx++];
pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id;
pmu->name = intel_hybrid_pmu_type_map[bit].name;
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
0, pmu->num_counters, 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
if (pmu->pmu_type & hybrid_small) {
pmu->intel_cap.perf_metrics = 0;
pmu->intel_cap.pebs_output_pt_available = 1;
pmu->mid_ack = true;
} else if (pmu->pmu_type & hybrid_big) {
pmu->intel_cap.perf_metrics = 1;
pmu->intel_cap.pebs_output_pt_available = 0;
pmu->late_ack = true;
}
if (pmu->intel_cap.pebs_output_pt_available)
pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
intel_pmu_check_event_constraints(pmu->event_constraints,
pmu->num_counters,
pmu->num_counters_fixed,
pmu->intel_ctrl);
intel_pmu_check_extra_regs(pmu->extra_regs);
}
return 0;
}
static __always_inline bool is_mtl(u8 x86_model)
static __always_inline void intel_pmu_ref_cycles_ext(void)
{
return (x86_model == INTEL_FAM6_METEORLAKE) ||
(x86_model == INTEL_FAM6_METEORLAKE_L);
if (!(x86_pmu.events_maskl & (INTEL_PMC_MSK_FIXED_REF_CYCLES >> INTEL_PMC_IDX_FIXED)))
intel_perfmon_event_map[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x013c;
}
static __always_inline void intel_pmu_init_glc(struct pmu *pmu)
{
x86_pmu.late_ack = true;
x86_pmu.limit_period = glc_limit_period;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
x86_pmu.lbr_pt_coexist = true;
x86_pmu.num_topdown_events = 8;
static_call_update(intel_pmu_update_topdown_event,
&icl_update_topdown_event);
static_call_update(intel_pmu_set_topdown_event_period,
&icl_set_topdown_event_period);
memcpy(hybrid_var(pmu, hw_cache_event_ids), glc_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hybrid_var(pmu, hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
hybrid(pmu, event_constraints) = intel_glc_event_constraints;
hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints;
intel_pmu_ref_cycles_ext();
}
static __always_inline void intel_pmu_init_grt(struct pmu *pmu)
{
x86_pmu.mid_ack = true;
x86_pmu.limit_period = glc_limit_period;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hybrid_var(pmu, hw_cache_extra_regs), tnt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
hybrid(pmu, event_constraints) = intel_grt_event_constraints;
hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints;
hybrid(pmu, extra_regs) = intel_grt_extra_regs;
intel_pmu_ref_cycles_ext();
}
__init int intel_pmu_init(void)

@@ -6194,28 +6323,10 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_GRACEMONT:
x86_pmu.mid_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
x86_pmu.extra_regs = intel_grt_extra_regs;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
intel_pmu_init_grt(NULL);
intel_pmu_pebs_data_source_grt();
x86_pmu.pebs_latency_data = adl_latency_data_small;
x86_pmu.get_event_constraints = tnt_get_event_constraints;
x86_pmu.limit_period = spr_limit_period;
td_attr = tnt_events_attrs;
mem_attr = grt_mem_attrs;
extra_attr = nhm_format_attr;

@@ -6225,28 +6336,11 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ATOM_CRESTMONT:
case INTEL_FAM6_ATOM_CRESTMONT_X:
x86_pmu.mid_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
intel_pmu_init_grt(NULL);
x86_pmu.extra_regs = intel_cmt_extra_regs;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.pebs_block = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
intel_pmu_pebs_data_source_cmt();
x86_pmu.pebs_latency_data = mtl_latency_data_small;
x86_pmu.get_event_constraints = cmt_get_event_constraints;
x86_pmu.limit_period = spr_limit_period;
td_attr = cmt_events_attrs;
mem_attr = grt_mem_attrs;
extra_attr = cmt_format_attr;

@@ -6563,44 +6657,23 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_EMERALDRAPIDS_X:
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.extra_regs = intel_spr_extra_regs;
x86_pmu.extra_regs = intel_glc_extra_regs;
fallthrough;
case INTEL_FAM6_GRANITERAPIDS_X:
case INTEL_FAM6_GRANITERAPIDS_D:
pmem = true;
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
x86_pmu.event_constraints = intel_spr_event_constraints;
x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints;
intel_pmu_init_glc(NULL);
if (!x86_pmu.extra_regs)
x86_pmu.extra_regs = intel_gnr_extra_regs;
x86_pmu.limit_period = spr_limit_period;
x86_pmu.extra_regs = intel_rwc_extra_regs;
x86_pmu.pebs_ept = 1;
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = spr_get_event_constraints;
x86_pmu.get_event_constraints = glc_get_event_constraints;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
extra_skl_attr = skl_format_attr;
mem_attr = spr_events_attrs;
td_attr = spr_td_events_attrs;
tsx_attr = spr_tsx_events_attrs;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem);
x86_pmu.num_topdown_events = 8;
static_call_update(intel_pmu_update_topdown_event,
&icl_update_topdown_event);
static_call_update(intel_pmu_set_topdown_event_period,
&icl_set_topdown_event_period);
mem_attr = glc_events_attrs;
td_attr = glc_td_events_attrs;
tsx_attr = glc_tsx_events_attrs;
intel_pmu_pebs_data_source_skl(true);
pr_cont("Sapphire Rapids events, ");
name = "sapphire_rapids";
break;

@@ -6610,47 +6683,17 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S:
case INTEL_FAM6_METEORLAKE:
case INTEL_FAM6_METEORLAKE_L:
/*
* Alder Lake has 2 types of CPU, core and atom.
*
* Initialize the common PerfMon capabilities here.
*/
x86_pmu.hybrid_pmu = kcalloc(X86_HYBRID_NUM_PMUS,
sizeof(struct x86_hybrid_pmu),
GFP_KERNEL);
if (!x86_pmu.hybrid_pmu)
return -ENOMEM;
static_branch_enable(&perf_is_hybrid);
x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS;
intel_pmu_init_hybrid(hybrid_big_small);
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
x86_pmu.pebs_block = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.pebs_latency_data = adl_latency_data_small;
x86_pmu.num_topdown_events = 8;
static_call_update(intel_pmu_update_topdown_event,
&adl_update_topdown_event);
static_call_update(intel_pmu_set_topdown_event_period,
&adl_set_topdown_event_period);
x86_pmu.filter = intel_pmu_filter;
x86_pmu.get_event_constraints = adl_get_event_constraints;
x86_pmu.hw_config = adl_hw_config;
x86_pmu.limit_period = spr_limit_period;
x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type;
/*
* The rtm_abort_event is used to check whether to enable GPRs
* for the RTM abort event. Atom doesn't have the RTM abort
* event. There is no harmful to set it in the common
* x86_pmu.rtm_abort_event.
*/
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04);
td_attr = adl_hybrid_events_attrs;
mem_attr = adl_hybrid_mem_attrs;

@@ -6660,9 +6703,7 @@ __init int intel_pmu_init(void)
/* Initialize big core specific PerfMon capabilities.*/
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
pmu->name = "cpu_core";
pmu->cpu_type = hybrid_big;
pmu->late_ack = true;
intel_pmu_init_glc(&pmu->pmu);
if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
pmu->num_counters = x86_pmu.num_counters + 2;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;

@@ -6687,54 +6728,45 @@ __init int intel_pmu_init(void)
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
0, pmu->num_counters, 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
pmu->intel_cap.perf_metrics = 1;
pmu->intel_cap.pebs_output_pt_available = 0;
memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids));
memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs));
pmu->event_constraints = intel_spr_event_constraints;
pmu->pebs_constraints = intel_spr_pebs_event_constraints;
pmu->extra_regs = intel_spr_extra_regs;
pmu->extra_regs = intel_glc_extra_regs;
/* Initialize Atom core specific PerfMon capabilities.*/
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
pmu->name = "cpu_atom";
pmu->cpu_type = hybrid_small;
pmu->mid_ack = true;
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
pmu->max_pebs_events = x86_pmu.max_pebs_events;
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
0, pmu->num_counters, 0, 0);
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
pmu->intel_cap.perf_metrics = 0;
pmu->intel_cap.pebs_output_pt_available = 1;
intel_pmu_init_grt(&pmu->pmu);
memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids));
memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs));
pmu->hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
pmu->event_constraints = intel_slm_event_constraints;
pmu->pebs_constraints = intel_grt_pebs_event_constraints;
pmu->extra_regs = intel_grt_extra_regs;
if (is_mtl(boot_cpu_data.x86_model)) {
x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs;
x86_pmu.pebs_latency_data = mtl_latency_data_small;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
mem_attr = mtl_hybrid_mem_attrs;
intel_pmu_pebs_data_source_mtl();
x86_pmu.get_event_constraints = mtl_get_event_constraints;
pmu->extra_regs = intel_cmt_extra_regs;
pr_cont("Meteorlake Hybrid events, ");
name = "meteorlake_hybrid";
} else {
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
intel_pmu_pebs_data_source_adl();
pr_cont("Alderlake Hybrid events, ");
name = "alderlake_hybrid";
}
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
intel_pmu_pebs_data_source_adl();
pr_cont("Alderlake Hybrid events, ");
name = "alderlake_hybrid";
break;

case INTEL_FAM6_METEORLAKE:
case INTEL_FAM6_METEORLAKE_L:
intel_pmu_init_hybrid(hybrid_big_small);
x86_pmu.pebs_latency_data = mtl_latency_data_small;
x86_pmu.get_event_constraints = mtl_get_event_constraints;
x86_pmu.hw_config = adl_hw_config;
td_attr = adl_hybrid_events_attrs;
mem_attr = mtl_hybrid_mem_attrs;
tsx_attr = adl_hybrid_tsx_attrs;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
/* Initialize big core specific PerfMon capabilities.*/
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
intel_pmu_init_glc(&pmu->pmu);
pmu->extra_regs = intel_rwc_extra_regs;
/* Initialize Atom core specific PerfMon capabilities.*/
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
intel_pmu_init_grt(&pmu->pmu);
pmu->extra_regs = intel_cmt_extra_regs;
intel_pmu_pebs_data_source_mtl();
pr_cont("Meteorlake Hybrid events, ");
name = "meteorlake_hybrid";
break;
default:

@@ -6846,9 +6878,6 @@ __init int intel_pmu_init(void)
if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
if (is_hybrid())
intel_pmu_check_hybrid_pmus((u64)fixed_mask);
if (x86_pmu.intel_cap.pebs_timing_info)
x86_pmu.flags |= PMU_FL_RETIRE_LATENCY;
|
@ -336,6 +336,9 @@ static int cstate_pmu_event_init(struct perf_event *event)
|
||||
cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
|
||||
if (!(pkg_msr_mask & (1 << cfg)))
|
||||
return -EINVAL;
|
||||
|
||||
event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
|
||||
|
||||
event->hw.event_base = pkg_msr[cfg].msr;
|
||||
cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
|
||||
topology_die_cpumask(event->cpu));
|
||||
|
@@ -261,7 +261,7 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
{
u64 val;
WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
dse &= PERF_PEBS_DATA_SOURCE_MASK;
val = hybrid_var(event->pmu, pebs_data_source)[dse];

@@ -1058,7 +1058,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
struct event_constraint intel_spr_pebs_event_constraints[] = {
struct event_constraint intel_glc_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
@@ -736,6 +736,7 @@ static bool topa_table_full(struct topa *topa)
/**
* topa_insert_pages() - create a list of ToPA tables
* @buf: PT buffer being initialized.
* @cpu: CPU on which to allocate.
* @gfp: Allocation flags.
*
* This initializes a list of ToPA tables with entries from

@@ -1207,8 +1208,11 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
/**
* pt_buffer_init_topa() - initialize ToPA table for pt buffer
* @buf: PT buffer.
* @size: Total size of all regions within this ToPA.
* @cpu: CPU on which to allocate.
* @nr_pages: No. of pages to allocate.
* @gfp: Allocation flags.
*
* Return: 0 on success or error code.
*/
static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
unsigned long nr_pages, gfp_t gfp)

@@ -1281,7 +1285,7 @@ out:
/**
* pt_buffer_setup_aux() - set up topa tables for a PT buffer
* @cpu: Cpu on which to allocate, -1 means current.
* @event: Performance event
* @pages: Array of pointers to buffer pages passed from perf core.
* @nr_pages: Number of pages in the buffer.
* @snapshot: If this is a snapshot/overwrite counter.
@@ -652,10 +652,29 @@ enum {
#define PERF_PEBS_DATA_SOURCE_MAX 0x10
#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1)
enum hybrid_cpu_type {
HYBRID_INTEL_NONE,
HYBRID_INTEL_ATOM = 0x20,
HYBRID_INTEL_CORE = 0x40,
};
enum hybrid_pmu_type {
not_hybrid,
hybrid_small = BIT(0),
hybrid_big = BIT(1),
hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */
};
#define X86_HYBRID_PMU_ATOM_IDX 0
#define X86_HYBRID_PMU_CORE_IDX 1
#define X86_HYBRID_NUM_PMUS 2
struct x86_hybrid_pmu {
struct pmu pmu;
const char *name;
u8 cpu_type;
enum hybrid_pmu_type pmu_type;
cpumask_t supported_cpus;
union perf_capabilities intel_cap;
u64 intel_ctrl;

@@ -721,18 +740,6 @@ extern struct static_key_false perf_is_hybrid;
__Fp; \
})
enum hybrid_pmu_type {
hybrid_big = 0x40,
hybrid_small = 0x20,
hybrid_big_small = hybrid_big | hybrid_small,
};
#define X86_HYBRID_PMU_ATOM_IDX 0
#define X86_HYBRID_PMU_CORE_IDX 1
#define X86_HYBRID_NUM_PMUS 2
/*
* struct x86_pmu - generic x86 pmu
*/

@@ -940,7 +947,7 @@ struct x86_pmu {
*/
int num_hybrid_pmus;
struct x86_hybrid_pmu *hybrid_pmu;
u8 (*get_hybrid_cpu_type) (void);
enum hybrid_cpu_type (*get_hybrid_cpu_type) (void);
};
struct x86_perf_task_context_opt {

@@ -1521,7 +1528,7 @@ extern struct event_constraint intel_skl_pebs_event_constraints[];
extern struct event_constraint intel_icl_pebs_event_constraints[];
extern struct event_constraint intel_spr_pebs_event_constraints[];
extern struct event_constraint intel_glc_pebs_event_constraints[];
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
@@ -115,7 +115,7 @@ struct rapl_pmu {
struct rapl_pmus {
struct pmu pmu;
unsigned int maxdie;
struct rapl_pmu *pmus[];
struct rapl_pmu *pmus[] __counted_by(maxdie);
};
enum rapl_unit_quirk {
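The __counted_by(maxdie) annotation added above lets compiler-assisted bounds checking (FORTIFY_SOURCE, UBSAN bounds) tie the flexible pmus[] array to the maxdie member. A minimal stand-alone sketch of the same convention follows; the fallback define and the *_example names are illustrative, not the kernel's own allocation code:

#include <stdlib.h>

/* Fall back to a no-op outside the kernel; in-tree the attribute comes
 * from the compiler-attribute headers. */
#ifndef __counted_by
#define __counted_by(m)
#endif

struct rapl_pmu;	/* opaque here; the real definition lives in rapl.c */

struct rapl_pmus_example {
	unsigned int maxdie;
	struct rapl_pmu *pmus[] __counted_by(maxdie);
};

/* Allocate the struct plus 'maxdie' pointer slots, then set the member
 * the annotation refers to (the kernel uses struct_size()/kzalloc). */
static struct rapl_pmus_example *alloc_rapl_pmus(unsigned int maxdie)
{
	struct rapl_pmus_example *p;

	p = calloc(1, sizeof(*p) + maxdie * sizeof(p->pmus[0]));
	if (p)
		p->maxdie = maxdie;
	return p;
}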
@@ -179,15 +179,11 @@ static u64 rapl_event_update(struct perf_event *event)
s64 delta, sdelta;
int shift = RAPL_CNTR_WIDTH;
again:
prev_raw_count = local64_read(&hwc->prev_count);
rdmsrl(event->hw.event_base, new_raw_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count) {
cpu_relax();
goto again;
}
do {
rdmsrl(event->hw.event_base, new_raw_count);
} while (!local64_try_cmpxchg(&hwc->prev_count,
&prev_raw_count, new_raw_count));
/*
* Now we have the new raw value and have updated the prev
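The rapl_event_update() hunk above replaces the open-coded local64_cmpxchg()/cpu_relax() retry loop with local64_try_cmpxchg(), which re-fills the expected value on failure. The same shape in portable C11 atomics, purely as an illustration (read_counter() is an assumed stand-in for the RDMSR):

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative stand-in for the hardware counter read (assumption). */
extern uint64_t read_counter(void);

/*
 * Publish the latest raw counter value and return the previous one.
 * compare_exchange_weak() updates 'prev' with the current value on
 * failure, so no separate re-read or cpu_relax() loop is needed --
 * the same shape as the local64_try_cmpxchg() loop in the diff.
 */
static uint64_t update_prev_count(_Atomic uint64_t *prev_count)
{
	uint64_t prev = atomic_load(prev_count);
	uint64_t new_raw;

	do {
		new_raw = read_counter();
	} while (!atomic_compare_exchange_weak(prev_count, &prev, new_raw));

	return prev;
}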
@@ -537,11 +533,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = {
* - want to use same event codes across both architectures
*/
static struct perf_msr amd_rapl_msrs[] = {
[PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 },
[PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 },
[PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK },
[PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 },
[PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 },
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 },
[PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 },
[PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, NULL, false, 0 },
[PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 },
};
static int rapl_cpu_offline(unsigned int cpu)
@@ -638,12 +638,16 @@
#define MSR_AMD64_LBR_SELECT 0xc000010e
/* Zen4 */
#define MSR_ZEN4_BP_CFG 0xc001102e
#define MSR_ZEN4_BP_CFG 0xc001102e
#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
/* Fam 19h MSRs */
#define MSR_F19H_UMC_PERF_CTL 0xc0010800
#define MSR_F19H_UMC_PERF_CTR 0xc0010801
/* Zen 2 */
#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
@@ -112,6 +112,13 @@
(AMD64_PERFMON_V2_EVENTSEL_EVENT_NB | \
AMD64_PERFMON_V2_EVENTSEL_UMASK_NB)
#define AMD64_PERFMON_V2_ENABLE_UMC BIT_ULL(31)
#define AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC GENMASK_ULL(7, 0)
#define AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC GENMASK_ULL(9, 8)
#define AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC \
(AMD64_PERFMON_V2_EVENTSEL_EVENT_UMC | \
AMD64_PERFMON_V2_EVENTSEL_RDWRMASK_UMC)
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
#define AMD64_NUM_COUNTERS_NB 4
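The UMC selection macros above carve an 8-bit event select and a 2-bit read/write mask out of the raw event config. A small userspace-style sketch of composing such a config, with the masks mirrored locally and an arbitrary event number (illustrative assumptions only):

#include <stdint.h>
#include <stdio.h>

/* Local mirrors of the UMC field masks shown above (illustrative). */
#define UMC_EVENTSEL_EVENT   0x00ffULL   /* GENMASK_ULL(7, 0) */
#define UMC_EVENTSEL_RDWR    0x0300ULL   /* GENMASK_ULL(9, 8) */

/* Compose a raw UMC event config from an event select and a rd/wr mask. */
static uint64_t umc_raw_config(uint8_t event, uint8_t rdwr)
{
	return ((uint64_t)event & UMC_EVENTSEL_EVENT) |
	       (((uint64_t)rdwr << 8) & UMC_EVENTSEL_RDWR);
}

int main(void)
{
	/* Hypothetical event 0x1, counting both reads and writes (mask 0x3). */
	printf("config=0x%llx\n", (unsigned long long)umc_raw_config(0x1, 0x3));
	return 0;
}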
@@ -232,6 +239,8 @@ union cpuid_0x80000022_ebx {
unsigned int lbr_v2_stack_sz:6;
/* Number of Data Fabric Counters */
unsigned int num_df_pmc:6;
/* Number of Unified Memory Controller Counters */
unsigned int num_umc_pmc:6;
} split;
unsigned int full;
};
@@ -879,6 +879,7 @@ struct perf_event_pmu_context {
unsigned int embedded : 1;
unsigned int nr_events;
unsigned int nr_cgroups;
atomic_t refcount; /* event <-> epc */
struct rcu_head rcu_head;

@@ -1574,7 +1575,7 @@ extern int sysctl_perf_cpu_time_max_percent;
extern void perf_sample_event_took(u64 sample_len_ns);
int perf_proc_update_handler(struct ctl_table *table, int write,
int perf_event_max_sample_rate_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
@@ -375,6 +375,7 @@ enum event_type_t {
EVENT_TIME = 0x4,
/* see ctx_resched() for details */
EVENT_CPU = 0x8,
EVENT_CGROUP = 0x10,
EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
};

@@ -449,8 +450,8 @@ static void update_perf_cpu_limits(void)
static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc);
int perf_proc_update_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
int perf_event_max_sample_rate_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
int perf_cpu = sysctl_perf_cpu_time_max_percent;

@@ -684,20 +685,26 @@ do { \
___p; \
})
static void perf_ctx_disable(struct perf_event_context *ctx)
static void perf_ctx_disable(struct perf_event_context *ctx, bool cgroup)
{
struct perf_event_pmu_context *pmu_ctx;
list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
if (cgroup && !pmu_ctx->nr_cgroups)
continue;
perf_pmu_disable(pmu_ctx->pmu);
}
}
static void perf_ctx_enable(struct perf_event_context *ctx)
static void perf_ctx_enable(struct perf_event_context *ctx, bool cgroup)
{
struct perf_event_pmu_context *pmu_ctx;
list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
if (cgroup && !pmu_ctx->nr_cgroups)
continue;
perf_pmu_enable(pmu_ctx->pmu);
}
}
static void ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type);

@@ -856,9 +863,9 @@ static void perf_cgroup_switch(struct task_struct *task)
return;
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
perf_ctx_disable(&cpuctx->ctx);
perf_ctx_disable(&cpuctx->ctx, true);
ctx_sched_out(&cpuctx->ctx, EVENT_ALL);
ctx_sched_out(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
/*
* must not be done before ctxswout due
* to update_cgrp_time_from_cpuctx() in

@@ -870,9 +877,9 @@ static void perf_cgroup_switch(struct task_struct *task)
* perf_cgroup_set_timestamp() in ctx_sched_in()
* to not have to pass task around
*/
ctx_sched_in(&cpuctx->ctx, EVENT_ALL);
ctx_sched_in(&cpuctx->ctx, EVENT_ALL|EVENT_CGROUP);
perf_ctx_enable(&cpuctx->ctx);
perf_ctx_enable(&cpuctx->ctx, true);
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}

@@ -965,6 +972,8 @@ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ct
if (!is_cgroup_event(event))
return;
event->pmu_ctx->nr_cgroups++;
/*
* Because cgroup events are always per-cpu events,
* @ctx == &cpuctx->ctx.

@@ -985,6 +994,8 @@ perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *c
if (!is_cgroup_event(event))
return;
event->pmu_ctx->nr_cgroups--;
/*
* Because cgroup events are always per-cpu events,
* @ctx == &cpuctx->ctx.

@@ -2679,9 +2690,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
event_type &= EVENT_ALL;
perf_ctx_disable(&cpuctx->ctx);
perf_ctx_disable(&cpuctx->ctx, false);
if (task_ctx) {
perf_ctx_disable(task_ctx);
perf_ctx_disable(task_ctx, false);
task_ctx_sched_out(task_ctx, event_type);
}

@@ -2699,9 +2710,9 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
perf_event_sched_in(cpuctx, task_ctx);
perf_ctx_enable(&cpuctx->ctx);
perf_ctx_enable(&cpuctx->ctx, false);
if (task_ctx)
perf_ctx_enable(task_ctx);
perf_ctx_enable(task_ctx, false);
}
void perf_pmu_resched(struct pmu *pmu)

@@ -3246,6 +3257,9 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
struct perf_event_pmu_context *pmu_ctx;
int is_active = ctx->is_active;
bool cgroup = event_type & EVENT_CGROUP;
event_type &= ~EVENT_CGROUP;
lockdep_assert_held(&ctx->lock);

@@ -3292,8 +3306,11 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
is_active ^= ctx->is_active; /* changed bits */
list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry)
list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
if (cgroup && !pmu_ctx->nr_cgroups)
continue;
__pmu_ctx_sched_out(pmu_ctx, is_active);
}
}
/*

@@ -3484,7 +3501,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
perf_ctx_disable(ctx);
perf_ctx_disable(ctx, false);
/* PMIs are disabled; ctx->nr_pending is stable. */
if (local_read(&ctx->nr_pending) ||

@@ -3504,7 +3521,7 @@ perf_event_context_sched_out(struct task_struct *task, struct task_struct *next)
perf_ctx_sched_task_cb(ctx, false);
perf_event_swap_task_ctx_data(ctx, next_ctx);
perf_ctx_enable(ctx);
perf_ctx_enable(ctx, false);
/*
* RCU_INIT_POINTER here is safe because we've not

@@ -3528,13 +3545,13 @@ unlock:
if (do_switch) {
raw_spin_lock(&ctx->lock);
perf_ctx_disable(ctx);
perf_ctx_disable(ctx, false);
inside_switch:
perf_ctx_sched_task_cb(ctx, false);
task_ctx_sched_out(ctx, EVENT_ALL);
perf_ctx_enable(ctx);
perf_ctx_enable(ctx, false);
raw_spin_unlock(&ctx->lock);
}
}

@@ -3820,47 +3837,32 @@ static int merge_sched_in(struct perf_event *event, void *data)
return 0;
}
static void ctx_pinned_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
static void pmu_groups_sched_in(struct perf_event_context *ctx,
struct perf_event_groups *groups,
struct pmu *pmu)
{
int can_add_hw = 1;
visit_groups_merge(ctx, groups, smp_processor_id(), pmu,
merge_sched_in, &can_add_hw);
}
static void ctx_groups_sched_in(struct perf_event_context *ctx,
struct perf_event_groups *groups,
bool cgroup)
{
struct perf_event_pmu_context *pmu_ctx;
int can_add_hw = 1;
if (pmu) {
visit_groups_merge(ctx, &ctx->pinned_groups,
smp_processor_id(), pmu,
merge_sched_in, &can_add_hw);
} else {
list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
can_add_hw = 1;
visit_groups_merge(ctx, &ctx->pinned_groups,
smp_processor_id(), pmu_ctx->pmu,
merge_sched_in, &can_add_hw);
}
list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
if (cgroup && !pmu_ctx->nr_cgroups)
continue;
pmu_groups_sched_in(ctx, groups, pmu_ctx->pmu);
}
}
static void ctx_flexible_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
static void __pmu_ctx_sched_in(struct perf_event_context *ctx,
struct pmu *pmu)
{
struct perf_event_pmu_context *pmu_ctx;
int can_add_hw = 1;
if (pmu) {
visit_groups_merge(ctx, &ctx->flexible_groups,
smp_processor_id(), pmu,
merge_sched_in, &can_add_hw);
} else {
list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
can_add_hw = 1;
visit_groups_merge(ctx, &ctx->flexible_groups,
smp_processor_id(), pmu_ctx->pmu,
merge_sched_in, &can_add_hw);
}
}
}
static void __pmu_ctx_sched_in(struct perf_event_context *ctx, struct pmu *pmu)
{
ctx_flexible_sched_in(ctx, pmu);
pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu);
}
static void

@@ -3868,6 +3870,9 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
int is_active = ctx->is_active;
bool cgroup = event_type & EVENT_CGROUP;
event_type &= ~EVENT_CGROUP;
lockdep_assert_held(&ctx->lock);

@@ -3900,11 +3905,11 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
* in order to give them the best chance of going on.
*/
if (is_active & EVENT_PINNED)
ctx_pinned_sched_in(ctx, NULL);
ctx_groups_sched_in(ctx, &ctx->pinned_groups, cgroup);
/* Then walk through the lower prio flexible groups */
if (is_active & EVENT_FLEXIBLE)
ctx_flexible_sched_in(ctx, NULL);
ctx_groups_sched_in(ctx, &ctx->flexible_groups, cgroup);
}
static void perf_event_context_sched_in(struct task_struct *task)

@@ -3919,11 +3924,11 @@ static void perf_event_context_sched_in(struct task_struct *task)
if (cpuctx->task_ctx == ctx) {
perf_ctx_lock(cpuctx, ctx);
perf_ctx_disable(ctx);
perf_ctx_disable(ctx, false);
perf_ctx_sched_task_cb(ctx, true);
perf_ctx_enable(ctx);
perf_ctx_enable(ctx, false);
perf_ctx_unlock(cpuctx, ctx);
goto rcu_unlock;
}

@@ -3936,7 +3941,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
if (!ctx->nr_events)
goto unlock;
perf_ctx_disable(ctx);
perf_ctx_disable(ctx, false);
/*
* We want to keep the following priority order:
* cpu pinned (that don't need to move), task pinned,

@@ -3946,7 +3951,7 @@ static void perf_event_context_sched_in(struct task_struct *task)
* events, no need to flip the cpuctx's events around.
*/
if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree)) {
perf_ctx_disable(&cpuctx->ctx);
perf_ctx_disable(&cpuctx->ctx, false);
ctx_sched_out(&cpuctx->ctx, EVENT_FLEXIBLE);
}

@@ -3955,9 +3960,9 @@ static void perf_event_context_sched_in(struct task_struct *task)
perf_ctx_sched_task_cb(cpuctx->task_ctx, true);
if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
perf_ctx_enable(&cpuctx->ctx);
perf_ctx_enable(&cpuctx->ctx, false);
perf_ctx_enable(ctx);
perf_ctx_enable(ctx, false);
unlock:
perf_ctx_unlock(cpuctx, ctx);

@@ -4427,6 +4432,9 @@ static int __perf_event_read_cpu(struct perf_event *event, int event_cpu)
{
u16 local_pkg, event_pkg;
if ((unsigned)event_cpu >= nr_cpu_ids)
return event_cpu;
if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
int local_cpu = smp_processor_id();

@@ -4529,6 +4537,8 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
u64 *enabled, u64 *running)
{
unsigned long flags;
int event_oncpu;
int event_cpu;
int ret = 0;
/*

@@ -4553,15 +4563,22 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
goto out;
}
/*
* Get the event CPU numbers, and adjust them to local if the event is
* a per-package event that can be read locally
*/
event_oncpu = __perf_event_read_cpu(event, event->oncpu);
event_cpu = __perf_event_read_cpu(event, event->cpu);
/* If this is a per-CPU event, it must be for this CPU */
if (!(event->attach_state & PERF_ATTACH_TASK) &&
event->cpu != smp_processor_id()) {
event_cpu != smp_processor_id()) {
ret = -EINVAL;
goto out;
}
/* If this is a pinned event it must be running on this CPU */
if (event->attr.pinned && event->oncpu != smp_processor_id()) {
if (event->attr.pinned && event_oncpu != smp_processor_id()) {
ret = -EBUSY;
goto out;
}

@@ -4571,7 +4588,7 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
* or local to this CPU. Furthermore it means its ACTIVE (otherwise
* oncpu == -1).
*/
if (event->oncpu == smp_processor_id())
if (event_oncpu == smp_processor_id())
event->pmu->read(event);
*value = local64_read(&event->count);
@@ -700,6 +700,12 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
watermark = 0;
}
/*
* kcalloc_node() is unable to allocate buffer if the size is larger
* than: PAGE_SIZE << MAX_ORDER; directly bail out in this case.
*/
if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_ORDER)
return -ENOMEM;
rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL,
node);
if (!rb->aux_pages)
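The early bail-out above caps the AUX page-pointer array at what a single kcalloc_node() call can provide, i.e. at most PAGE_SIZE << MAX_ORDER bytes. A tiny sketch of the resulting bound with illustrative constants (the real values depend on the kernel configuration):

#include <stdio.h>

/* Illustrative values; the real ones come from the kernel config. */
#define EXAMPLE_PAGE_SIZE 4096UL
#define EXAMPLE_MAX_ORDER 10

int main(void)
{
	/* The pointer array itself cannot exceed PAGE_SIZE << MAX_ORDER
	 * bytes, which in turn bounds the number of AUX pages. */
	unsigned long max_bytes = EXAMPLE_PAGE_SIZE << EXAMPLE_MAX_ORDER;
	unsigned long max_aux_pages = max_bytes / sizeof(void *);

	printf("max AUX pages for the pointer array: %lu (%lu MiB of AUX)\n",
	       max_aux_pages, max_aux_pages * EXAMPLE_PAGE_SIZE >> 20);
	return 0;
}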
@@ -1983,7 +1983,7 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_perf_event_sample_rate,
.maxlen = sizeof(sysctl_perf_event_sample_rate),
.mode = 0644,
.proc_handler = perf_proc_update_handler,
.proc_handler = perf_event_max_sample_rate_handler,
.extra1 = SYSCTL_ONE,
},
{