Perf events changes for v6.10:

 - Combine perf and BPF for fast evaluation of HW breakpoint conditions
 - Add LBR capture support outside of hardware events
 - Trigger IO signals for watermark_wakeup
 - Add RAPL support for Intel Arrow Lake and Lunar Lake
 - Optimize frequency-throttling
 - Miscellaneous cleanups & fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-----BEGIN PGP SIGNATURE-----

iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmZBsC8RHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1izyxAAo7yOdhk9q+y2YWlKx2FmxUlZ8vlxBDRT
22bIN2d1ADrRS2IMsXC2/PhLnw0RNMCjBf6vyXi1hrMMK2zjuCFet5WDN8NboWEp
hMdUSv1ODf5vb2I8frYS9X4jPtXDKSpIBR9e3E7iFYU6vj3BUXLSXnfXFjRsLU8i
BG1k4apAWkDw0UjwQsRdxOoTFxp17idO3Ruz0/ksXleO/0aR0WR68tGO2WS1Hz95
mBhdjudekpWgT8VktGPrXsgUU3jqywTx04zFkWS36+IqDqNeNMPmePC7hqohlvv4
ZEPg6XrjdFmcDE6nc2YFYLD9njLDbdKPLeGTEtSNFSAmHYqV8W+UFlNa6hlXEE7n
KFnvJ8zLymW/UQGaPsIcqqTSXkGKuTsUZJO+QK/VF+sK7VpMJtwTaUliSlN7zQtF
6HDBjp4sLB3NW16AN/M65LjpqyLdRxD7tvXoPLTt9mOVQt41ckv2Tfe2m6hg9OVQ
qFzEdhgXxOUMyO9ifEX4HC2sBkKee4Jt76SLkpdr6kuuqlTRisIVdhlJ7yjK9/Rk
RbuK/4eqL1p/o4GFAPP8gQjfdMSWatOZzxpE4V1cnzEdGjwuUMPJrbYPiAkgHskO
HpzXtY+xFbAiaDanW1kUmwlqO8yO18WvdUem+SRRlFvbeE+grmgmtRZecNOi7mgg
MlKdr1a4mV8=
=r0yr
-----END PGP SIGNATURE-----

Merge tag 'perf-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events updates from Ingo Molnar:

 - Combine perf and BPF for fast evaluation of HW breakpoint conditions
 - Add LBR capture support outside of hardware events
 - Trigger IO signals for watermark_wakeup
 - Add RAPL support for Intel Arrow Lake and Lunar Lake
 - Optimize frequency-throttling
 - Miscellaneous cleanups & fixes

* tag 'perf-core-2024-05-13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  perf/bpf: Mark perf_event_set_bpf_handler() and perf_event_free_bpf_handler() as inline too
  selftests/perf_events: Test FASYNC with watermark wakeups
  perf/ring_buffer: Trigger IO signals for watermark_wakeup
  perf: Move perf_event_fasync() to perf_event.h
  perf/bpf: Change the !CONFIG_BPF_SYSCALL stubs to static inlines
  selftest/bpf: Test a perf BPF program that suppresses side effects
  perf/bpf: Allow a BPF program to suppress all sample side effects
  perf/bpf: Remove unneeded uses_default_overflow_handler()
  perf/bpf: Call BPF handler directly, not through overflow machinery
  perf/bpf: Remove #ifdef CONFIG_BPF_SYSCALL from struct perf_event members
  perf/bpf: Create bpf_overflow_handler() stub for !CONFIG_BPF_SYSCALL
  perf/bpf: Reorder bpf_overflow_handler() ahead of __perf_event_overflow()
  perf/x86/rapl: Add support for Intel Lunar Lake
  perf/x86/rapl: Add support for Intel Arrow Lake
  perf/core: Reduce PMU access to adjust sample freq
  perf/core: Optimize perf_adjust_freq_unthr_context()
  perf/x86/amd: Don't reject non-sampling events with configured LBR
  perf/x86/amd: Support capturing LBR from software events
  perf/x86/amd: Avoid taking branches before disabling LBR
  perf/x86/amd: Ensure amd_pmu_core_disable_all() is always inlined
  ...
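A minimal userspace sketch of the watermark_wakeup item above (illustrative only, not part of this merge; error handling omitted, and the open_watermark_event() helper name is made up for this example). Once FASYNC is armed on the event fd, crossing the ring-buffer watermark now raises the configured signal, as exercised by the watermark_signal selftest added at the end of this diff:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/perf_event.h>
    #include <signal.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Hypothetical helper: open a watermark-based event on @pid and ask for SIGIO. */
    static int open_watermark_event(pid_t pid)
    {
            struct perf_event_attr attr;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_SOFTWARE;
            attr.config = PERF_COUNT_SW_DUMMY;
            attr.sample_period = 1;
            attr.context_switch = 1;        /* something that generates ring-buffer records */
            attr.watermark = 1;             /* wake up on buffered bytes, not on sample count */
            attr.wakeup_watermark = 1;      /* bytes pending before a wakeup is triggered */

            fd = syscall(__NR_perf_event_open, &attr, pid, -1, -1, PERF_FLAG_FD_CLOEXEC);
            if (fd < 0)
                    return -1;

            /* With this update, each watermark wakeup also fires the FASYNC machinery. */
            fcntl(fd, F_SETFL, O_ASYNC);
            fcntl(fd, F_SETOWN, getpid());
            fcntl(fd, F_SETSIG, SIGIO);
            return fd;
    }

The caller still has to mmap() the event's ring buffer and install a SIGIO handler before records are produced; the watermark_signal selftest below does exactly that.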
This commit is contained in: commit 17ca7fc22f
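One more illustrative sketch before the diff, for the BPF side of the breakpoint work (an assumed example mirroring the test_perf_skip program added below; program and variable names are made up). A BPF_PROG_TYPE_PERF_EVENT program attached to the event now runs before any side effects, and returning 0 drops the sample together with its ring-buffer output, SIGTRAP and FASYNC signals:

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    uintptr_t skip_ip;      /* set from userspace via the skeleton's .bss */

    SEC("perf_event")
    int filter_sample(struct bpf_perf_event_data *ctx)
    {
            /* 0 suppresses the sample and all side effects; nonzero delivers it. */
            return PT_REGS_IP(&ctx->regs) != skip_ip;
    }

    char _license[] SEC("license") = "GPL";

Attached with bpf_program__attach_perf_event(), this acts as a cheap in-kernel filter, which is the use case behind the "fast evaluation of HW breakpoint conditions" item above.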
@@ -626,7 +626,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
hw->address &= ~alignment_mask;
hw->ctrl.len <<= offset;

-if (uses_default_overflow_handler(bp)) {
+if (is_default_overflow_handler(bp)) {
/*
* Mismatch breakpoints are required for single-stepping
* breakpoints.
@@ -798,7 +798,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
* Otherwise, insert a temporary mismatch breakpoint so that
* we can single-step over the watchpoint trigger.
*/
-if (!uses_default_overflow_handler(wp))
+if (!is_default_overflow_handler(wp))
continue;
step:
enable_single_step(wp, instruction_pointer(regs));
@@ -811,7 +811,7 @@ step:
info->trigger = addr;
pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
perf_bp_event(wp, regs);
-if (uses_default_overflow_handler(wp))
+if (is_default_overflow_handler(wp))
enable_single_step(wp, instruction_pointer(regs));
}

@@ -886,7 +886,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
info->trigger = addr;
pr_debug("breakpoint fired: address = 0x%x\n", addr);
perf_bp_event(bp, regs);
-if (uses_default_overflow_handler(bp))
+if (is_default_overflow_handler(bp))
enable_single_step(bp, addr);
goto unlock;
}
@@ -655,7 +655,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr,
perf_bp_event(bp, regs);

/* Do we need to handle the stepping? */
-if (uses_default_overflow_handler(bp))
+if (is_default_overflow_handler(bp))
step = 1;
unlock:
rcu_read_unlock();
@@ -734,7 +734,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
static int watchpoint_report(struct perf_event *wp, unsigned long addr,
struct pt_regs *regs)
{
-int step = uses_default_overflow_handler(wp);
+int step = is_default_overflow_handler(wp);
struct arch_hw_breakpoint *info = counter_arch_bp(wp);

info->trigger = addr;
@@ -647,7 +647,7 @@ static void amd_pmu_cpu_dead(int cpu)
}
}

-static inline void amd_pmu_set_global_ctl(u64 ctl)
+static __always_inline void amd_pmu_set_global_ctl(u64 ctl)
{
wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
}
@@ -907,6 +907,37 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
return amd_pmu_adjust_nmi_window(handled);
}

+/*
+* AMD-specific callback invoked through perf_snapshot_branch_stack static
+* call, defined in include/linux/perf_event.h. See its definition for API
+* details. It's up to caller to provide enough space in *entries* to fit all
+* LBR records, otherwise returned result will be truncated to *cnt* entries.
+*/
+static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+struct cpu_hw_events *cpuc;
+unsigned long flags;
+
+/*
+* The sequence of steps to freeze LBR should be completely inlined
+* and contain no branches to minimize contamination of LBR snapshot
+*/
+local_irq_save(flags);
+amd_pmu_core_disable_all();
+__amd_pmu_lbr_disable();
+
+cpuc = this_cpu_ptr(&cpu_hw_events);
+
+amd_pmu_lbr_read();
+cnt = min(cnt, x86_pmu.lbr_nr);
+memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
+
+amd_pmu_v2_enable_all(0);
+local_irq_restore(flags);
+
+return cnt;
+}
+
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1443,6 +1474,10 @@ static int __init amd_core_pmu_init(void)
static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);
+
+/* Only support branch_stack snapshot on perfmon v2 */
+if (x86_pmu.handle_irq == amd_pmu_v2_handle_irq)
+static_call_update(perf_snapshot_branch_stack, amd_pmu_v2_snapshot_branch_stack);
} else if (!amd_brs_init()) {
/*
* BRS requires special event constraints and flushing on ctxsw.
@@ -310,10 +310,6 @@ int amd_pmu_lbr_hw_config(struct perf_event *event)
{
int ret = 0;

-/* LBR is not recommended in counting mode */
-if (!is_sampling_event(event))
-return -EINVAL;
-
ret = amd_pmu_lbr_setup_filter(event);
if (!ret)
event->attach_state |= PERF_ATTACH_SCHED_CB;
@@ -414,18 +410,11 @@ void amd_pmu_lbr_enable_all(void)
void amd_pmu_lbr_disable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-u64 dbg_ctl, dbg_extn_cfg;

if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
return;

-rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
-wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
-
-if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
-rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
-}
+__amd_pmu_lbr_disable();
}

__init int amd_pmu_lbr_init(void)
@@ -1329,6 +1329,19 @@ void amd_pmu_lbr_enable_all(void);
void amd_pmu_lbr_disable_all(void);
int amd_pmu_lbr_hw_config(struct perf_event *event);

+static __always_inline void __amd_pmu_lbr_disable(void)
+{
+u64 dbg_ctl, dbg_extn_cfg;
+
+rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
+
+if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+}
+}
+
#ifdef CONFIG_PERF_EVENTS_AMD_BRS

#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
@@ -675,10 +675,8 @@ static const struct attribute_group *rapl_attr_update[] = {
static int __init init_rapl_pmus(void)
{
int maxdie = topology_max_packages() * topology_max_dies_per_package();
-size_t size;

-size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
-rapl_pmus = kzalloc(size, GFP_KERNEL);
+rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, maxdie), GFP_KERNEL);
if (!rapl_pmus)
return -ENOMEM;

@@ -808,6 +806,9 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl),
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl),
+X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE_H, &model_skl),
+X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE, &model_skl),
+X86_MATCH_INTEL_FAM6_MODEL(LUNARLAKE_M, &model_skl),
{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
@@ -809,11 +809,8 @@ struct perf_event {
u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void *overflow_handler_context;
-#ifdef CONFIG_BPF_SYSCALL
-perf_overflow_handler_t orig_overflow_handler;
struct bpf_prog *prog;
u64 bpf_cookie;
-#endif

#ifdef CONFIG_EVENT_TRACING
struct trace_event_call *tp_event;
@@ -883,6 +880,7 @@ struct perf_event_pmu_context {

unsigned int nr_events;
unsigned int nr_cgroups;
+unsigned int nr_freq;

atomic_t refcount; /* event <-> epc */
struct rcu_head rcu_head;
@@ -897,6 +895,11 @@ struct perf_event_pmu_context {
int rotate_necessary;
};

+static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
+{
+return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active);
+}
+
struct perf_event_groups {
struct rb_root tree;
u64 index;
@@ -1342,8 +1345,10 @@ extern int perf_event_output(struct perf_event *event,
struct pt_regs *regs);

static inline bool
-__is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
+is_default_overflow_handler(struct perf_event *event)
{
+perf_overflow_handler_t overflow_handler = event->overflow_handler;
+
if (likely(overflow_handler == perf_event_output_forward))
return true;
if (unlikely(overflow_handler == perf_event_output_backward))
@@ -1351,22 +1356,6 @@ __is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
return false;
}

-#define is_default_overflow_handler(event) \
-__is_default_overflow_handler((event)->overflow_handler)
-
-#ifdef CONFIG_BPF_SYSCALL
-static inline bool uses_default_overflow_handler(struct perf_event *event)
-{
-if (likely(is_default_overflow_handler(event)))
-return true;
-
-return __is_default_overflow_handler(event->orig_overflow_handler);
-}
-#else
-#define uses_default_overflow_handler(event) \
-is_default_overflow_handler(event)
-#endif
-
extern void
perf_event_header__init_id(struct perf_event_header *header,
struct perf_sample_data *data,
@@ -1697,6 +1686,14 @@ perf_event_addr_filters(struct perf_event *event)
return ifh;
}

+static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
+{
+/* Only the parent has fasync state */
+if (event->parent)
+event = event->parent;
+return &event->fasync;
+}
+
extern void perf_event_addr_filters_sync(struct perf_event *event);
extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
@@ -2302,8 +2302,10 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)

if (!is_software_event(event))
cpc->active_oncpu--;
-if (event->attr.freq && event->attr.sample_freq)
+if (event->attr.freq && event->attr.sample_freq) {
ctx->nr_freq--;
+epc->nr_freq--;
+}
if (event->attr.exclusive || !cpc->active_oncpu)
cpc->exclusive = 0;

@@ -2558,9 +2560,10 @@ event_sched_in(struct perf_event *event, struct perf_event_context *ctx)

if (!is_software_event(event))
cpc->active_oncpu++;
-if (event->attr.freq && event->attr.sample_freq)
+if (event->attr.freq && event->attr.sample_freq) {
ctx->nr_freq++;
-
+epc->nr_freq++;
+}
if (event->attr.exclusive)
cpc->exclusive = 1;

@@ -4123,30 +4126,14 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo
}
}

-/*
-* combine freq adjustment with unthrottling to avoid two passes over the
-* events. At the same time, make sure, having freq events does not change
-* the rate of unthrottling as that would introduce bias.
-*/
-static void
-perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
+static void perf_adjust_freq_unthr_events(struct list_head *event_list)
{
struct perf_event *event;
struct hw_perf_event *hwc;
u64 now, period = TICK_NSEC;
s64 delta;

-/*
-* only need to iterate over all events iff:
-* - context have events in frequency mode (needs freq adjust)
-* - there are events to unthrottle on this cpu
-*/
-if (!(ctx->nr_freq || unthrottle))
-return;
-
-raw_spin_lock(&ctx->lock);
-
-list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+list_for_each_entry(event, event_list, active_list) {
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;

@@ -4154,18 +4141,17 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
if (!event_filter_match(event))
continue;

-perf_pmu_disable(event->pmu);
-
hwc = &event->hw;

if (hwc->interrupts == MAX_INTERRUPTS) {
hwc->interrupts = 0;
perf_log_throttle(event, 1);
-event->pmu->start(event, 0);
+if (!event->attr.freq || !event->attr.sample_freq)
+event->pmu->start(event, 0);
}

if (!event->attr.freq || !event->attr.sample_freq)
-goto next;
+continue;

/*
* stop the event and update event->count
@@ -4187,8 +4173,41 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
perf_adjust_period(event, period, delta, false);

event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
-next:
-perf_pmu_enable(event->pmu);
}
+}
+
+/*
+* combine freq adjustment with unthrottling to avoid two passes over the
+* events. At the same time, make sure, having freq events does not change
+* the rate of unthrottling as that would introduce bias.
+*/
+static void
+perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
+{
+struct perf_event_pmu_context *pmu_ctx;
+
+/*
+* only need to iterate over all events iff:
+* - context have events in frequency mode (needs freq adjust)
+* - there are events to unthrottle on this cpu
+*/
+if (!(ctx->nr_freq || unthrottle))
+return;
+
+raw_spin_lock(&ctx->lock);
+
+list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+if (!(pmu_ctx->nr_freq || unthrottle))
+continue;
+if (!perf_pmu_ctx_is_active(pmu_ctx))
+continue;
+if (pmu_ctx->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT)
+continue;
+
+perf_pmu_disable(pmu_ctx->pmu);
+perf_adjust_freq_unthr_events(&pmu_ctx->pinned_active);
+perf_adjust_freq_unthr_events(&pmu_ctx->flexible_active);
+perf_pmu_enable(pmu_ctx->pmu);
+}

raw_spin_unlock(&ctx->lock);
@@ -6684,14 +6703,6 @@ static const struct file_operations perf_fops = {
* to user-space before waking everybody up.
*/

-static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
-{
-/* only the parent has fasync state */
-if (event->parent)
-event = event->parent;
-return &event->fasync;
-}
-
void perf_event_wakeup(struct perf_event *event)
{
ring_buffer_wakeup(event);
@@ -9544,6 +9555,100 @@ static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *r
return true;
}

+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_overflow_handler(struct perf_event *event,
+struct perf_sample_data *data,
+struct pt_regs *regs)
+{
+struct bpf_perf_event_data_kern ctx = {
+.data = data,
+.event = event,
+};
+struct bpf_prog *prog;
+int ret = 0;
+
+ctx.regs = perf_arch_bpf_user_pt_regs(regs);
+if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+goto out;
+rcu_read_lock();
+prog = READ_ONCE(event->prog);
+if (prog) {
+perf_prepare_sample(data, event, regs);
+ret = bpf_prog_run(prog, &ctx);
+}
+rcu_read_unlock();
+out:
+__this_cpu_dec(bpf_prog_active);
+
+return ret;
+}
+
+static inline int perf_event_set_bpf_handler(struct perf_event *event,
+struct bpf_prog *prog,
+u64 bpf_cookie)
+{
+if (event->overflow_handler_context)
+/* hw breakpoint or kernel counter */
+return -EINVAL;
+
+if (event->prog)
+return -EEXIST;
+
+if (prog->type != BPF_PROG_TYPE_PERF_EVENT)
+return -EINVAL;
+
+if (event->attr.precise_ip &&
+prog->call_get_stack &&
+(!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) ||
+event->attr.exclude_callchain_kernel ||
+event->attr.exclude_callchain_user)) {
+/*
+* On perf_event with precise_ip, calling bpf_get_stack()
+* may trigger unwinder warnings and occasional crashes.
+* bpf_get_[stack|stackid] works around this issue by using
+* callchain attached to perf_sample_data. If the
+* perf_event does not full (kernel and user) callchain
+* attached to perf_sample_data, do not allow attaching BPF
+* program that calls bpf_get_[stack|stackid].
+*/
+return -EPROTO;
+}
+
+event->prog = prog;
+event->bpf_cookie = bpf_cookie;
+return 0;
+}
+
+static inline void perf_event_free_bpf_handler(struct perf_event *event)
+{
+struct bpf_prog *prog = event->prog;
+
+if (!prog)
+return;
+
+event->prog = NULL;
+bpf_prog_put(prog);
+}
+#else
+static inline int bpf_overflow_handler(struct perf_event *event,
+struct perf_sample_data *data,
+struct pt_regs *regs)
+{
+return 1;
+}
+
+static inline int perf_event_set_bpf_handler(struct perf_event *event,
+struct bpf_prog *prog,
+u64 bpf_cookie)
+{
+return -EOPNOTSUPP;
+}
+
+static inline void perf_event_free_bpf_handler(struct perf_event *event)
+{
+}
+#endif
+
/*
* Generic event overflow handling, sampling.
*/
@@ -9564,6 +9669,9 @@ static int __perf_event_overflow(struct perf_event *event,

ret = __perf_event_account_interrupt(event, throttle);

+if (event->prog && !bpf_overflow_handler(event, data, regs))
+return ret;
+
/*
* XXX event_limit might not quite work as expected on inherited
* events
@@ -10422,97 +10530,6 @@ static void perf_event_free_filter(struct perf_event *event)
ftrace_profile_free_filter(event);
}

-#ifdef CONFIG_BPF_SYSCALL
-static void bpf_overflow_handler(struct perf_event *event,
-struct perf_sample_data *data,
-struct pt_regs *regs)
-{
-struct bpf_perf_event_data_kern ctx = {
-.data = data,
-.event = event,
-};
-struct bpf_prog *prog;
-int ret = 0;
-
-ctx.regs = perf_arch_bpf_user_pt_regs(regs);
-if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
-goto out;
-rcu_read_lock();
-prog = READ_ONCE(event->prog);
-if (prog) {
-perf_prepare_sample(data, event, regs);
-ret = bpf_prog_run(prog, &ctx);
-}
-rcu_read_unlock();
-out:
-__this_cpu_dec(bpf_prog_active);
-if (!ret)
-return;
-
-event->orig_overflow_handler(event, data, regs);
-}
-
-static int perf_event_set_bpf_handler(struct perf_event *event,
-struct bpf_prog *prog,
-u64 bpf_cookie)
-{
-if (event->overflow_handler_context)
-/* hw breakpoint or kernel counter */
-return -EINVAL;
-
-if (event->prog)
-return -EEXIST;
-
-if (prog->type != BPF_PROG_TYPE_PERF_EVENT)
-return -EINVAL;
-
-if (event->attr.precise_ip &&
-prog->call_get_stack &&
-(!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) ||
-event->attr.exclude_callchain_kernel ||
-event->attr.exclude_callchain_user)) {
-/*
-* On perf_event with precise_ip, calling bpf_get_stack()
-* may trigger unwinder warnings and occasional crashes.
-* bpf_get_[stack|stackid] works around this issue by using
-* callchain attached to perf_sample_data. If the
-* perf_event does not full (kernel and user) callchain
-* attached to perf_sample_data, do not allow attaching BPF
-* program that calls bpf_get_[stack|stackid].
-*/
-return -EPROTO;
-}
-
-event->prog = prog;
-event->bpf_cookie = bpf_cookie;
-event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
-WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
-return 0;
-}
-
-static void perf_event_free_bpf_handler(struct perf_event *event)
-{
-struct bpf_prog *prog = event->prog;
-
-if (!prog)
-return;
-
-WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
-event->prog = NULL;
-bpf_prog_put(prog);
-}
-#else
-static int perf_event_set_bpf_handler(struct perf_event *event,
-struct bpf_prog *prog,
-u64 bpf_cookie)
-{
-return -EOPNOTSUPP;
-}
-static void perf_event_free_bpf_handler(struct perf_event *event)
-{
-}
-#endif
-
/*
* returns true if the event is a tracepoint, or a kprobe/upprobe created
* with perf_event_open()
@@ -11971,13 +11988,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
overflow_handler = parent_event->overflow_handler;
context = parent_event->overflow_handler_context;
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
-if (overflow_handler == bpf_overflow_handler) {
+if (parent_event->prog) {
struct bpf_prog *prog = parent_event->prog;

bpf_prog_inc(prog);
event->prog = prog;
-event->orig_overflow_handler =
-parent_event->orig_overflow_handler;
}
#endif
}
@@ -22,6 +22,10 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
atomic_set(&handle->rb->poll, EPOLLIN);

handle->event->pending_wakeup = 1;
+
+if (*perf_event_fasync(handle->event) && !handle->event->pending_kill)
+handle->event->pending_kill = POLL_IN;
+
irq_work_queue(&handle->event->pending_irq);
}
tools/testing/selftests/bpf/prog_tests/perf_skip.c (new file, 137 lines)
@@ -0,0 +1,137 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <test_progs.h>
#include "test_perf_skip.skel.h"
#include <linux/compiler.h>
#include <linux/hw_breakpoint.h>
#include <sys/mman.h>

#ifndef TRAP_PERF
#define TRAP_PERF 6
#endif

int sigio_count, sigtrap_count;

static void handle_sigio(int sig __always_unused)
{
++sigio_count;
}

static void handle_sigtrap(int signum __always_unused,
siginfo_t *info,
void *ucontext __always_unused)
{
ASSERT_EQ(info->si_code, TRAP_PERF, "si_code");
++sigtrap_count;
}

static noinline int test_function(void)
{
asm volatile ("");
return 0;
}

void serial_test_perf_skip(void)
{
struct sigaction action = {};
struct sigaction previous_sigtrap;
sighandler_t previous_sigio = SIG_ERR;
struct test_perf_skip *skel = NULL;
struct perf_event_attr attr = {};
int perf_fd = -1;
int err;
struct f_owner_ex owner;
struct bpf_link *prog_link = NULL;

action.sa_flags = SA_SIGINFO | SA_NODEFER;
action.sa_sigaction = handle_sigtrap;
sigemptyset(&action.sa_mask);
if (!ASSERT_OK(sigaction(SIGTRAP, &action, &previous_sigtrap), "sigaction"))
return;

previous_sigio = signal(SIGIO, handle_sigio);
if (!ASSERT_NEQ(previous_sigio, SIG_ERR, "signal"))
goto cleanup;

skel = test_perf_skip__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_load"))
goto cleanup;

attr.type = PERF_TYPE_BREAKPOINT;
attr.size = sizeof(attr);
attr.bp_type = HW_BREAKPOINT_X;
attr.bp_addr = (uintptr_t)test_function;
attr.bp_len = sizeof(long);
attr.sample_period = 1;
attr.sample_type = PERF_SAMPLE_IP;
attr.pinned = 1;
attr.exclude_kernel = 1;
attr.exclude_hv = 1;
attr.precise_ip = 3;
attr.sigtrap = 1;
attr.remove_on_exec = 1;

perf_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
if (perf_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
printf("SKIP:no PERF_TYPE_BREAKPOINT/HW_BREAKPOINT_X\n");
test__skip();
goto cleanup;
}
if (!ASSERT_OK(perf_fd < 0, "perf_event_open"))
goto cleanup;

/* Configure the perf event to signal on sample. */
err = fcntl(perf_fd, F_SETFL, O_ASYNC);
if (!ASSERT_OK(err, "fcntl(F_SETFL, O_ASYNC)"))
goto cleanup;

owner.type = F_OWNER_TID;
owner.pid = syscall(__NR_gettid);
err = fcntl(perf_fd, F_SETOWN_EX, &owner);
if (!ASSERT_OK(err, "fcntl(F_SETOWN_EX)"))
goto cleanup;

/* Allow at most one sample. A sample rejected by bpf should
 * not count against this.
 */
err = ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 1);
if (!ASSERT_OK(err, "ioctl(PERF_EVENT_IOC_REFRESH)"))
goto cleanup;

prog_link = bpf_program__attach_perf_event(skel->progs.handler, perf_fd);
if (!ASSERT_OK_PTR(prog_link, "bpf_program__attach_perf_event"))
goto cleanup;

/* Configure the bpf program to suppress the sample. */
skel->bss->ip = (uintptr_t)test_function;
test_function();

ASSERT_EQ(sigio_count, 0, "sigio_count");
ASSERT_EQ(sigtrap_count, 0, "sigtrap_count");

/* Configure the bpf program to allow the sample. */
skel->bss->ip = 0;
test_function();

ASSERT_EQ(sigio_count, 1, "sigio_count");
ASSERT_EQ(sigtrap_count, 1, "sigtrap_count");

/* Test that the sample above is the only one allowed (by perf, not
 * by bpf)
 */
test_function();

ASSERT_EQ(sigio_count, 1, "sigio_count");
ASSERT_EQ(sigtrap_count, 1, "sigtrap_count");

cleanup:
bpf_link__destroy(prog_link);
if (perf_fd >= 0)
close(perf_fd);
test_perf_skip__destroy(skel);

if (previous_sigio != SIG_ERR)
signal(SIGIO, previous_sigio);
sigaction(SIGTRAP, &previous_sigtrap, NULL);
}
tools/testing/selftests/bpf/progs/test_perf_skip.c (new file, 15 lines)
@@ -0,0 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

uintptr_t ip;

SEC("perf_event")
int handler(struct bpf_perf_event_data *data)
{
/* Skip events that have the correct ip. */
return ip != PT_REGS_IP(&data->regs);
}

char _license[] SEC("license") = "GPL";
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
sigtrap_threads
remove_on_exec
+watermark_signal
@@ -2,5 +2,5 @@
CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
LDFLAGS += -lpthread

-TEST_GEN_PROGS := sigtrap_threads remove_on_exec
+TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal
include ../lib.mk
tools/testing/selftests/perf_events/watermark_signal.c (new file, 146 lines)
@@ -0,0 +1,146 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <errno.h>
#include <fcntl.h>
#include <linux/perf_event.h>
#include <stddef.h>
#include <sched.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

#include "../kselftest_harness.h"

#define __maybe_unused __attribute__((__unused__))

static int sigio_count;

static void handle_sigio(int signum __maybe_unused,
siginfo_t *oh __maybe_unused,
void *uc __maybe_unused)
{
++sigio_count;
}

static void do_child(void)
{
raise(SIGSTOP);

for (int i = 0; i < 20; ++i)
sleep(1);

raise(SIGSTOP);

exit(0);
}

TEST(watermark_signal)
{
struct perf_event_attr attr;
struct perf_event_mmap_page *p = NULL;
struct sigaction previous_sigio, sigio = { 0 };
pid_t child = -1;
int child_status;
int fd = -1;
long page_size = sysconf(_SC_PAGE_SIZE);

sigio.sa_sigaction = handle_sigio;
EXPECT_EQ(sigaction(SIGIO, &sigio, &previous_sigio), 0);

memset(&attr, 0, sizeof(attr));
attr.size = sizeof(attr);
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_DUMMY;
attr.sample_period = 1;
attr.disabled = 1;
attr.watermark = 1;
attr.context_switch = 1;
attr.wakeup_watermark = 1;

child = fork();
EXPECT_GE(child, 0);
if (child == 0)
do_child();
else if (child < 0) {
perror("fork()");
goto cleanup;
}

if (waitpid(child, &child_status, WSTOPPED) != child ||
!(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) {
fprintf(stderr,
"failed to sycnhronize with child errno=%d status=%x\n",
errno,
child_status);
goto cleanup;
}

fd = syscall(__NR_perf_event_open, &attr, child, -1, -1,
PERF_FLAG_FD_CLOEXEC);
if (fd < 0) {
fprintf(stderr, "failed opening event %llx\n", attr.config);
goto cleanup;
}

if (fcntl(fd, F_SETFL, FASYNC)) {
perror("F_SETFL FASYNC");
goto cleanup;
}

if (fcntl(fd, F_SETOWN, getpid())) {
perror("F_SETOWN getpid()");
goto cleanup;
}

if (fcntl(fd, F_SETSIG, SIGIO)) {
perror("F_SETSIG SIGIO");
goto cleanup;
}

p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (p == NULL) {
perror("mmap");
goto cleanup;
}

if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
perror("PERF_EVENT_IOC_ENABLE");
goto cleanup;
}

if (kill(child, SIGCONT) < 0) {
perror("SIGCONT");
goto cleanup;
}

if (waitpid(child, &child_status, WSTOPPED) != -1 || errno != EINTR)
fprintf(stderr,
"expected SIGIO to terminate wait errno=%d status=%x\n%d",
errno,
child_status,
sigio_count);

EXPECT_GE(sigio_count, 1);

cleanup:
if (p != NULL)
munmap(p, 2 * page_size);

if (fd >= 0)
close(fd);

if (child > 0) {
kill(child, SIGKILL);
waitpid(child, NULL, 0);
}

sigaction(SIGIO, &previous_sigio, NULL);
}

TEST_HARNESS_MAIN