perf intel-pt: Add support for PERF_RECORD_AUX_OUTPUT_HW_ID

Originally, software only supported redirecting at most one PEBS event to
Intel PT (PEBS-via-PT) because it was not able to differentiate one event
from another. To overcome that, add support for the
PERF_RECORD_AUX_OUTPUT_HW_ID side-band event.

Committer notes:

Cast the pointer arg to for_each_set_bit() to (unsigned long *), to fix
the build on 32-bit systems.

Reviewed-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: x86@kernel.org
Link: http://lore.kernel.org/lkml/20210907163903.11820-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Adrian Hunter 2021-09-07 19:39:03 +03:00 committed by Arnaldo Carvalho de Melo
parent 6175047358
commit dedcc0ea6d
2 changed files with 87 additions and 5 deletions

View File

@ -1144,7 +1144,12 @@ Recording is selected by using the aux-output config term e.g.
perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname
Note that currently, software only supports redirecting at most one PEBS event. Originally, software only supported redirecting at most one PEBS event because it
was not able to differentiate one event from another. To overcome that, more recent
kernels and perf tools add support for the PERF_RECORD_AUX_OUTPUT_HW_ID side-band event.
To check for the presence of that event in a PEBS-via-PT trace:
perf script -D --no-itrace | grep PERF_RECORD_AUX_OUTPUT_HW_ID
To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g.

View File

@ -111,6 +111,7 @@ struct intel_pt {
u64 cbr_id; u64 cbr_id;
u64 psb_id; u64 psb_id;
bool single_pebs;
bool sample_pebs; bool sample_pebs;
struct evsel *pebs_evsel; struct evsel *pebs_evsel;
@ -148,6 +149,14 @@ enum switch_state {
INTEL_PT_SS_EXPECTING_SWITCH_IP, INTEL_PT_SS_EXPECTING_SWITCH_IP,
}; };
/*
 * Upper bound on the PEBS hardware IDs tracked per queue.
 * applicable_counters is 64-bits, and each set bit in it is used as a
 * hardware ID indexing the per-queue pebs[] table.
 */
#define INTEL_PT_MAX_PEBS 64
/*
 * One entry of the per-queue PEBS table, indexed by hardware ID.
 * Entries are filled in when a PERF_RECORD_AUX_OUTPUT_HW_ID event is
 * processed and are consulted when synthesizing PEBS samples.
 */
struct intel_pt_pebs_event {
/* Event that the hardware ID was reported for; NULL until recorded */
struct evsel *evsel;
/* Sample ID taken from the side-band event that recorded this entry */
u64 id;
};
struct intel_pt_queue { struct intel_pt_queue {
struct intel_pt *pt; struct intel_pt *pt;
unsigned int queue_nr; unsigned int queue_nr;
@ -189,6 +198,7 @@ struct intel_pt_queue {
u64 last_br_cyc_cnt; u64 last_br_cyc_cnt;
unsigned int cbr_seen; unsigned int cbr_seen;
char insn[INTEL_PT_INSN_BUF_SZ]; char insn[INTEL_PT_INSN_BUF_SZ];
struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS];
}; };
static void intel_pt_dump(struct intel_pt *pt __maybe_unused, static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
@ -1978,15 +1988,13 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
} }
} }
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
{ {
const struct intel_pt_blk_items *items = &ptq->state->items; const struct intel_pt_blk_items *items = &ptq->state->items;
struct perf_sample sample = { .ip = 0, }; struct perf_sample sample = { .ip = 0, };
union perf_event *event = ptq->event_buf; union perf_event *event = ptq->event_buf;
struct intel_pt *pt = ptq->pt; struct intel_pt *pt = ptq->pt;
struct evsel *evsel = pt->pebs_evsel;
u64 sample_type = evsel->core.attr.sample_type; u64 sample_type = evsel->core.attr.sample_type;
u64 id = evsel->core.id[0];
u8 cpumode; u8 cpumode;
u64 regs[8 * sizeof(sample.intr_regs.mask)]; u64 regs[8 * sizeof(sample.intr_regs.mask)];
@ -2112,6 +2120,45 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
} }
/*
 * Synthesize a PEBS sample for the session's recorded PEBS evsel
 * (pt->pebs_evsel), using that evsel's first ID as the sample ID.
 *
 * Returns whatever intel_pt_do_synth_pebs_sample() returns.
 */
static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
{
	struct evsel *pebs_evsel = ptq->pt->pebs_evsel;

	return intel_pt_do_synth_pebs_sample(ptq, pebs_evsel,
					     pebs_evsel->core.id[0]);
}
/*
 * Synthesize PEBS samples for the current PEBS block.
 *
 * Each bit set in the block's applicable_counters is treated as a hardware
 * ID and looked up in ptq->pebs[]; one sample is synthesized per matching
 * entry. If the block carries no applicable_counters, or a hardware ID has
 * no recorded event, fall back to the single-PEBS path (complaining first
 * when the session is not in single-PEBS mode).
 *
 * Returns 0 on success or the first error from sample synthesis.
 */
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
{
	const struct intel_pt_blk_items *blk = &ptq->state->items;
	struct intel_pt *pt = ptq->pt;
	struct intel_pt_pebs_event *entry;
	int ret = -EINVAL;
	int bit;

	if (!(blk->has_applicable_counters && blk->applicable_counters)) {
		if (!pt->single_pebs)
			pr_err("PEBS-via-PT record with no applicable_counters\n");
		return intel_pt_synth_single_pebs_sample(ptq);
	}

	for_each_set_bit(bit, (unsigned long *)&blk->applicable_counters, INTEL_PT_MAX_PEBS) {
		entry = &ptq->pebs[bit];

		/* No event recorded for this hardware ID: use the single-PEBS path */
		if (!entry->evsel) {
			if (!pt->single_pebs)
				pr_err("PEBS-via-PT record with no matching event, hw_id %d\n",
				       bit);
			return intel_pt_synth_single_pebs_sample(ptq);
		}

		ret = intel_pt_do_synth_pebs_sample(ptq, entry->evsel, entry->id);
		if (ret)
			return ret;
	}

	return ret;
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
pid_t pid, pid_t tid, u64 ip, u64 timestamp) pid_t pid, pid_t tid, u64 ip, u64 timestamp)
{ {
@ -2882,6 +2929,30 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
event->itrace_start.tid); event->itrace_start.tid);
} }
/*
 * Process a PERF_RECORD_AUX_OUTPUT_HW_ID side-band event.
 *
 * Records, in the PEBS table of the queue selected by the sample, which
 * evsel and sample ID the reported hardware ID belongs to.
 *
 * @pt:     Intel PT session state
 * @event:  the side-band event; its aux_output_hw_id.hw_id is the table index
 * @sample: parsed sample for the event; supplies the queue and the ID
 *
 * Returns 0 on success, or -EINVAL if the queue or evsel cannot be found
 * or the hardware ID does not fit in the PEBS table.
 */
static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
					     union perf_event *event,
					     struct perf_sample *sample)
{
	u64 hw_id = event->aux_output_hw_id.hw_id;
	struct auxtrace_queue *queue;
	struct intel_pt_queue *ptq;
	struct evsel *evsel;

	queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session);
	evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id);
	/*
	 * hw_id indexes ptq->pebs[], which has INTEL_PT_MAX_PEBS entries, so
	 * valid values are 0 .. INTEL_PT_MAX_PEBS - 1. Reject anything else
	 * (including hw_id == INTEL_PT_MAX_PEBS) to avoid writing past the
	 * end of the array.
	 */
	if (!queue || !queue->priv || !evsel || hw_id >= INTEL_PT_MAX_PEBS) {
		pr_err("Bad AUX output hardware ID\n");
		return -EINVAL;
	}

	ptq = queue->priv;

	ptq->pebs[hw_id].evsel = evsel;
	ptq->pebs[hw_id].id = sample->id;

	return 0;
}
static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
struct addr_location *al) struct addr_location *al)
{ {
@ -3009,6 +3080,8 @@ static int intel_pt_process_event(struct perf_session *session,
err = intel_pt_process_switch(pt, sample); err = intel_pt_process_switch(pt, sample);
else if (event->header.type == PERF_RECORD_ITRACE_START) else if (event->header.type == PERF_RECORD_ITRACE_START)
err = intel_pt_process_itrace_start(pt, event, sample); err = intel_pt_process_itrace_start(pt, event, sample);
else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID)
err = intel_pt_process_aux_output_hw_id(pt, event, sample);
else if (event->header.type == PERF_RECORD_SWITCH || else if (event->header.type == PERF_RECORD_SWITCH ||
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
err = intel_pt_context_switch(pt, event, sample); err = intel_pt_context_switch(pt, event, sample);
@ -3393,9 +3466,13 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt)
evlist__for_each_entry(pt->session->evlist, evsel) { evlist__for_each_entry(pt->session->evlist, evsel) {
if (evsel->core.attr.aux_output && evsel->core.id) { if (evsel->core.attr.aux_output && evsel->core.id) {
if (pt->single_pebs) {
pt->single_pebs = false;
return;
}
pt->single_pebs = true;
pt->sample_pebs = true; pt->sample_pebs = true;
pt->pebs_evsel = evsel; pt->pebs_evsel = evsel;
return;
} }
} }
} }