6ade6c6460
Show the branch speculation info if provided by the branch recording hardware feature. This can be useful for optimizing code further. The speculation info is appended to the end of the list of fields so any existing tools that use "/" as a delimiter for accessing fields via an index remain unaffected. Also show "-" instead of "N/A" when speculation info is unavailable because "/" is used as the field separator. E.g. $ perf record -j any,u,save_type ./test_branch $ perf script --fields brstacksym Before: [...] check_match+0x60/strcmp+0x0/P/-/-/0/CALL do_lookup_x+0x3c5/check_match+0x0/P/-/-/0/CALL [...] After: [...] check_match+0x60/strcmp+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH do_lookup_x+0x3c5/check_match+0x0/P/-/-/0/CALL/NON_SPEC_CORRECT_PATH [...] The bitfield swapping scheme used during sample parsing has changed because of the addition of new branch flags, namely "spec", "new_type" and "priv". Earlier, these were all part of the "reserved" field but now, each of these fields gets swapped separately. Change the expected flag values accordingly for the test to pass. E.g. $ perf test -v 27 Before: 27: Sample parsing : --- start --- test child forked, pid 61979 parsing failed for sample_type 0x800 test child finished with -1 ---- end ---- Sample parsing: FAILED! 
After: 27: Sample parsing : --- start --- test child forked, pid 63293 test child finished with 0 ---- end ---- Sample parsing: Ok Signed-off-by: Sandipan Das <sandipan.das@amd.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Ananth Narayan <ananth.narayan@amd.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Madhavan Srinivasan <maddy@linux.ibm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@amd.com> Cc: Santosh Shukla <santosh.shukla@amd.com> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Thomas Richter <tmricht@linux.ibm.com> Cc: x86@kernel.org Link: https://lore.kernel.org/r/56e272583552526e999ba0b536ac009ae3613966.1675333809.git.sandipan.das@amd.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
95 lines
2.2 KiB
C
95 lines
2.2 KiB
C
#ifndef _PERF_BRANCH_H
|
|
#define _PERF_BRANCH_H 1
|
|
/*
|
|
* The linux/stddef.h isn't needed here, but is needed for __always_inline used
|
|
* in files included from uapi/linux/perf_event.h such as
|
|
* /usr/include/linux/swab.h and /usr/include/linux/byteorder/little_endian.h,
|
|
* detected in at least musl libc, used in Alpine Linux. -acme
|
|
*/
|
|
#include <stdio.h>
|
|
#include <linux/perf_event.h>
|
|
#include <linux/types.h>
|
|
#include "util/map_symbol.h"
|
|
#include "util/sample.h"
|
|
|
|
struct branch_flags {
|
|
union {
|
|
u64 value;
|
|
struct {
|
|
u64 mispred:1;
|
|
u64 predicted:1;
|
|
u64 in_tx:1;
|
|
u64 abort:1;
|
|
u64 cycles:16;
|
|
u64 type:4;
|
|
u64 spec:2;
|
|
u64 new_type:4;
|
|
u64 priv:3;
|
|
u64 reserved:31;
|
|
};
|
|
};
|
|
};
|
|
|
|
struct branch_info {
|
|
struct addr_map_symbol from;
|
|
struct addr_map_symbol to;
|
|
struct branch_flags flags;
|
|
char *srcline_from;
|
|
char *srcline_to;
|
|
};
|
|
|
|
struct branch_entry {
|
|
u64 from;
|
|
u64 to;
|
|
struct branch_flags flags;
|
|
};
|
|
|
|
struct branch_stack {
|
|
u64 nr;
|
|
u64 hw_idx;
|
|
struct branch_entry entries[];
|
|
};
|
|
|
|
/*
|
|
* The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
|
|
* Otherwise, the output format of a sample with branch stack is
|
|
* struct branch_stack {
|
|
* u64 nr;
|
|
* struct branch_entry entries[0];
|
|
* }
|
|
* Check whether the hw_idx is available,
|
|
* and return the corresponding pointer of entries[0].
|
|
*/
|
|
static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
|
|
{
|
|
u64 *entry = (u64 *)sample->branch_stack;
|
|
|
|
entry++;
|
|
if (sample->no_hw_idx)
|
|
return (struct branch_entry *)entry;
|
|
return (struct branch_entry *)(++entry);
|
|
}
|
|
|
|
struct branch_type_stat {
|
|
bool branch_to;
|
|
u64 counts[PERF_BR_MAX];
|
|
u64 new_counts[PERF_BR_NEW_MAX];
|
|
u64 cond_fwd;
|
|
u64 cond_bwd;
|
|
u64 cross_4k;
|
|
u64 cross_2m;
|
|
};
|
|
|
|
/*
 * Branch type accounting and naming helpers.
 *
 * Only the declarations are visible in this header; the per-function notes
 * below are derived from the signatures and should be checked against the
 * definitions.
 */

/* Update 'st' for one branch described by 'flags' going from 'from' to 'to'. */
void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
		       u64 from, u64 to);

/* String lookups keyed by a branch type, a new branch type, or an entry. */
const char *branch_type_name(int type);
const char *branch_new_type_name(int new_type);
const char *get_branch_type(struct branch_entry *e);

/* Output of the statistics in 'st': to a stream, or into 'bf' of 'bfsize' bytes. */
void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);

/* String lookup keyed by a 'spec' value (presumably branch_flags::spec). */
const char *branch_spec_desc(int spec);
|
|
|
|
#endif /* _PERF_BRANCH_H */
|