The X86 specific arch__intr_reg_mask() is to check whether the kernel and hardware can collect XMM registers. But it doesn't work on some hybrid platform. Without the patch on ADL-N: $ perf record -I? available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15 The config of the test event doesn't contain the PMU information. The kernel may fail to initialize it on the correct hybrid PMU and return the wrong non-supported information. Add the PMU information into the config for the hybrid platform. The same register set is supported among different hybrid PMUs. Checking the first available one is good enough. With the patch on ADL-N: $ perf record -I? available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 XMM9 XMM10 XMM11 XMM12 XMM13 XMM14 XMM15 Fixes: 6466ec14aaf44ff1 ("perf regs x86: Add X86 specific arch__intr_reg_mask()") Reported-by: Ammy Yi <ammy.yi@intel.com> Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com> Link: https://lore.kernel.org/r/20220518145125.1494156-1-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
315 lines
8.1 KiB
C
315 lines
8.1 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
#include <regex.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/zalloc.h>
|
|
|
|
#include "../../../perf-sys.h"
|
|
#include "../../../util/perf_regs.h"
|
|
#include "../../../util/debug.h"
|
|
#include "../../../util/event.h"
|
|
#include "../../../util/pmu.h"
|
|
#include "../../../util/pmu-hybrid.h"
|
|
|
|
const struct sample_reg sample_reg_masks[] = {
|
|
SMPL_REG(AX, PERF_REG_X86_AX),
|
|
SMPL_REG(BX, PERF_REG_X86_BX),
|
|
SMPL_REG(CX, PERF_REG_X86_CX),
|
|
SMPL_REG(DX, PERF_REG_X86_DX),
|
|
SMPL_REG(SI, PERF_REG_X86_SI),
|
|
SMPL_REG(DI, PERF_REG_X86_DI),
|
|
SMPL_REG(BP, PERF_REG_X86_BP),
|
|
SMPL_REG(SP, PERF_REG_X86_SP),
|
|
SMPL_REG(IP, PERF_REG_X86_IP),
|
|
SMPL_REG(FLAGS, PERF_REG_X86_FLAGS),
|
|
SMPL_REG(CS, PERF_REG_X86_CS),
|
|
SMPL_REG(SS, PERF_REG_X86_SS),
|
|
#ifdef HAVE_ARCH_X86_64_SUPPORT
|
|
SMPL_REG(R8, PERF_REG_X86_R8),
|
|
SMPL_REG(R9, PERF_REG_X86_R9),
|
|
SMPL_REG(R10, PERF_REG_X86_R10),
|
|
SMPL_REG(R11, PERF_REG_X86_R11),
|
|
SMPL_REG(R12, PERF_REG_X86_R12),
|
|
SMPL_REG(R13, PERF_REG_X86_R13),
|
|
SMPL_REG(R14, PERF_REG_X86_R14),
|
|
SMPL_REG(R15, PERF_REG_X86_R15),
|
|
#endif
|
|
SMPL_REG2(XMM0, PERF_REG_X86_XMM0),
|
|
SMPL_REG2(XMM1, PERF_REG_X86_XMM1),
|
|
SMPL_REG2(XMM2, PERF_REG_X86_XMM2),
|
|
SMPL_REG2(XMM3, PERF_REG_X86_XMM3),
|
|
SMPL_REG2(XMM4, PERF_REG_X86_XMM4),
|
|
SMPL_REG2(XMM5, PERF_REG_X86_XMM5),
|
|
SMPL_REG2(XMM6, PERF_REG_X86_XMM6),
|
|
SMPL_REG2(XMM7, PERF_REG_X86_XMM7),
|
|
SMPL_REG2(XMM8, PERF_REG_X86_XMM8),
|
|
SMPL_REG2(XMM9, PERF_REG_X86_XMM9),
|
|
SMPL_REG2(XMM10, PERF_REG_X86_XMM10),
|
|
SMPL_REG2(XMM11, PERF_REG_X86_XMM11),
|
|
SMPL_REG2(XMM12, PERF_REG_X86_XMM12),
|
|
SMPL_REG2(XMM13, PERF_REG_X86_XMM13),
|
|
SMPL_REG2(XMM14, PERF_REG_X86_XMM14),
|
|
SMPL_REG2(XMM15, PERF_REG_X86_XMM15),
|
|
SMPL_REG_END
|
|
};
|
|
|
|
struct sdt_name_reg {
|
|
const char *sdt_name;
|
|
const char *uprobe_name;
|
|
};
|
|
#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m}
|
|
#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL}
|
|
|
|
static const struct sdt_name_reg sdt_reg_tbl[] = {
|
|
SDT_NAME_REG(eax, ax),
|
|
SDT_NAME_REG(rax, ax),
|
|
SDT_NAME_REG(al, ax),
|
|
SDT_NAME_REG(ah, ax),
|
|
SDT_NAME_REG(ebx, bx),
|
|
SDT_NAME_REG(rbx, bx),
|
|
SDT_NAME_REG(bl, bx),
|
|
SDT_NAME_REG(bh, bx),
|
|
SDT_NAME_REG(ecx, cx),
|
|
SDT_NAME_REG(rcx, cx),
|
|
SDT_NAME_REG(cl, cx),
|
|
SDT_NAME_REG(ch, cx),
|
|
SDT_NAME_REG(edx, dx),
|
|
SDT_NAME_REG(rdx, dx),
|
|
SDT_NAME_REG(dl, dx),
|
|
SDT_NAME_REG(dh, dx),
|
|
SDT_NAME_REG(esi, si),
|
|
SDT_NAME_REG(rsi, si),
|
|
SDT_NAME_REG(sil, si),
|
|
SDT_NAME_REG(edi, di),
|
|
SDT_NAME_REG(rdi, di),
|
|
SDT_NAME_REG(dil, di),
|
|
SDT_NAME_REG(ebp, bp),
|
|
SDT_NAME_REG(rbp, bp),
|
|
SDT_NAME_REG(bpl, bp),
|
|
SDT_NAME_REG(rsp, sp),
|
|
SDT_NAME_REG(esp, sp),
|
|
SDT_NAME_REG(spl, sp),
|
|
|
|
/* rNN registers */
|
|
SDT_NAME_REG(r8b, r8),
|
|
SDT_NAME_REG(r8w, r8),
|
|
SDT_NAME_REG(r8d, r8),
|
|
SDT_NAME_REG(r9b, r9),
|
|
SDT_NAME_REG(r9w, r9),
|
|
SDT_NAME_REG(r9d, r9),
|
|
SDT_NAME_REG(r10b, r10),
|
|
SDT_NAME_REG(r10w, r10),
|
|
SDT_NAME_REG(r10d, r10),
|
|
SDT_NAME_REG(r11b, r11),
|
|
SDT_NAME_REG(r11w, r11),
|
|
SDT_NAME_REG(r11d, r11),
|
|
SDT_NAME_REG(r12b, r12),
|
|
SDT_NAME_REG(r12w, r12),
|
|
SDT_NAME_REG(r12d, r12),
|
|
SDT_NAME_REG(r13b, r13),
|
|
SDT_NAME_REG(r13w, r13),
|
|
SDT_NAME_REG(r13d, r13),
|
|
SDT_NAME_REG(r14b, r14),
|
|
SDT_NAME_REG(r14w, r14),
|
|
SDT_NAME_REG(r14d, r14),
|
|
SDT_NAME_REG(r15b, r15),
|
|
SDT_NAME_REG(r15w, r15),
|
|
SDT_NAME_REG(r15d, r15),
|
|
SDT_NAME_REG_END,
|
|
};
|
|
|
|
/*
|
|
* Perf only supports OP which is in +/-NUM(REG) form.
|
|
* Here plus-minus sign, NUM and parenthesis are optional,
|
|
* only REG is mandatory.
|
|
*
|
|
* SDT events also supports indirect addressing mode with a
|
|
* symbol as offset, scaled mode and constants in OP. But
|
|
* perf does not support them yet. Below are few examples.
|
|
*
|
|
* OP with scaled mode:
|
|
* (%rax,%rsi,8)
|
|
* 10(%ras,%rsi,8)
|
|
*
|
|
* OP with indirect addressing mode:
|
|
* check_action(%rip)
|
|
* mp_+52(%rip)
|
|
* 44+mp_(%rip)
|
|
*
|
|
* OP with constant values:
|
|
* $0
|
|
* $123
|
|
* $-1
|
|
*/
|
|
#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$"
|
|
|
|
static regex_t sdt_op_regex;
|
|
|
|
static int sdt_init_op_regex(void)
|
|
{
|
|
static int initialized;
|
|
int ret = 0;
|
|
|
|
if (initialized)
|
|
return 0;
|
|
|
|
ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED);
|
|
if (ret < 0) {
|
|
pr_debug4("Regex compilation error.\n");
|
|
return ret;
|
|
}
|
|
|
|
initialized = 1;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Max x86 register name length is 5(ex: %r15d). So, 6th char
|
|
* should always contain NULL. This helps to find register name
|
|
* length using strlen, instead of maintaining one more variable.
|
|
*/
|
|
#define SDT_REG_NAME_SIZE 6
|
|
|
|
/*
|
|
* The uprobe parser does not support all gas register names;
|
|
* so, we have to replace them (ex. for x86_64: %rax -> %ax).
|
|
* Note: If register does not require renaming, just copy
|
|
* paste as it is, but don't leave it empty.
|
|
*/
|
|
static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg)
|
|
{
|
|
int i = 0;
|
|
|
|
for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) {
|
|
if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) {
|
|
strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name);
|
|
return;
|
|
}
|
|
}
|
|
|
|
strncpy(uprobe_reg, sdt_reg, sdt_len);
|
|
}
|
|
|
|
int arch_sdt_arg_parse_op(char *old_op, char **new_op)
|
|
{
|
|
char new_reg[SDT_REG_NAME_SIZE] = {0};
|
|
int new_len = 0, ret;
|
|
/*
|
|
* rm[0]: +/-NUM(REG)
|
|
* rm[1]: +/-
|
|
* rm[2]: NUM
|
|
* rm[3]: (
|
|
* rm[4]: REG
|
|
* rm[5]: )
|
|
*/
|
|
regmatch_t rm[6];
|
|
/*
|
|
* Max prefix length is 2 as it may contains sign(+/-)
|
|
* and displacement 0 (Both sign and displacement 0 are
|
|
* optional so it may be empty). Use one more character
|
|
* to hold last NULL so that strlen can be used to find
|
|
* prefix length, instead of maintaining one more variable.
|
|
*/
|
|
char prefix[3] = {0};
|
|
|
|
ret = sdt_init_op_regex();
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
/*
|
|
* If unsupported OR does not match with regex OR
|
|
* register name too long, skip it.
|
|
*/
|
|
if (strchr(old_op, ',') || strchr(old_op, '$') ||
|
|
regexec(&sdt_op_regex, old_op, 6, rm, 0) ||
|
|
rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) {
|
|
pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
|
|
return SDT_ARG_SKIP;
|
|
}
|
|
|
|
/*
|
|
* Prepare prefix.
|
|
* If SDT OP has parenthesis but does not provide
|
|
* displacement, add 0 for displacement.
|
|
* SDT Uprobe Prefix
|
|
* -----------------------------
|
|
* +24(%rdi) +24(%di) +
|
|
* 24(%rdi) +24(%di) +
|
|
* %rdi %di
|
|
* (%rdi) +0(%di) +0
|
|
* -80(%rbx) -80(%bx) -
|
|
*/
|
|
if (rm[3].rm_so != rm[3].rm_eo) {
|
|
if (rm[1].rm_so != rm[1].rm_eo)
|
|
prefix[0] = *(old_op + rm[1].rm_so);
|
|
else if (rm[2].rm_so != rm[2].rm_eo)
|
|
prefix[0] = '+';
|
|
else
|
|
scnprintf(prefix, sizeof(prefix), "+0");
|
|
}
|
|
|
|
/* Rename register */
|
|
sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so,
|
|
new_reg);
|
|
|
|
/* Prepare final OP which should be valid for uprobe_events */
|
|
new_len = strlen(prefix) +
|
|
(rm[2].rm_eo - rm[2].rm_so) +
|
|
(rm[3].rm_eo - rm[3].rm_so) +
|
|
strlen(new_reg) +
|
|
(rm[5].rm_eo - rm[5].rm_so) +
|
|
1; /* NULL */
|
|
|
|
*new_op = zalloc(new_len);
|
|
if (!*new_op)
|
|
return -ENOMEM;
|
|
|
|
scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s",
|
|
strlen(prefix), prefix,
|
|
(int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
|
|
(int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so,
|
|
strlen(new_reg), new_reg,
|
|
(int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so);
|
|
|
|
return SDT_ARG_VALID;
|
|
}
|
|
|
|
uint64_t arch__intr_reg_mask(void)
|
|
{
|
|
struct perf_event_attr attr = {
|
|
.type = PERF_TYPE_HARDWARE,
|
|
.config = PERF_COUNT_HW_CPU_CYCLES,
|
|
.sample_type = PERF_SAMPLE_REGS_INTR,
|
|
.sample_regs_intr = PERF_REG_EXTENDED_MASK,
|
|
.precise_ip = 1,
|
|
.disabled = 1,
|
|
.exclude_kernel = 1,
|
|
};
|
|
struct perf_pmu *pmu;
|
|
int fd;
|
|
/*
|
|
* In an unnamed union, init it here to build on older gcc versions
|
|
*/
|
|
attr.sample_period = 1;
|
|
|
|
if (perf_pmu__has_hybrid()) {
|
|
/*
|
|
* The same register set is supported among different hybrid PMUs.
|
|
* Only check the first available one.
|
|
*/
|
|
pmu = list_first_entry(&perf_pmu__hybrid_pmus, typeof(*pmu), hybrid_list);
|
|
attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT;
|
|
}
|
|
|
|
event_attr_init(&attr);
|
|
|
|
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
|
|
if (fd != -1) {
|
|
close(fd);
|
|
return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
|
|
}
|
|
|
|
return PERF_REGS_MASK;
|
|
}
|