perf/core improvements and fixes:
New features: - Add support to measure SMI cost in 'perf stat' (Kan Liang) - Add support for unwinding callchains in powerpc with libdw (Paolo Bonzini) Fixes: - Fix message: cpu list option is -C not -c (Adrian Hunter) - Fix 'perf script' message: field list option is -F not -f (Adrian Hunter) - Intel PT fixes: (Adrian Hunter) o Fix missing stack clear o Ensure IP is zero when state is INTEL_PT_STATE_NO_IP o Fix last_ip usage o Ensure never to set 'last_ip' when packet 'count' is zero o Clear FUP flag on error o Fix transactions_sample_type Infrastructure: - Intel PT cleanups/refactorings (Adrian Hunter) o Use FUP always when scanning for an IP o Add missing __fallthrough o Remove redundant initial_skip checks o Allow decoding with branch tracing disabled o Add default config for pass-through branch enable o Add documentation for new config terms o Add decoder support for ptwrite and power event packets o Add reserved byte to CBR packet payload o Add decoder support for CBR events - Move find_process() to the only place that uses it, skimming some more fat from util.[ch] (Arnaldo Carvalho de Melo) - Do parameter validation earlier on fetch_kernel_version() (Arnaldo Carvalho de Melo) - Remove unused _ALL_SOURCE define (Arnaldo Carvalho de Melo) - Add sysfs__write_int function (Kan Liang) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQIcBAABCAAGBQJZSouZAAoJENZQFvNTUqpAzIIP/jlEKCAym9EtGAmsOBxvAIiQ ocoV0g8/bRW+Dr8TGt7xbZNswBHCk/4e7ap+y9/NtME4KIwuZMovf8h/oImf+C1z ULiomjLEcJy9CQoWRDIe/kUK0LKmIVNVeNqIxisEukZmRhTW6GkpOeO3/BMALDbo YAoXkpSEeiJEx3OcD+yLlu6e2a0JJkJa0U2ujWxBjeNDatyWHKkM043eu4kbshvc YrfQUeQr4soWU45c/iWVOmRG+u9R2lfxK+T0cA1PNRYMJX70qXh0/jyGumv4iRUt KoKjGu2oh8WiE4xwImQrAkqEbuDwBbK3dZ71ZYsainR+RMphk25AD4KEM+5rjQnu DD++iGcKl5fAI+llzRxEjgSOM5CJ0iIufqHq2UB6ZB3NSL8sEUu7ehnbLA1lHF75 ABDGStB6Tis16NRKaM0hCz2OykluEOr4+RhUOUY4Nk58+vzxCigVpFSQF1B8duWC WcjQP0w3GXkaKcXpq75fsDeLQC9UCrGCU2bYDuY34HrYroScIGCPsHow3qKV38ml 
lFUujzQHrHxImDNwFSRAsnNLEVIy/SsUunY9QlDCZysVwMC47/W6gpquQl8rfBMb KLiUA2ohLbYZsFUK5SaICWePtuu5IVcCGfEgrNqbnCYT9+9kPSFIPEPMXl+F+fAq UWPPqIBS4tmAA1PD9mz3 =7oUP -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo-4.13-20170621' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: New features: - Add support to measure SMI cost in 'perf stat' (Kan Liang) - Add support for unwinding callchains in powerpc with libdw (Paolo Bonzini) Fixes: - Fix message: cpu list option is -C not -c (Adrian Hunter) - Fix 'perf script' message: field list option is -F not -f (Adrian Hunter) - Intel PT fixes: (Adrian Hunter) o Fix missing stack clear o Ensure IP is zero when state is INTEL_PT_STATE_NO_IP o Fix last_ip usage o Ensure never to set 'last_ip' when packet 'count' is zero o Clear FUP flag on error o Fix transactions_sample_type Infrastructure changes: - Intel PT cleanups/refactorings (Adrian Hunter) o Use FUP always when scanning for an IP o Add missing __fallthrough o Remove redundant initial_skip checks o Allow decoding with branch tracing disabled o Add default config for pass-through branch enable o Add documentation for new config terms o Add decoder support for ptwrite and power event packets o Add reserved byte to CBR packet payload o Add decoder support for CBR events - Move find_process() to the only place that uses it, skimming some more fat from util.[ch] (Arnaldo Carvalho de Melo) - Do parameter validation earlier on fetch_kernel_version() (Arnaldo Carvalho de Melo) - Remove unused _ALL_SOURCE define (Arnaldo Carvalho de Melo) - Add sysfs__write_int function (Kan Liang) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
8e70e84091
@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Write @value, formatted as a decimal string, to the existing file
 * @filename (the file is opened O_WRONLY; it is neither created nor
 * truncated here).
 *
 * Returns 0 if the complete string was written, -1 if the file could not
 * be opened, the value could not be formatted, or the write failed or was
 * short.
 */
int filename__write_int(const char *filename, int value)
{
	int fd = open(filename, O_WRONLY), err = -1;
	char buf[64];
	int len;

	if (fd < 0)
		return err;

	/*
	 * Write only the characters that were formatted: everything in buf
	 * past the terminating NUL is uninitialized stack memory and must
	 * not be sent to the file.
	 */
	len = snprintf(buf, sizeof(buf), "%d", value);
	if (len > 0 && (size_t)len < sizeof(buf) &&
	    write(fd, buf, (size_t)len) == len)
		err = 0;

	close(fd);
	return err;
}
|
||||
|
||||
int procfs__read_str(const char *entry, char **buf, size_t *sizep)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value)
|
||||
|
||||
return filename__read_int(path, value);
|
||||
}
|
||||
|
||||
int sysfs__write_int(const char *entry, int value)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
const char *sysfs = sysfs__mountpoint();
|
||||
|
||||
if (!sysfs)
|
||||
return -1;
|
||||
|
||||
if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX)
|
||||
return -1;
|
||||
|
||||
return filename__write_int(path, value);
|
||||
}
|
||||
|
@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value);
|
||||
int filename__read_ull(const char *filename, unsigned long long *value);
|
||||
int filename__read_str(const char *filename, char **buf, size_t *sizep);
|
||||
|
||||
int filename__write_int(const char *filename, int value);
|
||||
|
||||
int procfs__read_str(const char *entry, char **buf, size_t *sizep);
|
||||
|
||||
int sysctl__read_int(const char *sysctl, int *value);
|
||||
@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value);
|
||||
int sysfs__read_ull(const char *entry, unsigned long long *value);
|
||||
int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
|
||||
int sysfs__read_bool(const char *entry, bool *value);
|
||||
|
||||
int sysfs__write_int(const char *entry, int value);
|
||||
#endif /* __API_FS__ */
|
||||
|
@ -364,6 +364,42 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc
|
||||
|
||||
CYC packets are not requested by default.
|
||||
|
||||
pt Specifies pass-through which enables the 'branch' config term.
|
||||
|
||||
The default config selects 'pt' if it is available, so a user will
|
||||
never need to specify this term.
|
||||
|
||||
branch Enable branch tracing. Branch tracing is enabled by default so to
|
||||
disable branch tracing use 'branch=0'.
|
||||
|
||||
The default config selects 'branch' if it is available.
|
||||
|
||||
ptw Enable PTWRITE packets which are produced when a ptwrite instruction
|
||||
is executed.
|
||||
|
||||
Support for this feature is indicated by:
|
||||
|
||||
/sys/bus/event_source/devices/intel_pt/caps/ptwrite
|
||||
|
||||
which contains "1" if the feature is supported and
|
||||
"0" otherwise.
|
||||
|
||||
fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
|
||||
provides the address of the ptwrite instruction. In the absence of
|
||||
fup_on_ptw, the decoder will use the address of the previous branch
|
||||
if branch tracing is enabled, otherwise the address will be zero.
|
||||
Note that fup_on_ptw will work even when branch tracing is disabled.
|
||||
|
||||
pwr_evt Enable power events. The power events provide information about
|
||||
changes to the CPU C-state.
|
||||
|
||||
Support for this feature is indicated by:
|
||||
|
||||
/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
|
||||
|
||||
which contains "1" if the feature is supported and
|
||||
"0" otherwise.
|
||||
|
||||
|
||||
new snapshot option
|
||||
-------------------
|
||||
|
@ -239,6 +239,20 @@ taskset.
|
||||
--no-merge::
|
||||
Do not merge results from same PMUs.
|
||||
|
||||
--smi-cost::
|
||||
Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
|
||||
|
||||
During the measurement, the /sys/devices/cpu/freeze_on_smi will be set to
|
||||
freeze core counters on SMI.
|
||||
The aperf counter will not be affected by the setting.
|
||||
The cost of SMI can be measured by (aperf - unhalted core cycles).
|
||||
|
||||
In practice, the percentage of SMI cycles is very useful for performance
|
||||
oriented analysis. --metric-only will be applied by default.
|
||||
The output is SMI cycles%, equal to (aperf - unhalted core cycles) / aperf
|
||||
|
||||
Users who want to get the actual value can apply --no-metric-only.
|
||||
|
||||
EXAMPLES
|
||||
--------
|
||||
|
||||
|
@ -61,7 +61,7 @@ endif
|
||||
# Disable it on all other architectures in case libdw unwind
|
||||
# support is detected in system. Add supported architectures
|
||||
# to the check.
|
||||
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
|
||||
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc))
|
||||
NO_LIBDW_DWARF_UNWIND := 1
|
||||
endif
|
||||
|
||||
|
@ -5,4 +5,6 @@ libperf-y += perf_regs.o
|
||||
|
||||
libperf-$(CONFIG_DWARF) += dwarf-regs.o
|
||||
libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
|
||||
|
||||
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
|
||||
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
|
||||
|
73
tools/perf/arch/powerpc/util/unwind-libdw.c
Normal file
73
tools/perf/arch/powerpc/util/unwind-libdw.c
Normal file
@ -0,0 +1,73 @@
|
||||
#include <elfutils/libdwfl.h>
|
||||
#include "../../util/unwind-libdw.h"
|
||||
#include "../../util/perf_regs.h"
|
||||
#include "../../util/event.h"
|
||||
|
||||
/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */
|
||||
static const int special_regs[3][2] = {
|
||||
{ 65, PERF_REG_POWERPC_LINK },
|
||||
{ 101, PERF_REG_POWERPC_XER },
|
||||
{ 109, PERF_REG_POWERPC_CTR },
|
||||
};
|
||||
|
||||
/*
 * Hand the user registers captured in a perf sample to libdw as the initial
 * unwind state of @thread.
 *
 * @arg is the struct unwind_info whose sample supplies the register dump.
 * Registers R0..R31 are registered as libdw registers 0..31, NIP is
 * registered as the PC, and each special_regs[] pair is registered
 * individually under the number in its first column.
 *
 * A register that perf_reg_value() does not fill in is passed on as 0.
 *
 * Returns false as soon as dwfl_thread_state_registers() rejects a
 * register set, true otherwise.
 */
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
	/* perf register id of each general purpose register, in order. */
	static const int gpr_ids[32] = {
		PERF_REG_POWERPC_R0,  PERF_REG_POWERPC_R1,
		PERF_REG_POWERPC_R2,  PERF_REG_POWERPC_R3,
		PERF_REG_POWERPC_R4,  PERF_REG_POWERPC_R5,
		PERF_REG_POWERPC_R6,  PERF_REG_POWERPC_R7,
		PERF_REG_POWERPC_R8,  PERF_REG_POWERPC_R9,
		PERF_REG_POWERPC_R10, PERF_REG_POWERPC_R11,
		PERF_REG_POWERPC_R12, PERF_REG_POWERPC_R13,
		PERF_REG_POWERPC_R14, PERF_REG_POWERPC_R15,
		PERF_REG_POWERPC_R16, PERF_REG_POWERPC_R17,
		PERF_REG_POWERPC_R18, PERF_REG_POWERPC_R19,
		PERF_REG_POWERPC_R20, PERF_REG_POWERPC_R21,
		PERF_REG_POWERPC_R22, PERF_REG_POWERPC_R23,
		PERF_REG_POWERPC_R24, PERF_REG_POWERPC_R25,
		PERF_REG_POWERPC_R26, PERF_REG_POWERPC_R27,
		PERF_REG_POWERPC_R28, PERF_REG_POWERPC_R29,
		PERF_REG_POWERPC_R30, PERF_REG_POWERPC_R31,
	};
	struct unwind_info *ui = arg;
	struct regs_dump *user_regs = &ui->sample->user_regs;
	Dwarf_Word gprs[32];
	Dwarf_Word pc = 0;
	size_t n;

	/* Start every slot at 0 so an unread register has a defined value. */
	for (n = 0; n < 32; n++) {
		gprs[n] = 0;
		perf_reg_value(&gprs[n], user_regs, gpr_ids[n]);
	}

	if (!dwfl_thread_state_registers(thread, 0, 32, gprs))
		return false;

	perf_reg_value(&pc, user_regs, PERF_REG_POWERPC_NIP);
	dwfl_thread_state_register_pc(thread, pc);

	/* LINK, XER and CTR are registered one at a time. */
	for (n = 0; n < ARRAY_SIZE(special_regs); n++) {
		Dwarf_Word special = 0;

		perf_reg_value(&special, user_regs, special_regs[n][1]);
		if (!dwfl_thread_state_registers(thread, special_regs[n][0],
						 1, &special))
			return false;
	}

	return true;
}
|
@ -192,6 +192,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
|
||||
int psb_cyc, psb_periods, psb_period;
|
||||
int pos = 0;
|
||||
u64 config;
|
||||
char c;
|
||||
|
||||
pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
|
||||
|
||||
@ -225,6 +226,10 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
|
||||
}
|
||||
}
|
||||
|
||||
if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
|
||||
perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
|
||||
pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");
|
||||
|
||||
pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
|
||||
|
||||
intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
|
||||
|
@ -385,7 +385,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
|
||||
*/
|
||||
if (!evsel && output[j].user_set && !output[j].wildcard_set) {
|
||||
pr_err("%s events do not exist. "
|
||||
"Remove corresponding -f option to proceed.\n",
|
||||
"Remove corresponding -F option to proceed.\n",
|
||||
event_type(j));
|
||||
return -1;
|
||||
}
|
||||
|
@ -86,6 +86,7 @@
|
||||
#define DEFAULT_SEPARATOR " "
|
||||
#define CNTR_NOT_SUPPORTED "<not supported>"
|
||||
#define CNTR_NOT_COUNTED "<not counted>"
|
||||
#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
|
||||
|
||||
static void print_counters(struct timespec *ts, int argc, const char **argv);
|
||||
|
||||
@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* Event group handed to parse_events() when SMI cost measurement is on. */
static const char *smi_cost_attrs =
	"{msr/aperf/,msr/smi/,cycles}";
|
||||
|
||||
static struct perf_evlist *evsel_list;
|
||||
|
||||
static struct target target = {
|
||||
@ -137,6 +146,8 @@ static bool null_run = false;
|
||||
static int detailed_run = 0;
|
||||
static bool transaction_run;
|
||||
static bool topdown_run = false;
|
||||
static bool smi_cost = false;
|
||||
static bool smi_reset = false;
|
||||
static bool big_num = true;
|
||||
static int big_num_opt = -1;
|
||||
static const char *csv_sep = NULL;
|
||||
@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
|
||||
"Only print computed metrics. No raw values", enable_metric_only),
|
||||
OPT_BOOLEAN(0, "topdown", &topdown_run,
|
||||
"measure topdown level 1 statistics"),
|
||||
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
|
||||
"measure SMI cost"),
|
||||
OPT_END()
|
||||
};
|
||||
|
||||
@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (smi_cost) {
|
||||
int smi;
|
||||
|
||||
if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
|
||||
fprintf(stderr, "freeze_on_smi is not supported.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!smi) {
|
||||
if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
|
||||
fprintf(stderr, "Failed to set freeze_on_smi.\n");
|
||||
return -1;
|
||||
}
|
||||
smi_reset = true;
|
||||
}
|
||||
|
||||
if (pmu_have_event("msr", "aperf") &&
|
||||
pmu_have_event("msr", "smi")) {
|
||||
if (!force_metric_only)
|
||||
metric_only = true;
|
||||
err = parse_events(evsel_list, smi_cost_attrs, NULL);
|
||||
} else {
|
||||
fprintf(stderr, "To measure SMI cost, it needs "
|
||||
"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
|
||||
return -1;
|
||||
}
|
||||
if (err) {
|
||||
fprintf(stderr, "Cannot set up SMI cost events\n");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (topdown_run) {
|
||||
char *str = NULL;
|
||||
bool warn = false;
|
||||
@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
|
||||
perf_stat__exit_aggr_mode();
|
||||
perf_evlist__free_stats(evsel_list);
|
||||
out:
|
||||
if (smi_cost && smi_reset)
|
||||
sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
|
||||
|
||||
perf_evlist__delete(evsel_list);
|
||||
return status;
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <api/fs/fs.h>
|
||||
#include <api/fs/tracing_path.h>
|
||||
#include <traceevent/event-parse.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
@ -19,6 +20,8 @@
|
||||
#include <linux/err.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include "asm/bug.h"
|
||||
#include "callchain.h"
|
||||
#include "cgroup.h"
|
||||
@ -2472,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool find_process(const char *name)
|
||||
{
|
||||
size_t len = strlen(name);
|
||||
DIR *dir;
|
||||
struct dirent *d;
|
||||
int ret = -1;
|
||||
|
||||
dir = opendir(procfs__mountpoint());
|
||||
if (!dir)
|
||||
return false;
|
||||
|
||||
/* Walk through the directory. */
|
||||
while (ret && (d = readdir(dir)) != NULL) {
|
||||
char path[PATH_MAX];
|
||||
char *data;
|
||||
size_t size;
|
||||
|
||||
if ((d->d_type != DT_DIR) ||
|
||||
!strcmp(".", d->d_name) ||
|
||||
!strcmp("..", d->d_name))
|
||||
continue;
|
||||
|
||||
scnprintf(path, sizeof(path), "%s/%s/comm",
|
||||
procfs__mountpoint(), d->d_name);
|
||||
|
||||
if (filename__read_str(path, &data, &size))
|
||||
continue;
|
||||
|
||||
ret = strncmp(name, data, len);
|
||||
free(data);
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
return ret ? false : true;
|
||||
}
|
||||
|
||||
int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
|
||||
int err, char *msg, size_t size)
|
||||
{
|
||||
|
@ -64,6 +64,25 @@ enum intel_pt_pkt_state {
|
||||
INTEL_PT_STATE_FUP_NO_TIP,
|
||||
};
|
||||
|
||||
static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
|
||||
{
|
||||
switch (pkt_state) {
|
||||
case INTEL_PT_STATE_NO_PSB:
|
||||
case INTEL_PT_STATE_NO_IP:
|
||||
case INTEL_PT_STATE_ERR_RESYNC:
|
||||
case INTEL_PT_STATE_IN_SYNC:
|
||||
case INTEL_PT_STATE_TNT:
|
||||
return true;
|
||||
case INTEL_PT_STATE_TIP:
|
||||
case INTEL_PT_STATE_TIP_PGD:
|
||||
case INTEL_PT_STATE_FUP:
|
||||
case INTEL_PT_STATE_FUP_NO_TIP:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
};
|
||||
}
|
||||
|
||||
#ifdef INTEL_PT_STRICT
|
||||
#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
|
||||
#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
|
||||
@ -87,11 +106,13 @@ struct intel_pt_decoder {
|
||||
const unsigned char *buf;
|
||||
size_t len;
|
||||
bool return_compression;
|
||||
bool branch_enable;
|
||||
bool mtc_insn;
|
||||
bool pge;
|
||||
bool have_tma;
|
||||
bool have_cyc;
|
||||
bool fixup_last_mtc;
|
||||
bool have_last_ip;
|
||||
uint64_t pos;
|
||||
uint64_t last_ip;
|
||||
uint64_t ip;
|
||||
@ -99,6 +120,7 @@ struct intel_pt_decoder {
|
||||
uint64_t timestamp;
|
||||
uint64_t tsc_timestamp;
|
||||
uint64_t ref_timestamp;
|
||||
uint64_t sample_timestamp;
|
||||
uint64_t ret_addr;
|
||||
uint64_t ctc_timestamp;
|
||||
uint64_t ctc_delta;
|
||||
@ -119,6 +141,7 @@ struct intel_pt_decoder {
|
||||
int pkt_len;
|
||||
int last_packet_type;
|
||||
unsigned int cbr;
|
||||
unsigned int cbr_seen;
|
||||
unsigned int max_non_turbo_ratio;
|
||||
double max_non_turbo_ratio_fp;
|
||||
double cbr_cyc_to_tsc;
|
||||
@ -136,9 +159,18 @@ struct intel_pt_decoder {
|
||||
bool continuous_period;
|
||||
bool overflow;
|
||||
bool set_fup_tx_flags;
|
||||
bool set_fup_ptw;
|
||||
bool set_fup_mwait;
|
||||
bool set_fup_pwre;
|
||||
bool set_fup_exstop;
|
||||
unsigned int fup_tx_flags;
|
||||
unsigned int tx_flags;
|
||||
uint64_t fup_ptw_payload;
|
||||
uint64_t fup_mwait_payload;
|
||||
uint64_t fup_pwre_payload;
|
||||
uint64_t cbr_payload;
|
||||
uint64_t timestamp_insn_cnt;
|
||||
uint64_t sample_insn_cnt;
|
||||
uint64_t stuck_ip;
|
||||
int no_progress;
|
||||
int stuck_ip_prd;
|
||||
@ -192,6 +224,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
|
||||
decoder->pgd_ip = params->pgd_ip;
|
||||
decoder->data = params->data;
|
||||
decoder->return_compression = params->return_compression;
|
||||
decoder->branch_enable = params->branch_enable;
|
||||
|
||||
decoder->period = params->period;
|
||||
decoder->period_type = params->period_type;
|
||||
@ -398,6 +431,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
|
||||
static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
|
||||
{
|
||||
decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
|
||||
decoder->have_last_ip = true;
|
||||
}
|
||||
|
||||
static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
|
||||
@ -635,6 +669,8 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
|
||||
case INTEL_PT_PAD:
|
||||
case INTEL_PT_VMCS:
|
||||
case INTEL_PT_MNT:
|
||||
case INTEL_PT_PTWRITE:
|
||||
case INTEL_PT_PTWRITE_IP:
|
||||
return 0;
|
||||
|
||||
case INTEL_PT_MTC:
|
||||
@ -733,6 +769,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
|
||||
|
||||
case INTEL_PT_TIP_PGD:
|
||||
case INTEL_PT_TRACESTOP:
|
||||
case INTEL_PT_EXSTOP:
|
||||
case INTEL_PT_EXSTOP_IP:
|
||||
case INTEL_PT_MWAIT:
|
||||
case INTEL_PT_PWRE:
|
||||
case INTEL_PT_PWRX:
|
||||
case INTEL_PT_OVF:
|
||||
case INTEL_PT_BAD: /* Does not happen */
|
||||
default:
|
||||
@ -898,6 +939,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
|
||||
|
||||
decoder->tot_insn_cnt += insn_cnt;
|
||||
decoder->timestamp_insn_cnt += insn_cnt;
|
||||
decoder->sample_insn_cnt += insn_cnt;
|
||||
decoder->period_insn_cnt += insn_cnt;
|
||||
|
||||
if (err) {
|
||||
@ -990,6 +1032,57 @@ out_no_progress:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
|
||||
{
|
||||
bool ret = false;
|
||||
|
||||
if (decoder->set_fup_tx_flags) {
|
||||
decoder->set_fup_tx_flags = false;
|
||||
decoder->tx_flags = decoder->fup_tx_flags;
|
||||
decoder->state.type = INTEL_PT_TRANSACTION;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
decoder->state.flags = decoder->fup_tx_flags;
|
||||
return true;
|
||||
}
|
||||
if (decoder->set_fup_ptw) {
|
||||
decoder->set_fup_ptw = false;
|
||||
decoder->state.type = INTEL_PT_PTW;
|
||||
decoder->state.flags |= INTEL_PT_FUP_IP;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
decoder->state.ptw_payload = decoder->fup_ptw_payload;
|
||||
return true;
|
||||
}
|
||||
if (decoder->set_fup_mwait) {
|
||||
decoder->set_fup_mwait = false;
|
||||
decoder->state.type = INTEL_PT_MWAIT_OP;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
decoder->state.mwait_payload = decoder->fup_mwait_payload;
|
||||
ret = true;
|
||||
}
|
||||
if (decoder->set_fup_pwre) {
|
||||
decoder->set_fup_pwre = false;
|
||||
decoder->state.type |= INTEL_PT_PWR_ENTRY;
|
||||
decoder->state.type &= ~INTEL_PT_BRANCH;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
decoder->state.pwre_payload = decoder->fup_pwre_payload;
|
||||
ret = true;
|
||||
}
|
||||
if (decoder->set_fup_exstop) {
|
||||
decoder->set_fup_exstop = false;
|
||||
decoder->state.type |= INTEL_PT_EX_STOP;
|
||||
decoder->state.type &= ~INTEL_PT_BRANCH;
|
||||
decoder->state.flags |= INTEL_PT_FUP_IP;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
ret = true;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
|
||||
{
|
||||
struct intel_pt_insn intel_pt_insn;
|
||||
@ -1003,15 +1096,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
|
||||
if (err == INTEL_PT_RETURN)
|
||||
return 0;
|
||||
if (err == -EAGAIN) {
|
||||
if (decoder->set_fup_tx_flags) {
|
||||
decoder->set_fup_tx_flags = false;
|
||||
decoder->tx_flags = decoder->fup_tx_flags;
|
||||
decoder->state.type = INTEL_PT_TRANSACTION;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
decoder->state.flags = decoder->fup_tx_flags;
|
||||
if (intel_pt_fup_event(decoder))
|
||||
return 0;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
decoder->set_fup_tx_flags = false;
|
||||
@ -1360,7 +1446,9 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
|
||||
|
||||
static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
|
||||
{
|
||||
unsigned int cbr = decoder->packet.payload;
|
||||
unsigned int cbr = decoder->packet.payload & 0xff;
|
||||
|
||||
decoder->cbr_payload = decoder->packet.payload;
|
||||
|
||||
if (decoder->cbr == cbr)
|
||||
return;
|
||||
@ -1417,6 +1505,13 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
|
||||
case INTEL_PT_TRACESTOP:
|
||||
case INTEL_PT_BAD:
|
||||
case INTEL_PT_PSB:
|
||||
case INTEL_PT_PTWRITE:
|
||||
case INTEL_PT_PTWRITE_IP:
|
||||
case INTEL_PT_EXSTOP:
|
||||
case INTEL_PT_EXSTOP_IP:
|
||||
case INTEL_PT_MWAIT:
|
||||
case INTEL_PT_PWRE:
|
||||
case INTEL_PT_PWRX:
|
||||
decoder->have_tma = false;
|
||||
intel_pt_log("ERROR: Unexpected packet\n");
|
||||
return -EAGAIN;
|
||||
@ -1446,7 +1541,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
|
||||
|
||||
case INTEL_PT_FUP:
|
||||
decoder->pge = true;
|
||||
intel_pt_set_last_ip(decoder);
|
||||
if (decoder->packet.count)
|
||||
intel_pt_set_last_ip(decoder);
|
||||
break;
|
||||
|
||||
case INTEL_PT_MODE_TSX:
|
||||
@ -1497,6 +1593,13 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
|
||||
case INTEL_PT_MODE_TSX:
|
||||
case INTEL_PT_BAD:
|
||||
case INTEL_PT_PSBEND:
|
||||
case INTEL_PT_PTWRITE:
|
||||
case INTEL_PT_PTWRITE_IP:
|
||||
case INTEL_PT_EXSTOP:
|
||||
case INTEL_PT_EXSTOP_IP:
|
||||
case INTEL_PT_MWAIT:
|
||||
case INTEL_PT_PWRE:
|
||||
case INTEL_PT_PWRX:
|
||||
intel_pt_log("ERROR: Missing TIP after FUP\n");
|
||||
decoder->pkt_state = INTEL_PT_STATE_ERR3;
|
||||
return -ENOENT;
|
||||
@ -1625,6 +1728,15 @@ next:
|
||||
break;
|
||||
}
|
||||
intel_pt_set_last_ip(decoder);
|
||||
if (!decoder->branch_enable) {
|
||||
decoder->ip = decoder->last_ip;
|
||||
if (intel_pt_fup_event(decoder))
|
||||
return 0;
|
||||
no_tip = false;
|
||||
break;
|
||||
}
|
||||
if (decoder->set_fup_mwait)
|
||||
no_tip = true;
|
||||
err = intel_pt_walk_fup(decoder);
|
||||
if (err != -EAGAIN) {
|
||||
if (err)
|
||||
@ -1650,6 +1762,8 @@ next:
|
||||
break;
|
||||
|
||||
case INTEL_PT_PSB:
|
||||
decoder->last_ip = 0;
|
||||
decoder->have_last_ip = true;
|
||||
intel_pt_clear_stack(&decoder->stack);
|
||||
err = intel_pt_walk_psbend(decoder);
|
||||
if (err == -EAGAIN)
|
||||
@ -1696,6 +1810,16 @@ next:
|
||||
|
||||
case INTEL_PT_CBR:
|
||||
intel_pt_calc_cbr(decoder);
|
||||
if (!decoder->branch_enable &&
|
||||
decoder->cbr != decoder->cbr_seen) {
|
||||
decoder->cbr_seen = decoder->cbr;
|
||||
decoder->state.type = INTEL_PT_CBR_CHG;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
decoder->state.cbr_payload =
|
||||
decoder->packet.payload;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case INTEL_PT_MODE_EXEC:
|
||||
@ -1722,6 +1846,71 @@ next:
|
||||
case INTEL_PT_PAD:
|
||||
break;
|
||||
|
||||
case INTEL_PT_PTWRITE_IP:
|
||||
decoder->fup_ptw_payload = decoder->packet.payload;
|
||||
err = intel_pt_get_next_packet(decoder);
|
||||
if (err)
|
||||
return err;
|
||||
if (decoder->packet.type == INTEL_PT_FUP) {
|
||||
decoder->set_fup_ptw = true;
|
||||
no_tip = true;
|
||||
} else {
|
||||
intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
|
||||
decoder->pos);
|
||||
}
|
||||
goto next;
|
||||
|
||||
case INTEL_PT_PTWRITE:
|
||||
decoder->state.type = INTEL_PT_PTW;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
decoder->state.ptw_payload = decoder->packet.payload;
|
||||
return 0;
|
||||
|
||||
case INTEL_PT_MWAIT:
|
||||
decoder->fup_mwait_payload = decoder->packet.payload;
|
||||
decoder->set_fup_mwait = true;
|
||||
break;
|
||||
|
||||
case INTEL_PT_PWRE:
|
||||
if (decoder->set_fup_mwait) {
|
||||
decoder->fup_pwre_payload =
|
||||
decoder->packet.payload;
|
||||
decoder->set_fup_pwre = true;
|
||||
break;
|
||||
}
|
||||
decoder->state.type = INTEL_PT_PWR_ENTRY;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
decoder->state.pwrx_payload = decoder->packet.payload;
|
||||
return 0;
|
||||
|
||||
case INTEL_PT_EXSTOP_IP:
|
||||
err = intel_pt_get_next_packet(decoder);
|
||||
if (err)
|
||||
return err;
|
||||
if (decoder->packet.type == INTEL_PT_FUP) {
|
||||
decoder->set_fup_exstop = true;
|
||||
no_tip = true;
|
||||
} else {
|
||||
intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
|
||||
decoder->pos);
|
||||
}
|
||||
goto next;
|
||||
|
||||
case INTEL_PT_EXSTOP:
|
||||
decoder->state.type = INTEL_PT_EX_STOP;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
return 0;
|
||||
|
||||
case INTEL_PT_PWRX:
|
||||
decoder->state.type = INTEL_PT_PWR_EXIT;
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->state.to_ip = 0;
|
||||
decoder->state.pwrx_payload = decoder->packet.payload;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return intel_pt_bug(decoder);
|
||||
}
|
||||
@ -1730,8 +1919,9 @@ next:
|
||||
|
||||
static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
|
||||
{
|
||||
return decoder->last_ip || decoder->packet.count == 0 ||
|
||||
decoder->packet.count == 3 || decoder->packet.count == 6;
|
||||
return decoder->packet.count &&
|
||||
(decoder->have_last_ip || decoder->packet.count == 3 ||
|
||||
decoder->packet.count == 6);
|
||||
}
|
||||
|
||||
/* Walk PSB+ packets to get in sync. */
|
||||
@ -1750,6 +1940,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
|
||||
__fallthrough;
|
||||
case INTEL_PT_TIP_PGE:
|
||||
case INTEL_PT_TIP:
|
||||
case INTEL_PT_PTWRITE:
|
||||
case INTEL_PT_PTWRITE_IP:
|
||||
case INTEL_PT_EXSTOP:
|
||||
case INTEL_PT_EXSTOP_IP:
|
||||
case INTEL_PT_MWAIT:
|
||||
case INTEL_PT_PWRE:
|
||||
case INTEL_PT_PWRX:
|
||||
intel_pt_log("ERROR: Unexpected packet\n");
|
||||
return -ENOENT;
|
||||
|
||||
@ -1854,14 +2051,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
|
||||
break;
|
||||
|
||||
case INTEL_PT_FUP:
|
||||
if (decoder->overflow) {
|
||||
if (intel_pt_have_ip(decoder))
|
||||
intel_pt_set_ip(decoder);
|
||||
if (decoder->ip)
|
||||
return 0;
|
||||
}
|
||||
if (decoder->packet.count)
|
||||
intel_pt_set_last_ip(decoder);
|
||||
if (intel_pt_have_ip(decoder))
|
||||
intel_pt_set_ip(decoder);
|
||||
if (decoder->ip)
|
||||
return 0;
|
||||
break;
|
||||
|
||||
case INTEL_PT_MTC:
|
||||
@ -1910,6 +2103,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
|
||||
break;
|
||||
|
||||
case INTEL_PT_PSB:
|
||||
decoder->last_ip = 0;
|
||||
decoder->have_last_ip = true;
|
||||
intel_pt_clear_stack(&decoder->stack);
|
||||
err = intel_pt_walk_psb(decoder);
|
||||
if (err)
|
||||
return err;
|
||||
@ -1925,6 +2121,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
|
||||
case INTEL_PT_VMCS:
|
||||
case INTEL_PT_MNT:
|
||||
case INTEL_PT_PAD:
|
||||
case INTEL_PT_PTWRITE:
|
||||
case INTEL_PT_PTWRITE_IP:
|
||||
case INTEL_PT_EXSTOP:
|
||||
case INTEL_PT_EXSTOP_IP:
|
||||
case INTEL_PT_MWAIT:
|
||||
case INTEL_PT_PWRE:
|
||||
case INTEL_PT_PWRX:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1935,6 +2138,19 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
|
||||
{
|
||||
int err;
|
||||
|
||||
decoder->set_fup_tx_flags = false;
|
||||
decoder->set_fup_ptw = false;
|
||||
decoder->set_fup_mwait = false;
|
||||
decoder->set_fup_pwre = false;
|
||||
decoder->set_fup_exstop = false;
|
||||
|
||||
if (!decoder->branch_enable) {
|
||||
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
|
||||
decoder->overflow = false;
|
||||
decoder->state.type = 0; /* Do not have a sample */
|
||||
return 0;
|
||||
}
|
||||
|
||||
intel_pt_log("Scanning for full IP\n");
|
||||
err = intel_pt_walk_to_ip(decoder);
|
||||
if (err)
|
||||
@ -2043,6 +2259,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
|
||||
|
||||
decoder->pge = false;
|
||||
decoder->continuous_period = false;
|
||||
decoder->have_last_ip = false;
|
||||
decoder->last_ip = 0;
|
||||
decoder->ip = 0;
|
||||
intel_pt_clear_stack(&decoder->stack);
|
||||
@ -2051,6 +2268,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
decoder->have_last_ip = true;
|
||||
decoder->pkt_state = INTEL_PT_STATE_NO_IP;
|
||||
|
||||
err = intel_pt_walk_psb(decoder);
|
||||
@ -2069,7 +2287,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
|
||||
|
||||
static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
|
||||
{
|
||||
uint64_t est = decoder->timestamp_insn_cnt << 1;
|
||||
uint64_t est = decoder->sample_insn_cnt << 1;
|
||||
|
||||
if (!decoder->cbr || !decoder->max_non_turbo_ratio)
|
||||
goto out;
|
||||
@ -2077,7 +2295,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
|
||||
est *= decoder->max_non_turbo_ratio;
|
||||
est /= decoder->cbr;
|
||||
out:
|
||||
return decoder->timestamp + est;
|
||||
return decoder->sample_timestamp + est;
|
||||
}
|
||||
|
||||
const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
|
||||
@ -2093,8 +2311,10 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
|
||||
err = intel_pt_sync(decoder);
|
||||
break;
|
||||
case INTEL_PT_STATE_NO_IP:
|
||||
decoder->have_last_ip = false;
|
||||
decoder->last_ip = 0;
|
||||
/* Fall through */
|
||||
decoder->ip = 0;
|
||||
__fallthrough;
|
||||
case INTEL_PT_STATE_ERR_RESYNC:
|
||||
err = intel_pt_sync_ip(decoder);
|
||||
break;
|
||||
@ -2130,15 +2350,29 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
|
||||
}
|
||||
} while (err == -ENOLINK);
|
||||
|
||||
decoder->state.err = err ? intel_pt_ext_err(err) : 0;
|
||||
decoder->state.timestamp = decoder->timestamp;
|
||||
if (err) {
|
||||
decoder->state.err = intel_pt_ext_err(err);
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
decoder->sample_timestamp = decoder->timestamp;
|
||||
decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
|
||||
} else {
|
||||
decoder->state.err = 0;
|
||||
if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
|
||||
decoder->cbr_seen = decoder->cbr;
|
||||
decoder->state.type |= INTEL_PT_CBR_CHG;
|
||||
decoder->state.cbr_payload = decoder->cbr_payload;
|
||||
}
|
||||
if (intel_pt_sample_time(decoder->pkt_state)) {
|
||||
decoder->sample_timestamp = decoder->timestamp;
|
||||
decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
|
||||
}
|
||||
}
|
||||
|
||||
decoder->state.timestamp = decoder->sample_timestamp;
|
||||
decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
|
||||
decoder->state.cr3 = decoder->cr3;
|
||||
decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
|
||||
|
||||
if (err)
|
||||
decoder->state.from_ip = decoder->ip;
|
||||
|
||||
return &decoder->state;
|
||||
}
|
||||
|
||||
|
@ -25,11 +25,18 @@
|
||||
#define INTEL_PT_IN_TX (1 << 0)
|
||||
#define INTEL_PT_ABORT_TX (1 << 1)
|
||||
#define INTEL_PT_ASYNC (1 << 2)
|
||||
#define INTEL_PT_FUP_IP (1 << 3)
|
||||
|
||||
/*
 * Bit flags describing what a decoded state represents.  They are OR-ed
 * together in the 'type' member of the decoder state, so every enumerator
 * occupies its own bit.
 */
enum intel_pt_sample_type {
	INTEL_PT_BRANCH		= 0x001,	/* bit 0 */
	INTEL_PT_INSTRUCTION	= 0x002,	/* bit 1 */
	INTEL_PT_TRANSACTION	= 0x004,	/* bit 2 */
	INTEL_PT_PTW		= 0x008,	/* bit 3 */
	INTEL_PT_MWAIT_OP	= 0x010,	/* bit 4 */
	INTEL_PT_PWR_ENTRY	= 0x020,	/* bit 5 */
	INTEL_PT_EX_STOP	= 0x040,	/* bit 6 */
	INTEL_PT_PWR_EXIT	= 0x080,	/* bit 7 */
	INTEL_PT_CBR_CHG	= 0x100,	/* bit 8 */
};
|
||||
|
||||
enum intel_pt_period_type {
|
||||
@ -63,6 +70,11 @@ struct intel_pt_state {
|
||||
uint64_t timestamp;
|
||||
uint64_t est_timestamp;
|
||||
uint64_t trace_nr;
|
||||
uint64_t ptw_payload;
|
||||
uint64_t mwait_payload;
|
||||
uint64_t pwre_payload;
|
||||
uint64_t pwrx_payload;
|
||||
uint64_t cbr_payload;
|
||||
uint32_t flags;
|
||||
enum intel_pt_insn_op insn_op;
|
||||
int insn_len;
|
||||
@ -87,6 +99,7 @@ struct intel_pt_params {
|
||||
bool (*pgd_ip)(uint64_t ip, void *data);
|
||||
void *data;
|
||||
bool return_compression;
|
||||
bool branch_enable;
|
||||
uint64_t period;
|
||||
enum intel_pt_period_type period_type;
|
||||
unsigned max_non_turbo_ratio;
|
||||
|
@ -64,6 +64,13 @@ static const char * const packet_name[] = {
|
||||
[INTEL_PT_PIP] = "PIP",
|
||||
[INTEL_PT_OVF] = "OVF",
|
||||
[INTEL_PT_MNT] = "MNT",
|
||||
[INTEL_PT_PTWRITE] = "PTWRITE",
|
||||
[INTEL_PT_PTWRITE_IP] = "PTWRITE",
|
||||
[INTEL_PT_EXSTOP] = "EXSTOP",
|
||||
[INTEL_PT_EXSTOP_IP] = "EXSTOP",
|
||||
[INTEL_PT_MWAIT] = "MWAIT",
|
||||
[INTEL_PT_PWRE] = "PWRE",
|
||||
[INTEL_PT_PWRX] = "PWRX",
|
||||
};
|
||||
|
||||
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
|
||||
@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
|
||||
if (len < 4)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->type = INTEL_PT_CBR;
|
||||
packet->payload = buf[2];
|
||||
packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
|
||||
return 4;
|
||||
}
|
||||
|
||||
@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
|
||||
}
|
||||
}
|
||||
|
||||
static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
packet->count = (buf[1] >> 5) & 0x3;
|
||||
packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
|
||||
INTEL_PT_PTWRITE;
|
||||
|
||||
switch (packet->count) {
|
||||
case 0:
|
||||
if (len < 6)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
|
||||
return 6;
|
||||
case 1:
|
||||
if (len < 10)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
|
||||
return 10;
|
||||
default:
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
}
|
||||
}
|
||||
|
||||
static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
|
||||
{
|
||||
packet->type = INTEL_PT_EXSTOP;
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
|
||||
{
|
||||
packet->type = INTEL_PT_EXSTOP_IP;
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
if (len < 10)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->type = INTEL_PT_MWAIT;
|
||||
packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
|
||||
return 10;
|
||||
}
|
||||
|
||||
static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
if (len < 4)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->type = INTEL_PT_PWRE;
|
||||
memcpy_le64(&packet->payload, buf + 2, 2);
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
if (len < 7)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
packet->type = INTEL_PT_PWRX;
|
||||
memcpy_le64(&packet->payload, buf + 2, 5);
|
||||
return 7;
|
||||
}
|
||||
|
||||
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
|
||||
struct intel_pt_pkt *packet)
|
||||
{
|
||||
if (len < 2)
|
||||
return INTEL_PT_NEED_MORE_BYTES;
|
||||
|
||||
if ((buf[1] & 0x1f) == 0x12)
|
||||
return intel_pt_get_ptwrite(buf, len, packet);
|
||||
|
||||
switch (buf[1]) {
|
||||
case 0xa3: /* Long TNT */
|
||||
return intel_pt_get_long_tnt(buf, len, packet);
|
||||
@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
|
||||
return intel_pt_get_tma(buf, len, packet);
|
||||
case 0xC3: /* 3-byte header */
|
||||
return intel_pt_get_3byte(buf, len, packet);
|
||||
case 0x62: /* EXSTOP no IP */
|
||||
return intel_pt_get_exstop(packet);
|
||||
case 0xE2: /* EXSTOP with IP */
|
||||
return intel_pt_get_exstop_ip(packet);
|
||||
case 0xC2: /* MWAIT */
|
||||
return intel_pt_get_mwait(buf, len, packet);
|
||||
case 0x22: /* PWRE */
|
||||
return intel_pt_get_pwre(buf, len, packet);
|
||||
case 0xA2: /* PWRX */
|
||||
return intel_pt_get_pwrx(buf, len, packet);
|
||||
default:
|
||||
return INTEL_PT_BAD_PACKET;
|
||||
}
|
||||
@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
|
||||
ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
|
||||
name, payload, nr);
|
||||
return ret;
|
||||
case INTEL_PT_PTWRITE:
|
||||
return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
|
||||
case INTEL_PT_PTWRITE_IP:
|
||||
return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
|
||||
case INTEL_PT_EXSTOP:
|
||||
return snprintf(buf, buf_len, "%s IP:0", name);
|
||||
case INTEL_PT_EXSTOP_IP:
|
||||
return snprintf(buf, buf_len, "%s IP:1", name);
|
||||
case INTEL_PT_MWAIT:
|
||||
return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
|
||||
name, payload, (unsigned int)(payload & 0xff),
|
||||
(unsigned int)((payload >> 32) & 0x3));
|
||||
case INTEL_PT_PWRE:
|
||||
return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
|
||||
name, payload, !!(payload & 0x80),
|
||||
(unsigned int)((payload >> 12) & 0xf),
|
||||
(unsigned int)((payload >> 8) & 0xf));
|
||||
case INTEL_PT_PWRX:
|
||||
return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
|
||||
name, payload,
|
||||
(unsigned int)((payload >> 4) & 0xf),
|
||||
(unsigned int)(payload & 0xf),
|
||||
(unsigned int)((payload >> 8) & 0xf));
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -52,6 +52,13 @@ enum intel_pt_pkt_type {
|
||||
INTEL_PT_PIP,
|
||||
INTEL_PT_OVF,
|
||||
INTEL_PT_MNT,
|
||||
INTEL_PT_PTWRITE,
|
||||
INTEL_PT_PTWRITE_IP,
|
||||
INTEL_PT_EXSTOP,
|
||||
INTEL_PT_EXSTOP_IP,
|
||||
INTEL_PT_MWAIT,
|
||||
INTEL_PT_PWRE,
|
||||
INTEL_PT_PWRX,
|
||||
};
|
||||
|
||||
struct intel_pt_pkt {
|
||||
|
@ -668,6 +668,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool intel_pt_branch_enable(struct intel_pt *pt)
|
||||
{
|
||||
struct perf_evsel *evsel;
|
||||
u64 config;
|
||||
|
||||
evlist__for_each_entry(pt->session->evlist, evsel) {
|
||||
if (intel_pt_get_config(pt, &evsel->attr, &config) &&
|
||||
(config & 1) && !(config & 0x2000))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
|
||||
{
|
||||
struct perf_evsel *evsel;
|
||||
@ -799,6 +812,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
|
||||
params.walk_insn = intel_pt_walk_next_insn;
|
||||
params.data = ptq;
|
||||
params.return_compression = intel_pt_return_compression(pt);
|
||||
params.branch_enable = intel_pt_branch_enable(pt);
|
||||
params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
|
||||
params.mtc_period = intel_pt_mtc_period(pt);
|
||||
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
|
||||
@ -1308,18 +1322,14 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
|
||||
ptq->have_sample = false;
|
||||
|
||||
if (pt->sample_instructions &&
|
||||
(state->type & INTEL_PT_INSTRUCTION) &&
|
||||
(!pt->synth_opts.initial_skip ||
|
||||
pt->num_events++ >= pt->synth_opts.initial_skip)) {
|
||||
(state->type & INTEL_PT_INSTRUCTION)) {
|
||||
err = intel_pt_synth_instruction_sample(ptq);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (pt->sample_transactions &&
|
||||
(state->type & INTEL_PT_TRANSACTION) &&
|
||||
(!pt->synth_opts.initial_skip ||
|
||||
pt->num_events++ >= pt->synth_opts.initial_skip)) {
|
||||
(state->type & INTEL_PT_TRANSACTION)) {
|
||||
err = intel_pt_synth_transaction_sample(ptq);
|
||||
if (err)
|
||||
return err;
|
||||
@ -2025,6 +2035,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
|
||||
return err;
|
||||
}
|
||||
pt->sample_transactions = true;
|
||||
pt->transactions_sample_type = attr.sample_type;
|
||||
pt->transactions_id = id;
|
||||
id += 1;
|
||||
evlist__for_each_entry(evlist, evsel) {
|
||||
|
@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
|
||||
|
||||
if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
|
||||
pr_err("File does not contain CPU events. "
|
||||
"Remove -c option to proceed.\n");
|
||||
"Remove -C option to proceed.\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
|
||||
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
|
||||
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
|
||||
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
|
||||
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
|
||||
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
|
||||
static struct rblist runtime_saved_values;
|
||||
static bool have_frontend_stalled;
|
||||
|
||||
@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
|
||||
memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
|
||||
memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
|
||||
memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
|
||||
memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
|
||||
memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
|
||||
|
||||
next = rb_first(&runtime_saved_values.entries);
|
||||
while (next) {
|
||||
@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
|
||||
update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
|
||||
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
|
||||
update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
|
||||
else if (perf_stat_evsel__is(counter, SMI_NUM))
|
||||
update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
|
||||
else if (perf_stat_evsel__is(counter, APERF))
|
||||
update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
|
||||
|
||||
if (counter->collect_stat) {
|
||||
struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
|
||||
@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
|
||||
return sanitize_val(1.0 - sum);
|
||||
}
|
||||
|
||||
static void print_smi_cost(int cpu, struct perf_evsel *evsel,
|
||||
struct perf_stat_output_ctx *out)
|
||||
{
|
||||
double smi_num, aperf, cycles, cost = 0.0;
|
||||
int ctx = evsel_context(evsel);
|
||||
const char *color = NULL;
|
||||
|
||||
smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
|
||||
aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
|
||||
cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
|
||||
|
||||
if ((cycles == 0) || (aperf == 0))
|
||||
return;
|
||||
|
||||
if (smi_num)
|
||||
cost = (aperf - cycles) / aperf * 100.00;
|
||||
|
||||
if (cost > 10)
|
||||
color = PERF_COLOR_RED;
|
||||
out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
|
||||
out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
|
||||
}
|
||||
|
||||
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
|
||||
double avg, int cpu,
|
||||
struct perf_stat_output_ctx *out)
|
||||
@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
|
||||
}
|
||||
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
|
||||
print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
|
||||
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
|
||||
print_smi_cost(cpu, evsel, out);
|
||||
} else {
|
||||
print_metric(ctxp, NULL, NULL, NULL, 0);
|
||||
}
|
||||
|
@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
|
||||
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
|
||||
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
|
||||
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
|
||||
ID(SMI_NUM, msr/smi/),
|
||||
ID(APERF, msr/aperf/),
|
||||
};
|
||||
#undef ID
|
||||
|
||||
|
@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
|
||||
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
|
||||
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
|
||||
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
|
||||
PERF_STAT_EVSEL_ID__SMI_NUM,
|
||||
PERF_STAT_EVSEL_ID__APERF,
|
||||
PERF_STAT_EVSEL_ID__MAX,
|
||||
};
|
||||
|
||||
|
@ -343,43 +343,6 @@ int perf_event_paranoid(void)
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
bool find_process(const char *name)
|
||||
{
|
||||
size_t len = strlen(name);
|
||||
DIR *dir;
|
||||
struct dirent *d;
|
||||
int ret = -1;
|
||||
|
||||
dir = opendir(procfs__mountpoint());
|
||||
if (!dir)
|
||||
return false;
|
||||
|
||||
/* Walk through the directory. */
|
||||
while (ret && (d = readdir(dir)) != NULL) {
|
||||
char path[PATH_MAX];
|
||||
char *data;
|
||||
size_t size;
|
||||
|
||||
if ((d->d_type != DT_DIR) ||
|
||||
!strcmp(".", d->d_name) ||
|
||||
!strcmp("..", d->d_name))
|
||||
continue;
|
||||
|
||||
scnprintf(path, sizeof(path), "%s/%s/comm",
|
||||
procfs__mountpoint(), d->d_name);
|
||||
|
||||
if (filename__read_str(path, &data, &size))
|
||||
continue;
|
||||
|
||||
ret = strncmp(name, data, len);
|
||||
free(data);
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
return ret ? false : true;
|
||||
}
|
||||
|
||||
static int
|
||||
fetch_ubuntu_kernel_version(unsigned int *puint)
|
||||
{
|
||||
@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
|
||||
size_t line_len = 0;
|
||||
char *ptr, *line = NULL;
|
||||
int version, patchlevel, sublevel, err;
|
||||
FILE *vsig = fopen("/proc/version_signature", "r");
|
||||
FILE *vsig;
|
||||
|
||||
if (!puint)
|
||||
return 0;
|
||||
|
||||
vsig = fopen("/proc/version_signature", "r");
|
||||
if (!vsig) {
|
||||
pr_debug("Open /proc/version_signature failed: %s\n",
|
||||
strerror(errno));
|
||||
@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
|
||||
goto errout;
|
||||
}
|
||||
|
||||
if (puint)
|
||||
*puint = (version << 16) + (patchlevel << 8) + sublevel;
|
||||
*puint = (version << 16) + (patchlevel << 8) + sublevel;
|
||||
err = 0;
|
||||
errout:
|
||||
free(line);
|
||||
@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str,
|
||||
str[str_size - 1] = '\0';
|
||||
}
|
||||
|
||||
if (!puint || int_ver_ready)
|
||||
return 0;
|
||||
|
||||
err = sscanf(utsname.release, "%d.%d.%d",
|
||||
&version, &patchlevel, &sublevel);
|
||||
|
||||
@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str,
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (puint && !int_ver_ready)
|
||||
*puint = (version << 16) + (patchlevel << 8) + sublevel;
|
||||
*puint = (version << 16) + (patchlevel << 8) + sublevel;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
#ifndef GIT_COMPAT_UTIL_H
|
||||
#define GIT_COMPAT_UTIL_H
|
||||
|
||||
#define _ALL_SOURCE 1
|
||||
#define _BSD_SOURCE 1
|
||||
/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
|
||||
#define _DEFAULT_SOURCE 1
|
||||
@ -49,8 +48,6 @@ int hex2u64(const char *ptr, u64 *val);
|
||||
extern unsigned int page_size;
|
||||
extern int cacheline_size;
|
||||
|
||||
bool find_process(const char *name);
|
||||
|
||||
int fetch_kernel_version(unsigned int *puint,
|
||||
char *str, size_t str_sz);
|
||||
#define KVER_VERSION(x) (((x) >> 16) & 0xff)
|
||||
|
Loading…
Reference in New Issue
Block a user