perf/core improvements ad fixes:

New features:
 
 - Add support to measure SMI cost in 'perf stat' (Kan Liang)
 
 - Add support for unwinding callchains in powerpc with libdw (Paolo Bonzini)
 
 Fixes:
 
 - Fix message: cpu list option is -C not -c (Adrian Hunter)
 
 - Fix 'perf script' message: field list option is -F not -f (Adrian Hunter)
 
 - Intel PT fixes: (Adrian Hunter)
 
   o Fix missing stack clear
   o Ensure IP is zero when state is INTEL_PT_STATE_NO_IP
   o Fix last_ip usage
   o Ensure never to set 'last_ip' when packet 'count' is zero
   o Clear FUP flag on error
   o Fix transactions_sample_type
 
 Infrastructure:
 
 - Intel PT cleanups/refactorings (Adrian Hunter)
 
   o Use FUP always when scanning for an IP
   o Add missing __fallthrough
   o Remove redundant initial_skip checks
   o Allow decoding with branch tracing disabled
   o Add default config for pass-through branch enable
   o Add documentation for new config terms
   o Add decoder support for ptwrite and power event packets
   o Add reserved byte to CBR packet payload
   o Add decoder support for CBR events
 
 - Move  find_process() to the only place that uses it, skimming some
   more fat from util.[ch] (Arnaldo Carvalho de Melo)
 
 - Do parameter validation earlier on fetch_kernel_version() (Arnaldo Carvalho de Melo)
 
 - Remove unused _ALL_SOURCE define (Arnaldo Carvalho de Melo)
 
 - Add sysfs__write_int function (Kan Liang)
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQIcBAABCAAGBQJZSouZAAoJENZQFvNTUqpAzIIP/jlEKCAym9EtGAmsOBxvAIiQ
 ocoV0g8/bRW+Dr8TGt7xbZNswBHCk/4e7ap+y9/NtME4KIwuZMovf8h/oImf+C1z
 ULiomjLEcJy9CQoWRDIe/kUK0LKmIVNVeNqIxisEukZmRhTW6GkpOeO3/BMALDbo
 YAoXkpSEeiJEx3OcD+yLlu6e2a0JJkJa0U2ujWxBjeNDatyWHKkM043eu4kbshvc
 YrfQUeQr4soWU45c/iWVOmRG+u9R2lfxK+T0cA1PNRYMJX70qXh0/jyGumv4iRUt
 KoKjGu2oh8WiE4xwImQrAkqEbuDwBbK3dZ71ZYsainR+RMphk25AD4KEM+5rjQnu
 DD++iGcKl5fAI+llzRxEjgSOM5CJ0iIufqHq2UB6ZB3NSL8sEUu7ehnbLA1lHF75
 ABDGStB6Tis16NRKaM0hCz2OykluEOr4+RhUOUY4Nk58+vzxCigVpFSQF1B8duWC
 WcjQP0w3GXkaKcXpq75fsDeLQC9UCrGCU2bYDuY34HrYroScIGCPsHow3qKV38ml
 lFUujzQHrHxImDNwFSRAsnNLEVIy/SsUunY9QlDCZysVwMC47/W6gpquQl8rfBMb
 KLiUA2ohLbYZsFUK5SaICWePtuu5IVcCGfEgrNqbnCYT9+9kPSFIPEPMXl+F+fAq
 UWPPqIBS4tmAA1PD9mz3
 =7oUP
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo-4.13-20170621' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

 - Add support to measure SMI cost in 'perf stat' (Kan Liang)

 - Add support for unwinding callchains in powerpc with libdw (Paolo Bonzini)

Fixes:

 - Fix message: cpu list option is -C not -c (Adrian Hunter)

 - Fix 'perf script' message: field list option is -F not -f (Adrian Hunter)

 - Intel PT fixes: (Adrian Hunter)

   o Fix missing stack clear
   o Ensure IP is zero when state is INTEL_PT_STATE_NO_IP
   o Fix last_ip usage
   o Ensure never to set 'last_ip' when packet 'count' is zero
   o Clear FUP flag on error
   o Fix transactions_sample_type

Infrastructure changes:

 - Intel PT cleanups/refactorings (Adrian Hunter)

   o Use FUP always when scanning for an IP
   o Add missing __fallthrough
   o Remove redundant initial_skip checks
   o Allow decoding with branch tracing disabled
   o Add default config for pass-through branch enable
   o Add documentation for new config terms
   o Add decoder support for ptwrite and power event packets
   o Add reserved byte to CBR packet payload
   o Add decoder support for CBR events

 - Move  find_process() to the only place that uses it, skimming some
   more fat from util.[ch] (Arnaldo Carvalho de Melo)

 - Do parameter validation earlier on fetch_kernel_version() (Arnaldo Carvalho de Melo)

 - Remove unused _ALL_SOURCE define (Arnaldo Carvalho de Melo)

 - Add sysfs__write_int function (Kan Liang)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2017-06-21 20:11:53 +02:00
commit 8e70e84091
22 changed files with 710 additions and 83 deletions

View File

@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
return err;
}
int filename__write_int(const char *filename, int value)
{
int fd = open(filename, O_WRONLY), err = -1;
char buf[64];
if (fd < 0)
return err;
sprintf(buf, "%d", value);
if (write(fd, buf, sizeof(buf)) == sizeof(buf))
err = 0;
close(fd);
return err;
}
int procfs__read_str(const char *entry, char **buf, size_t *sizep)
{
char path[PATH_MAX];
@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value)
return filename__read_int(path, value);
}
int sysfs__write_int(const char *entry, int value)
{
char path[PATH_MAX];
const char *sysfs = sysfs__mountpoint();
if (!sysfs)
return -1;
if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX)
return -1;
return filename__write_int(path, value);
}

View File

@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value);
int filename__read_ull(const char *filename, unsigned long long *value);
int filename__read_str(const char *filename, char **buf, size_t *sizep);
int filename__write_int(const char *filename, int value);
int procfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysctl__read_int(const char *sysctl, int *value);
@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value);
int sysfs__read_ull(const char *entry, unsigned long long *value);
int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
int sysfs__read_bool(const char *entry, bool *value);
int sysfs__write_int(const char *entry, int value);
#endif /* __API_FS__ */

View File

@ -364,6 +364,42 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc
CYC packets are not requested by default.
pt Specifies pass-through which enables the 'branch' config term.
The default config selects 'pt' if it is available, so a user will
never need to specify this term.
branch Enable branch tracing. Branch tracing is enabled by default so to
disable branch tracing use 'branch=0'.
The default config selects 'branch' if it is available.
ptw Enable PTWRITE packets which are produced when a ptwrite instruction
is executed.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/ptwrite
which contains "1" if the feature is supported and
"0" otherwise.
fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
provides the address of the ptwrite instruction. In the absence of
fup_on_ptw, the decoder will use the address of the previous branch
if branch tracing is enabled, otherwise the address will be zero.
Note that fup_on_ptw will work even when branch tracing is disabled.
pwr_evt Enable power events. The power events provide information about
changes to the CPU C-state.
Support for this feature is indicated by:
/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
which contains "1" if the feature is supported and
"0" otherwise.
new snapshot option
-------------------

View File

@ -239,6 +239,20 @@ taskset.
--no-merge::
Do not merge results from same PMUs.
--smi-cost::
Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
During the measurement, the /sys/device/cpu/freeze_on_smi will be set to
freeze core counters on SMI.
The aperf counter will not be effected by the setting.
The cost of SMI can be measured by (aperf - unhalted core cycles).
In practice, the percentages of SMI cycles is very useful for performance
oriented analysis. --metric_only will be applied by default.
The output is SMI cycles%, equals to (aperf - unhalted core cycles) / aperf
Users who wants to get the actual value can apply --no-metric-only.
EXAMPLES
--------

View File

@ -61,7 +61,7 @@ endif
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
# to the check.
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm))
ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc))
NO_LIBDW_DWARF_UNWIND := 1
endif

View File

@ -5,4 +5,6 @@ libperf-y += perf_regs.o
libperf-$(CONFIG_DWARF) += dwarf-regs.o
libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o

View File

@ -0,0 +1,73 @@
#include <elfutils/libdwfl.h>
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/event.h"
/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */
static const int special_regs[3][2] = {
{ 65, PERF_REG_POWERPC_LINK },
{ 101, PERF_REG_POWERPC_XER },
{ 109, PERF_REG_POWERPC_CTR },
};
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
struct unwind_info *ui = arg;
struct regs_dump *user_regs = &ui->sample->user_regs;
Dwarf_Word dwarf_regs[32], dwarf_nip;
size_t i;
#define REG(r) ({ \
Dwarf_Word val = 0; \
perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r); \
val; \
})
dwarf_regs[0] = REG(R0);
dwarf_regs[1] = REG(R1);
dwarf_regs[2] = REG(R2);
dwarf_regs[3] = REG(R3);
dwarf_regs[4] = REG(R4);
dwarf_regs[5] = REG(R5);
dwarf_regs[6] = REG(R6);
dwarf_regs[7] = REG(R7);
dwarf_regs[8] = REG(R8);
dwarf_regs[9] = REG(R9);
dwarf_regs[10] = REG(R10);
dwarf_regs[11] = REG(R11);
dwarf_regs[12] = REG(R12);
dwarf_regs[13] = REG(R13);
dwarf_regs[14] = REG(R14);
dwarf_regs[15] = REG(R15);
dwarf_regs[16] = REG(R16);
dwarf_regs[17] = REG(R17);
dwarf_regs[18] = REG(R18);
dwarf_regs[19] = REG(R19);
dwarf_regs[20] = REG(R20);
dwarf_regs[21] = REG(R21);
dwarf_regs[22] = REG(R22);
dwarf_regs[23] = REG(R23);
dwarf_regs[24] = REG(R24);
dwarf_regs[25] = REG(R25);
dwarf_regs[26] = REG(R26);
dwarf_regs[27] = REG(R27);
dwarf_regs[28] = REG(R28);
dwarf_regs[29] = REG(R29);
dwarf_regs[30] = REG(R30);
dwarf_regs[31] = REG(R31);
if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs))
return false;
dwarf_nip = REG(NIP);
dwfl_thread_state_register_pc(thread, dwarf_nip);
for (i = 0; i < ARRAY_SIZE(special_regs); i++) {
Dwarf_Word val = 0;
perf_reg_value(&val, user_regs, special_regs[i][1]);
if (!dwfl_thread_state_registers(thread,
special_regs[i][0], 1,
&val))
return false;
}
return true;
}

View File

@ -192,6 +192,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
int psb_cyc, psb_periods, psb_period;
int pos = 0;
u64 config;
char c;
pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
@ -225,6 +226,10 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
}
}
if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");
pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);

View File

@ -385,7 +385,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
*/
if (!evsel && output[j].user_set && !output[j].wildcard_set) {
pr_err("%s events do not exist. "
"Remove corresponding -f option to proceed.\n",
"Remove corresponding -F option to proceed.\n",
event_type(j));
return -1;
}

View File

@ -86,6 +86,7 @@
#define DEFAULT_SEPARATOR " "
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
static void print_counters(struct timespec *ts, int argc, const char **argv);
@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
NULL,
};
static const char *smi_cost_attrs = {
"{"
"msr/aperf/,"
"msr/smi/,"
"cycles"
"}"
};
static struct perf_evlist *evsel_list;
static struct target target = {
@ -137,6 +146,8 @@ static bool null_run = false;
static int detailed_run = 0;
static bool transaction_run;
static bool topdown_run = false;
static bool smi_cost = false;
static bool smi_reset = false;
static bool big_num = true;
static int big_num_opt = -1;
static const char *csv_sep = NULL;
@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
"Only print computed metrics. No raw values", enable_metric_only),
OPT_BOOLEAN(0, "topdown", &topdown_run,
"measure topdown level 1 statistics"),
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
"measure SMI cost"),
OPT_END()
};
@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
return 0;
}
if (smi_cost) {
int smi;
if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
fprintf(stderr, "freeze_on_smi is not supported.\n");
return -1;
}
if (!smi) {
if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
fprintf(stderr, "Failed to set freeze_on_smi.\n");
return -1;
}
smi_reset = true;
}
if (pmu_have_event("msr", "aperf") &&
pmu_have_event("msr", "smi")) {
if (!force_metric_only)
metric_only = true;
err = parse_events(evsel_list, smi_cost_attrs, NULL);
} else {
fprintf(stderr, "To measure SMI cost, it needs "
"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
return -1;
}
if (err) {
fprintf(stderr, "Cannot set up SMI cost events\n");
return -1;
}
return 0;
}
if (topdown_run) {
char *str = NULL;
bool warn = false;
@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
perf_stat__exit_aggr_mode();
perf_evlist__free_stats(evsel_list);
out:
if (smi_cost && smi_reset)
sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
perf_evlist__delete(evsel_list);
return status;
}

View File

@ -11,6 +11,7 @@
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <api/fs/fs.h>
#include <api/fs/tracing_path.h>
#include <traceevent/event-parse.h>
#include <linux/hw_breakpoint.h>
@ -19,6 +20,8 @@
#include <linux/err.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <dirent.h>
#include "asm/bug.h"
#include "callchain.h"
#include "cgroup.h"
@ -2472,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
return false;
}
static bool find_process(const char *name)
{
size_t len = strlen(name);
DIR *dir;
struct dirent *d;
int ret = -1;
dir = opendir(procfs__mountpoint());
if (!dir)
return false;
/* Walk through the directory. */
while (ret && (d = readdir(dir)) != NULL) {
char path[PATH_MAX];
char *data;
size_t size;
if ((d->d_type != DT_DIR) ||
!strcmp(".", d->d_name) ||
!strcmp("..", d->d_name))
continue;
scnprintf(path, sizeof(path), "%s/%s/comm",
procfs__mountpoint(), d->d_name);
if (filename__read_str(path, &data, &size))
continue;
ret = strncmp(name, data, len);
free(data);
}
closedir(dir);
return ret ? false : true;
}
int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
int err, char *msg, size_t size)
{

View File

@ -64,6 +64,25 @@ enum intel_pt_pkt_state {
INTEL_PT_STATE_FUP_NO_TIP,
};
static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
{
switch (pkt_state) {
case INTEL_PT_STATE_NO_PSB:
case INTEL_PT_STATE_NO_IP:
case INTEL_PT_STATE_ERR_RESYNC:
case INTEL_PT_STATE_IN_SYNC:
case INTEL_PT_STATE_TNT:
return true;
case INTEL_PT_STATE_TIP:
case INTEL_PT_STATE_TIP_PGD:
case INTEL_PT_STATE_FUP:
case INTEL_PT_STATE_FUP_NO_TIP:
return false;
default:
return true;
};
}
#ifdef INTEL_PT_STRICT
#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
@ -87,11 +106,13 @@ struct intel_pt_decoder {
const unsigned char *buf;
size_t len;
bool return_compression;
bool branch_enable;
bool mtc_insn;
bool pge;
bool have_tma;
bool have_cyc;
bool fixup_last_mtc;
bool have_last_ip;
uint64_t pos;
uint64_t last_ip;
uint64_t ip;
@ -99,6 +120,7 @@ struct intel_pt_decoder {
uint64_t timestamp;
uint64_t tsc_timestamp;
uint64_t ref_timestamp;
uint64_t sample_timestamp;
uint64_t ret_addr;
uint64_t ctc_timestamp;
uint64_t ctc_delta;
@ -119,6 +141,7 @@ struct intel_pt_decoder {
int pkt_len;
int last_packet_type;
unsigned int cbr;
unsigned int cbr_seen;
unsigned int max_non_turbo_ratio;
double max_non_turbo_ratio_fp;
double cbr_cyc_to_tsc;
@ -136,9 +159,18 @@ struct intel_pt_decoder {
bool continuous_period;
bool overflow;
bool set_fup_tx_flags;
bool set_fup_ptw;
bool set_fup_mwait;
bool set_fup_pwre;
bool set_fup_exstop;
unsigned int fup_tx_flags;
unsigned int tx_flags;
uint64_t fup_ptw_payload;
uint64_t fup_mwait_payload;
uint64_t fup_pwre_payload;
uint64_t cbr_payload;
uint64_t timestamp_insn_cnt;
uint64_t sample_insn_cnt;
uint64_t stuck_ip;
int no_progress;
int stuck_ip_prd;
@ -192,6 +224,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->pgd_ip = params->pgd_ip;
decoder->data = params->data;
decoder->return_compression = params->return_compression;
decoder->branch_enable = params->branch_enable;
decoder->period = params->period;
decoder->period_type = params->period_type;
@ -398,6 +431,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
{
decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
decoder->have_last_ip = true;
}
static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@ -635,6 +669,8 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_PAD:
case INTEL_PT_VMCS:
case INTEL_PT_MNT:
case INTEL_PT_PTWRITE:
case INTEL_PT_PTWRITE_IP:
return 0;
case INTEL_PT_MTC:
@ -733,6 +769,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_TIP_PGD:
case INTEL_PT_TRACESTOP:
case INTEL_PT_EXSTOP:
case INTEL_PT_EXSTOP_IP:
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
case INTEL_PT_OVF:
case INTEL_PT_BAD: /* Does not happen */
default:
@ -898,6 +939,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
decoder->tot_insn_cnt += insn_cnt;
decoder->timestamp_insn_cnt += insn_cnt;
decoder->sample_insn_cnt += insn_cnt;
decoder->period_insn_cnt += insn_cnt;
if (err) {
@ -990,6 +1032,57 @@ out_no_progress:
return err;
}
static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
{
bool ret = false;
if (decoder->set_fup_tx_flags) {
decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags;
decoder->state.type = INTEL_PT_TRANSACTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.flags = decoder->fup_tx_flags;
return true;
}
if (decoder->set_fup_ptw) {
decoder->set_fup_ptw = false;
decoder->state.type = INTEL_PT_PTW;
decoder->state.flags |= INTEL_PT_FUP_IP;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.ptw_payload = decoder->fup_ptw_payload;
return true;
}
if (decoder->set_fup_mwait) {
decoder->set_fup_mwait = false;
decoder->state.type = INTEL_PT_MWAIT_OP;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.mwait_payload = decoder->fup_mwait_payload;
ret = true;
}
if (decoder->set_fup_pwre) {
decoder->set_fup_pwre = false;
decoder->state.type |= INTEL_PT_PWR_ENTRY;
decoder->state.type &= ~INTEL_PT_BRANCH;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.pwre_payload = decoder->fup_pwre_payload;
ret = true;
}
if (decoder->set_fup_exstop) {
decoder->set_fup_exstop = false;
decoder->state.type |= INTEL_PT_EX_STOP;
decoder->state.type &= ~INTEL_PT_BRANCH;
decoder->state.flags |= INTEL_PT_FUP_IP;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
ret = true;
}
return ret;
}
static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
{
struct intel_pt_insn intel_pt_insn;
@ -1003,15 +1096,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
if (err == INTEL_PT_RETURN)
return 0;
if (err == -EAGAIN) {
if (decoder->set_fup_tx_flags) {
decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags;
decoder->state.type = INTEL_PT_TRANSACTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.flags = decoder->fup_tx_flags;
if (intel_pt_fup_event(decoder))
return 0;
}
return err;
}
decoder->set_fup_tx_flags = false;
@ -1360,7 +1446,9 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
{
unsigned int cbr = decoder->packet.payload;
unsigned int cbr = decoder->packet.payload & 0xff;
decoder->cbr_payload = decoder->packet.payload;
if (decoder->cbr == cbr)
return;
@ -1417,6 +1505,13 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_TRACESTOP:
case INTEL_PT_BAD:
case INTEL_PT_PSB:
case INTEL_PT_PTWRITE:
case INTEL_PT_PTWRITE_IP:
case INTEL_PT_EXSTOP:
case INTEL_PT_EXSTOP_IP:
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
return -EAGAIN;
@ -1446,7 +1541,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_FUP:
decoder->pge = true;
intel_pt_set_last_ip(decoder);
if (decoder->packet.count)
intel_pt_set_last_ip(decoder);
break;
case INTEL_PT_MODE_TSX:
@ -1497,6 +1593,13 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_MODE_TSX:
case INTEL_PT_BAD:
case INTEL_PT_PSBEND:
case INTEL_PT_PTWRITE:
case INTEL_PT_PTWRITE_IP:
case INTEL_PT_EXSTOP:
case INTEL_PT_EXSTOP_IP:
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
return -ENOENT;
@ -1625,6 +1728,15 @@ next:
break;
}
intel_pt_set_last_ip(decoder);
if (!decoder->branch_enable) {
decoder->ip = decoder->last_ip;
if (intel_pt_fup_event(decoder))
return 0;
no_tip = false;
break;
}
if (decoder->set_fup_mwait)
no_tip = true;
err = intel_pt_walk_fup(decoder);
if (err != -EAGAIN) {
if (err)
@ -1650,6 +1762,8 @@ next:
break;
case INTEL_PT_PSB:
decoder->last_ip = 0;
decoder->have_last_ip = true;
intel_pt_clear_stack(&decoder->stack);
err = intel_pt_walk_psbend(decoder);
if (err == -EAGAIN)
@ -1696,6 +1810,16 @@ next:
case INTEL_PT_CBR:
intel_pt_calc_cbr(decoder);
if (!decoder->branch_enable &&
decoder->cbr != decoder->cbr_seen) {
decoder->cbr_seen = decoder->cbr;
decoder->state.type = INTEL_PT_CBR_CHG;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.cbr_payload =
decoder->packet.payload;
return 0;
}
break;
case INTEL_PT_MODE_EXEC:
@ -1722,6 +1846,71 @@ next:
case INTEL_PT_PAD:
break;
case INTEL_PT_PTWRITE_IP:
decoder->fup_ptw_payload = decoder->packet.payload;
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
if (decoder->packet.type == INTEL_PT_FUP) {
decoder->set_fup_ptw = true;
no_tip = true;
} else {
intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
decoder->pos);
}
goto next;
case INTEL_PT_PTWRITE:
decoder->state.type = INTEL_PT_PTW;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.ptw_payload = decoder->packet.payload;
return 0;
case INTEL_PT_MWAIT:
decoder->fup_mwait_payload = decoder->packet.payload;
decoder->set_fup_mwait = true;
break;
case INTEL_PT_PWRE:
if (decoder->set_fup_mwait) {
decoder->fup_pwre_payload =
decoder->packet.payload;
decoder->set_fup_pwre = true;
break;
}
decoder->state.type = INTEL_PT_PWR_ENTRY;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.pwrx_payload = decoder->packet.payload;
return 0;
case INTEL_PT_EXSTOP_IP:
err = intel_pt_get_next_packet(decoder);
if (err)
return err;
if (decoder->packet.type == INTEL_PT_FUP) {
decoder->set_fup_exstop = true;
no_tip = true;
} else {
intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
decoder->pos);
}
goto next;
case INTEL_PT_EXSTOP:
decoder->state.type = INTEL_PT_EX_STOP;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
return 0;
case INTEL_PT_PWRX:
decoder->state.type = INTEL_PT_PWR_EXIT;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
decoder->state.pwrx_payload = decoder->packet.payload;
return 0;
default:
return intel_pt_bug(decoder);
}
@ -1730,8 +1919,9 @@ next:
static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
{
return decoder->last_ip || decoder->packet.count == 0 ||
decoder->packet.count == 3 || decoder->packet.count == 6;
return decoder->packet.count &&
(decoder->have_last_ip || decoder->packet.count == 3 ||
decoder->packet.count == 6);
}
/* Walk PSB+ packets to get in sync. */
@ -1750,6 +1940,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
__fallthrough;
case INTEL_PT_TIP_PGE:
case INTEL_PT_TIP:
case INTEL_PT_PTWRITE:
case INTEL_PT_PTWRITE_IP:
case INTEL_PT_EXSTOP:
case INTEL_PT_EXSTOP_IP:
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
intel_pt_log("ERROR: Unexpected packet\n");
return -ENOENT;
@ -1854,14 +2051,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_FUP:
if (decoder->overflow) {
if (intel_pt_have_ip(decoder))
intel_pt_set_ip(decoder);
if (decoder->ip)
return 0;
}
if (decoder->packet.count)
intel_pt_set_last_ip(decoder);
if (intel_pt_have_ip(decoder))
intel_pt_set_ip(decoder);
if (decoder->ip)
return 0;
break;
case INTEL_PT_MTC:
@ -1910,6 +2103,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_PSB:
decoder->last_ip = 0;
decoder->have_last_ip = true;
intel_pt_clear_stack(&decoder->stack);
err = intel_pt_walk_psb(decoder);
if (err)
return err;
@ -1925,6 +2121,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_VMCS:
case INTEL_PT_MNT:
case INTEL_PT_PAD:
case INTEL_PT_PTWRITE:
case INTEL_PT_PTWRITE_IP:
case INTEL_PT_EXSTOP:
case INTEL_PT_EXSTOP_IP:
case INTEL_PT_MWAIT:
case INTEL_PT_PWRE:
case INTEL_PT_PWRX:
default:
break;
}
@ -1935,6 +2138,19 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
{
int err;
decoder->set_fup_tx_flags = false;
decoder->set_fup_ptw = false;
decoder->set_fup_mwait = false;
decoder->set_fup_pwre = false;
decoder->set_fup_exstop = false;
if (!decoder->branch_enable) {
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->overflow = false;
decoder->state.type = 0; /* Do not have a sample */
return 0;
}
intel_pt_log("Scanning for full IP\n");
err = intel_pt_walk_to_ip(decoder);
if (err)
@ -2043,6 +2259,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
decoder->pge = false;
decoder->continuous_period = false;
decoder->have_last_ip = false;
decoder->last_ip = 0;
decoder->ip = 0;
intel_pt_clear_stack(&decoder->stack);
@ -2051,6 +2268,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
if (err)
return err;
decoder->have_last_ip = true;
decoder->pkt_state = INTEL_PT_STATE_NO_IP;
err = intel_pt_walk_psb(decoder);
@ -2069,7 +2287,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
{
uint64_t est = decoder->timestamp_insn_cnt << 1;
uint64_t est = decoder->sample_insn_cnt << 1;
if (!decoder->cbr || !decoder->max_non_turbo_ratio)
goto out;
@ -2077,7 +2295,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
est *= decoder->max_non_turbo_ratio;
est /= decoder->cbr;
out:
return decoder->timestamp + est;
return decoder->sample_timestamp + est;
}
const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
@ -2093,8 +2311,10 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
err = intel_pt_sync(decoder);
break;
case INTEL_PT_STATE_NO_IP:
decoder->have_last_ip = false;
decoder->last_ip = 0;
/* Fall through */
decoder->ip = 0;
__fallthrough;
case INTEL_PT_STATE_ERR_RESYNC:
err = intel_pt_sync_ip(decoder);
break;
@ -2130,15 +2350,29 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
}
} while (err == -ENOLINK);
decoder->state.err = err ? intel_pt_ext_err(err) : 0;
decoder->state.timestamp = decoder->timestamp;
if (err) {
decoder->state.err = intel_pt_ext_err(err);
decoder->state.from_ip = decoder->ip;
decoder->sample_timestamp = decoder->timestamp;
decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
} else {
decoder->state.err = 0;
if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
decoder->cbr_seen = decoder->cbr;
decoder->state.type |= INTEL_PT_CBR_CHG;
decoder->state.cbr_payload = decoder->cbr_payload;
}
if (intel_pt_sample_time(decoder->pkt_state)) {
decoder->sample_timestamp = decoder->timestamp;
decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
}
}
decoder->state.timestamp = decoder->sample_timestamp;
decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
decoder->state.cr3 = decoder->cr3;
decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
if (err)
decoder->state.from_ip = decoder->ip;
return &decoder->state;
}

View File

@ -25,11 +25,18 @@
#define INTEL_PT_IN_TX (1 << 0)
#define INTEL_PT_ABORT_TX (1 << 1)
#define INTEL_PT_ASYNC (1 << 2)
#define INTEL_PT_FUP_IP (1 << 3)
enum intel_pt_sample_type {
INTEL_PT_BRANCH = 1 << 0,
INTEL_PT_INSTRUCTION = 1 << 1,
INTEL_PT_TRANSACTION = 1 << 2,
INTEL_PT_PTW = 1 << 3,
INTEL_PT_MWAIT_OP = 1 << 4,
INTEL_PT_PWR_ENTRY = 1 << 5,
INTEL_PT_EX_STOP = 1 << 6,
INTEL_PT_PWR_EXIT = 1 << 7,
INTEL_PT_CBR_CHG = 1 << 8,
};
enum intel_pt_period_type {
@ -63,6 +70,11 @@ struct intel_pt_state {
uint64_t timestamp;
uint64_t est_timestamp;
uint64_t trace_nr;
uint64_t ptw_payload;
uint64_t mwait_payload;
uint64_t pwre_payload;
uint64_t pwrx_payload;
uint64_t cbr_payload;
uint32_t flags;
enum intel_pt_insn_op insn_op;
int insn_len;
@ -87,6 +99,7 @@ struct intel_pt_params {
bool (*pgd_ip)(uint64_t ip, void *data);
void *data;
bool return_compression;
bool branch_enable;
uint64_t period;
enum intel_pt_period_type period_type;
unsigned max_non_turbo_ratio;

View File

@ -64,6 +64,13 @@ static const char * const packet_name[] = {
[INTEL_PT_PIP] = "PIP",
[INTEL_PT_OVF] = "OVF",
[INTEL_PT_MNT] = "MNT",
[INTEL_PT_PTWRITE] = "PTWRITE",
[INTEL_PT_PTWRITE_IP] = "PTWRITE",
[INTEL_PT_EXSTOP] = "EXSTOP",
[INTEL_PT_EXSTOP_IP] = "EXSTOP",
[INTEL_PT_MWAIT] = "MWAIT",
[INTEL_PT_PWRE] = "PWRE",
[INTEL_PT_PWRX] = "PWRX",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
if (len < 4)
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_CBR;
packet->payload = buf[2];
packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
return 4;
}
@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
}
}
static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
packet->count = (buf[1] >> 5) & 0x3;
packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
INTEL_PT_PTWRITE;
switch (packet->count) {
case 0:
if (len < 6)
return INTEL_PT_NEED_MORE_BYTES;
packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
return 6;
case 1:
if (len < 10)
return INTEL_PT_NEED_MORE_BYTES;
packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
return 10;
default:
return INTEL_PT_BAD_PACKET;
}
}
static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
{
packet->type = INTEL_PT_EXSTOP;
return 2;
}
static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
{
packet->type = INTEL_PT_EXSTOP_IP;
return 2;
}
static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
if (len < 10)
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_MWAIT;
packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
return 10;
}
static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
if (len < 4)
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_PWRE;
memcpy_le64(&packet->payload, buf + 2, 2);
return 4;
}
static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
if (len < 7)
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_PWRX;
memcpy_le64(&packet->payload, buf + 2, 5);
return 7;
}
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
if (len < 2)
return INTEL_PT_NEED_MORE_BYTES;
if ((buf[1] & 0x1f) == 0x12)
return intel_pt_get_ptwrite(buf, len, packet);
switch (buf[1]) {
case 0xa3: /* Long TNT */
return intel_pt_get_long_tnt(buf, len, packet);
@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_tma(buf, len, packet);
case 0xC3: /* 3-byte header */
return intel_pt_get_3byte(buf, len, packet);
case 0x62: /* EXSTOP no IP */
return intel_pt_get_exstop(packet);
case 0xE2: /* EXSTOP with IP */
return intel_pt_get_exstop_ip(packet);
case 0xC2: /* MWAIT */
return intel_pt_get_mwait(buf, len, packet);
case 0x22: /* PWRE */
return intel_pt_get_pwre(buf, len, packet);
case 0xA2: /* PWRX */
return intel_pt_get_pwrx(buf, len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
name, payload, nr);
return ret;
case INTEL_PT_PTWRITE:
return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
case INTEL_PT_PTWRITE_IP:
return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
case INTEL_PT_EXSTOP:
return snprintf(buf, buf_len, "%s IP:0", name);
case INTEL_PT_EXSTOP_IP:
return snprintf(buf, buf_len, "%s IP:1", name);
case INTEL_PT_MWAIT:
return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
name, payload, (unsigned int)(payload & 0xff),
(unsigned int)((payload >> 32) & 0x3));
case INTEL_PT_PWRE:
return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
name, payload, !!(payload & 0x80),
(unsigned int)((payload >> 12) & 0xf),
(unsigned int)((payload >> 8) & 0xf));
case INTEL_PT_PWRX:
return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
name, payload,
(unsigned int)((payload >> 4) & 0xf),
(unsigned int)(payload & 0xf),
(unsigned int)((payload >> 8) & 0xf));
default:
break;
}

View File

@ -52,6 +52,13 @@ enum intel_pt_pkt_type {
INTEL_PT_PIP,
INTEL_PT_OVF,
INTEL_PT_MNT,
INTEL_PT_PTWRITE,
INTEL_PT_PTWRITE_IP,
INTEL_PT_EXSTOP,
INTEL_PT_EXSTOP_IP,
INTEL_PT_MWAIT,
INTEL_PT_PWRE,
INTEL_PT_PWRX,
};
struct intel_pt_pkt {

View File

@ -668,6 +668,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt)
return true;
}
static bool intel_pt_branch_enable(struct intel_pt *pt)
{
struct perf_evsel *evsel;
u64 config;
evlist__for_each_entry(pt->session->evlist, evsel) {
if (intel_pt_get_config(pt, &evsel->attr, &config) &&
(config & 1) && !(config & 0x2000))
return false;
}
return true;
}
static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
struct perf_evsel *evsel;
@ -799,6 +812,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.walk_insn = intel_pt_walk_next_insn;
params.data = ptq;
params.return_compression = intel_pt_return_compression(pt);
params.branch_enable = intel_pt_branch_enable(pt);
params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
params.mtc_period = intel_pt_mtc_period(pt);
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
@ -1308,18 +1322,14 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false;
if (pt->sample_instructions &&
(state->type & INTEL_PT_INSTRUCTION) &&
(!pt->synth_opts.initial_skip ||
pt->num_events++ >= pt->synth_opts.initial_skip)) {
(state->type & INTEL_PT_INSTRUCTION)) {
err = intel_pt_synth_instruction_sample(ptq);
if (err)
return err;
}
if (pt->sample_transactions &&
(state->type & INTEL_PT_TRANSACTION) &&
(!pt->synth_opts.initial_skip ||
pt->num_events++ >= pt->synth_opts.initial_skip)) {
(state->type & INTEL_PT_TRANSACTION)) {
err = intel_pt_synth_transaction_sample(ptq);
if (err)
return err;
@ -2025,6 +2035,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
return err;
}
pt->sample_transactions = true;
pt->transactions_sample_type = attr.sample_type;
pt->transactions_id = id;
id += 1;
evlist__for_each_entry(evlist, evsel) {

View File

@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
pr_err("File does not contain CPU events. "
"Remove -c option to proceed.\n");
"Remove -C option to proceed.\n");
return -1;
}
}

View File

@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
static struct rblist runtime_saved_values;
static bool have_frontend_stalled;
@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
next = rb_first(&runtime_saved_values.entries);
while (next) {
@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, SMI_NUM))
update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, APERF))
update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
if (counter->collect_stat) {
struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
return sanitize_val(1.0 - sum);
}
static void print_smi_cost(int cpu, struct perf_evsel *evsel,
struct perf_stat_output_ctx *out)
{
double smi_num, aperf, cycles, cost = 0.0;
int ctx = evsel_context(evsel);
const char *color = NULL;
smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
if ((cycles == 0) || (aperf == 0))
return;
if (smi_num)
cost = (aperf - cycles) / aperf * 100.00;
if (cost > 10)
color = PERF_COLOR_RED;
out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out)
@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
}
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
print_smi_cost(cpu, evsel, out);
} else {
print_metric(ctxp, NULL, NULL, NULL, 0);
}

View File

@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
ID(SMI_NUM, msr/smi/),
ID(APERF, msr/aperf/),
};
#undef ID

View File

@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
PERF_STAT_EVSEL_ID__SMI_NUM,
PERF_STAT_EVSEL_ID__APERF,
PERF_STAT_EVSEL_ID__MAX,
};

View File

@ -343,43 +343,6 @@ int perf_event_paranoid(void)
return value;
}
bool find_process(const char *name)
{
size_t len = strlen(name);
DIR *dir;
struct dirent *d;
int ret = -1;
dir = opendir(procfs__mountpoint());
if (!dir)
return false;
/* Walk through the directory. */
while (ret && (d = readdir(dir)) != NULL) {
char path[PATH_MAX];
char *data;
size_t size;
if ((d->d_type != DT_DIR) ||
!strcmp(".", d->d_name) ||
!strcmp("..", d->d_name))
continue;
scnprintf(path, sizeof(path), "%s/%s/comm",
procfs__mountpoint(), d->d_name);
if (filename__read_str(path, &data, &size))
continue;
ret = strncmp(name, data, len);
free(data);
}
closedir(dir);
return ret ? false : true;
}
static int
fetch_ubuntu_kernel_version(unsigned int *puint)
{
@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
size_t line_len = 0;
char *ptr, *line = NULL;
int version, patchlevel, sublevel, err;
FILE *vsig = fopen("/proc/version_signature", "r");
FILE *vsig;
if (!puint)
return 0;
vsig = fopen("/proc/version_signature", "r");
if (!vsig) {
pr_debug("Open /proc/version_signature failed: %s\n",
strerror(errno));
@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
goto errout;
}
if (puint)
*puint = (version << 16) + (patchlevel << 8) + sublevel;
*puint = (version << 16) + (patchlevel << 8) + sublevel;
err = 0;
errout:
free(line);
@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str,
str[str_size - 1] = '\0';
}
if (!puint || int_ver_ready)
return 0;
err = sscanf(utsname.release, "%d.%d.%d",
&version, &patchlevel, &sublevel);
@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str,
return -1;
}
if (puint && !int_ver_ready)
*puint = (version << 16) + (patchlevel << 8) + sublevel;
*puint = (version << 16) + (patchlevel << 8) + sublevel;
return 0;
}

View File

@ -1,7 +1,6 @@
#ifndef GIT_COMPAT_UTIL_H
#define GIT_COMPAT_UTIL_H
#define _ALL_SOURCE 1
#define _BSD_SOURCE 1
/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
#define _DEFAULT_SOURCE 1
@ -49,8 +48,6 @@ int hex2u64(const char *ptr, u64 *val);
extern unsigned int page_size;
extern int cacheline_size;
bool find_process(const char *name);
int fetch_kernel_version(unsigned int *puint,
char *str, size_t str_sz);
#define KVER_VERSION(x) (((x) >> 16) & 0xff)