From 69807a638f91524ed75027f808cd277417ecee7a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Nov 2015 12:22:47 -0500 Subject: [PATCH 01/20] tools/power turbostat: decode more CPUID fields for debugging, dump a few more fields: CPUID(1): SSE3 MONITOR EIST TM2 TSC MSR ACPI-TM TM cpu0: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MONITOR) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 ++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dac7e05a6ac..7ef8b9feb7f2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2673,6 +2673,19 @@ guess: return 0; } + +void decode_misc_enable_msr(void) +{ + unsigned long long msr; + + if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) + fprintf(stderr, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", + base_cpu, msr, + msr & (1 << 3) ? "TCC" : "", + msr & (1 << 16) ? "EIST" : "", + msr & (1 << 18) ? "MONITOR" : ""); +} + void process_cpuid() { unsigned int eax, ebx, ecx, edx, max_level; @@ -2696,9 +2709,19 @@ void process_cpuid() if (family == 6 || family == 0xf) model += ((fms >> 16) & 0xf) << 4; - if (debug) + if (debug) { fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); + fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n", + ecx & (1 << 0) ? "SSE3" : "-", + ecx & (1 << 3) ? "MONITOR" : "-", + ecx & (1 << 7) ? "EIST" : "-", + ecx & (1 << 8) ? "TM2" : "-", + edx & (1 << 4) ? "TSC" : "-", + edx & (1 << 5) ? "MSR" : "-", + edx & (1 << 22) ? "ACPI-TM" : "-", + edx & (1 << 29) ? "TM" : "-"); + } if (!(edx & (1 << 5))) errx(1, "CPUID: no MSR"); @@ -2739,6 +2762,9 @@ void process_cpuid() do_ptm ? "" : "No ", has_epb ? "" : "No "); + if (debug) + decode_misc_enable_msr(); + if (max_level > 0x15) { unsigned int eax_crystal; unsigned int ebx_tsc; From 61a87ba7893a256d86c7eea6a7ab10d38ccac9b2 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 23 Nov 2015 02:30:51 -0500 Subject: [PATCH 02/20] tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency This CPUID leaf is available on Skylake: CPUID(0x16): base_mhz: 1500 max_mhz: 2200 bus_mhz: 100 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7ef8b9feb7f2..4c4470f3ac65 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2688,7 +2688,7 @@ void decode_misc_enable_msr(void) void process_cpuid() { - unsigned int eax, ebx, ecx, edx, max_level; + unsigned int eax, ebx, ecx, edx, max_level, max_extended_level; unsigned int fms, family, model, stepping; eax = ebx = ecx = edx = 0; @@ -2732,9 +2732,9 @@ void process_cpuid() * This check is valid for both Intel and AMD. */ ebx = ecx = edx = 0; - __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); + __get_cpuid(0x80000000, &max_extended_level, &ebx, &ecx, &edx); - if (max_level >= 0x80000007) { + if (max_extended_level >= 0x80000007) { /* * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 @@ -2765,7 +2765,7 @@ void process_cpuid() if (debug) decode_misc_enable_msr(); - if (max_level > 0x15) { + if (max_level >= 0x15) { unsigned int eax_crystal; unsigned int ebx_tsc; @@ -2799,6 +2799,19 @@ void process_cpuid() } } } + if (max_level >= 0x16) { + unsigned int base_mhz, max_mhz, bus_mhz, edx; + + /* + * CPUID 16H Base MHz, Max MHz, Bus MHz + */ + base_mhz = max_mhz = bus_mhz = edx = 0; + + __get_cpuid(0x16, &base_mhz, &max_mhz, &bus_mhz, &edx); + if (debug) + fprintf(stderr, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", + base_mhz, max_mhz, bus_mhz); + } if (has_aperf) aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); @@ -3151,7 +3164,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" + fprintf(stderr, "turbostat version 4.9 22 Nov, 2015" " - Len Brown \n"); } From 670e27d809a9a29943e1d2e45823fa4fc16c29f0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 1 Dec 2015 01:36:39 -0500 Subject: [PATCH 03/20] x86 msr-index: Simplify syntax for HWP fields syntax only, no functional change Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 690b4027e17c..5c5e7e53d824 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -230,10 +230,10 @@ #define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) /* IA32_HWP_CAPABILITIES */ -#define HWP_HIGHEST_PERF(x) (x & 0xff) -#define HWP_GUARANTEED_PERF(x) ((x & (0xff << 8)) >>8) -#define HWP_MOSTEFFICIENT_PERF(x) ((x & (0xff << 16)) >>16) -#define HWP_LOWEST_PERF(x) ((x & (0xff << 24)) >>24) +#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) +#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) +#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) +#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ #define HWP_MIN_PERF(x) (x & 0xff) From 7f5c258e1ce1e0909d3195694ac79f143051e513 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 1 Dec 2015 01:36:39 -0500 Subject: [PATCH 04/20] tools/power turbostat: decode HWP registers # turbostat --debug ... CPUID(6): ... HWP, HWPnotify, HWPwindow, HWPepp, HWPpkg ... ... cpu0: MSR_PM_ENABLE: 0x00000001 (HWP) cpu0: MSR_HWP_CAPABILITIES: 0x01050916 (high 0x16 guar 0x9 eff 0x5 low 0x1) cpu0: MSR_HWP_REQUEST: 0x80001604 (min 0x4 max 0x16 des 0x0 epp 0x80 window 0x0 pkg 0x0) cpu0: MSR_HWP_INTERRUPT: 0x00000001 (EN_Guaranteed_Perf_Change, Dis_Excursion_Min) cpu0: MSR_HWP_STATUS: 0x00000000 (No-Guaranteed_Perf_Change, No-Excursion_Min) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 124 ++++++++++++++++++++++++-- 1 file changed, 118 insertions(+), 6 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4c4470f3ac65..af3b955aad1d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -98,6 +98,12 @@ unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; double discover_bclk(unsigned int family, unsigned int model); +unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ + /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ +unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ +unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ +unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ +unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -2041,6 +2047,97 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; } +/* + * print_hwp() + * Decode the MSR_HWP_CAPABILITIES + */ +int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + int cpu; + + if (!has_hwp) + return 0; + + cpu = t->cpu_id; + + /* MSR_HWP_CAPABILITIES is per-package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (get_msr(cpu, MSR_PM_ENABLE, &msr)) + return 0; + + fprintf(stderr, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", + cpu, msr, (msr & (1 << 0)) ? "" : "No-"); + + /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ + if ((msr & (1 << 0)) == 0) + return 0; + + if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) + return 0; + + fprintf(stderr, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " + "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n", + cpu, msr, + (unsigned int)HWP_HIGHEST_PERF(msr), + (unsigned int)HWP_GUARANTEED_PERF(msr), + (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), + (unsigned int)HWP_LOWEST_PERF(msr)); + + if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) + return 0; + + fprintf(stderr, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " + "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n", + cpu, msr, + (unsigned int)(((msr) >> 0) & 0xff), + (unsigned int)(((msr) >> 8) & 0xff), + (unsigned int)(((msr) >> 16) & 0xff), + (unsigned int)(((msr) >> 24) & 0xff), + (unsigned int)(((msr) >> 32) & 0xff3), + (unsigned int)(((msr) >> 42) & 0x1)); + + if (has_hwp_pkg) { + if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) + return 0; + + fprintf(stderr, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " + "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n", + cpu, msr, + (unsigned int)(((msr) >> 0) & 0xff), + (unsigned int)(((msr) >> 8) & 0xff), + (unsigned int)(((msr) >> 16) & 0xff), + (unsigned int)(((msr) >> 24) & 0xff), + (unsigned int)(((msr) >> 32) & 0xff3)); + } + if (has_hwp_notify) { + if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) + return 0; + + fprintf(stderr, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " + "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", + cpu, msr, + ((msr) & 0x1) ? "EN" : "Dis", + ((msr) & 0x2) ? "EN" : "Dis"); + } + if (get_msr(cpu, MSR_HWP_STATUS, &msr)) + return 0; + + fprintf(stderr, "cpu%d: MSR_HWP_STATUS: 0x%08llx " + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", + cpu, msr, + ((msr) & 0x1) ? "" : "No-", + ((msr) & 0x2) ? "" : "No-"); + + return 0; +} /* * print_perf_limit() @@ -2686,6 +2783,7 @@ void decode_misc_enable_msr(void) msr & (1 << 18) ? "MONITOR" : ""); } + void process_cpuid() { unsigned int eax, ebx, ecx, edx, max_level, max_extended_level; @@ -2753,14 +2851,25 @@ void process_cpuid() has_aperf = ecx & (1 << 0); do_dts = eax & (1 << 0); do_ptm = eax & (1 << 6); + has_hwp = eax & (1 << 7); + has_hwp_notify = eax & (1 << 8); + has_hwp_activity_window = eax & (1 << 9); + has_hwp_epp = eax & (1 << 10); + has_hwp_pkg = eax & (1 << 11); has_epb = ecx & (1 << 3); if (debug) - fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", - has_aperf ? "" : "No ", - do_dts ? "" : "No ", - do_ptm ? "" : "No ", - has_epb ? "" : "No "); + fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " + "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", + has_aperf ? "" : "No-", + do_dts ? "" : "No-", + do_ptm ? "" : "No-", + has_hwp ? "" : "No-", + has_hwp_notify ? "" : "No-", + has_hwp_activity_window ? "" : "No-", + has_hwp_epp ? "" : "No-", + has_hwp_pkg ? "" : "No-", + has_epb ? "" : "No-"); if (debug) decode_misc_enable_msr(); @@ -3087,6 +3196,9 @@ void turbostat_init() process_cpuid(); + if (debug) + for_all_cpus(print_hwp, ODD_COUNTERS); + if (debug) for_all_cpus(print_epb, ODD_COUNTERS); @@ -3164,7 +3276,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.9 22 Nov, 2015" + fprintf(stderr, "turbostat version 4.10 10 Dec, 2015" " - Len Brown \n"); } From f0057310b40efe9f797ff337e9464e6a6fb9d782 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 3 Dec 2015 01:35:36 -0500 Subject: [PATCH 05/20] tools/power turbostat: Decode MSR_MISC_PWR_MGMT This MSR is helpful to show if P-state HW coordination is enabled or disabled. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index af3b955aad1d..c600340dfc4e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2783,6 +2783,26 @@ void decode_misc_enable_msr(void) msr & (1 << 18) ? "MONITOR" : ""); } +/* + * Decode MSR_MISC_PWR_MGMT + * + * Decode the bits according to the Nehalem documentation + * bit[0] seems to continue to have same meaning going forward + * bit[1] less so... + */ +void decode_misc_pwr_mgmt_msr(void) +{ + unsigned long long msr; + + if (!do_nhm_platform_info) + return; + + if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) + fprintf(stderr, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n", + base_cpu, msr, + msr & (1 << 0) ? "DIS" : "EN", + msr & (1 << 1) ? "EN" : "DIS"); +} void process_cpuid() { @@ -2936,6 +2956,9 @@ void process_cpuid() do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); + if (debug) + decode_misc_pwr_mgmt_msr(); + rapl_probe(family, model); perf_limit_reasons_probe(family, model); From 2a0609c02e6558df6075f258af98a54a74b050ff Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 12 Feb 2016 22:44:48 -0500 Subject: [PATCH 06/20] tools/power turbostat: allow sub-sec intervals turbostat -i interval_sec will sample and display statistics every interval_sec. interval_sec used to be a whole number of seconds, but now we accept a decimal, as small as 0.001 sec (1 ms). Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 622db685b4f9..4d8f198f348f 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -34,7 +34,7 @@ name as necessary to disambiguate it from others is necessary. Note that option \fB--debug\fP displays additional system configuration information. Invoking this parameter more than once may also enable internal turbostat debug information. .PP -\fB--interval seconds\fP overrides the default 5-second measurement interval. +\fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP \fB--help\fP displays usage for the most common parameters. .PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c600340dfc4e..e411cc435137 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -38,12 +38,13 @@ #include #include #include +#include #include #include #include char *proc_stat = "/proc/stat"; -unsigned int interval_sec = 5; +struct timespec interval_ts = {5, 0}; unsigned int debug; unsigned int rapl_joules; unsigned int summary_only; @@ -1728,7 +1729,7 @@ restart: re_initialize(); goto restart; } - sleep(interval_sec); + nanosleep(&interval_ts, NULL); retval = for_all_cpus(get_counters, ODD_COUNTERS); if (retval < -1) { exit(retval); @@ -1742,7 +1743,7 @@ restart: compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); flush_stdout(); - sleep(interval_sec); + nanosleep(&interval_ts, NULL); retval = for_all_cpus(get_counters, EVEN_COUNTERS); if (retval < -1) { exit(retval); @@ -3347,7 +3348,18 @@ void cmdline(int argc, char **argv) help(); exit(1); case 'i': - interval_sec = atoi(optarg); + { + double interval = strtod(optarg, NULL); + + if (interval < 0.001) { + fprintf(stderr, "interval %f seconds is too small\n", + interval); + exit(2); + } + + interval_ts.tv_sec = interval; + interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; + } break; case 'J': rapl_joules++; From 121b48bb77187cf2ed3053e147d2e6c1e864083c Mon Sep 17 00:00:00 2001 From: "Chrzaniuk, Hubert" Date: Wed, 10 Feb 2016 16:35:17 +0100 Subject: [PATCH 07/20] tools/power turbostat: Intel Xeon x200: fix erroneous bclk value x200 does not enable any way to programmatically obtain bus clock speed. Bclk for the architecture has a fixed value of 100 MHz. At the same time x200 cannot be included in has_snb_msrs since it does not support C7 idle state. prior to this patch, MHz values reported on this chip were erroneously calculated using bclk of 133MHz, causing MHz values to be reported 33% higher than actual. Signed-off-by: Hubert Chrzaniuk Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e411cc435137..652d08df5d12 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2697,7 +2697,7 @@ double slm_bclk(void) double discover_bclk(unsigned int family, unsigned int model) { - if (has_snb_msrs(family, model)) + if (has_snb_msrs(family, model) || is_knl(family, model)) return 100.00; else if (is_slm(family, model)) return slm_bclk(); From cbf97abaf3689652bcddc0741dc49629d1838142 Mon Sep 17 00:00:00 2001 From: Hubert Chrzaniuk Date: Wed, 10 Feb 2016 14:55:22 +0100 Subject: [PATCH 08/20] tools/power turbostat: Intel Xeon x200: fix turbo-ratio decoding Following changes have been made: - changed MSR_NHM_TURBO_RATIO_LIMIT to MSR_TURBO_RATIO_LIMIT in debug print for consistency with Developer Manual - updated definition of bitfields in MSR_TURBO_RATIO_LIMIT and appropriate parsing code - added x200 to list of architectures that do not support Nahlem compatible definition of MSR_TURBO_RATIO_LIMIT register (x200 has the register but bits definition is custom) - fixed typo in code that parses MSR_TURBO_RATIO_LIMIT (logical instead of bitwise operator) - changed MSR_TURBO_RATIO_LIMIT parsing algorithm so the print out had the same order as implementations for other platforms Signed-off-by: Hubert Chrzaniuk Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 49 +++++++++++++-------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 652d08df5d12..4e5386d04709 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1328,21 +1328,23 @@ dump_nhm_turbo_ratio_limits(void) static void dump_knl_turbo_ratio_limits(void) { - int cores; - unsigned int ratio; + const unsigned int buckets_no = 7; + unsigned long long msr; - int delta_cores; - int delta_ratio; - int i; + int delta_cores, delta_ratio; + int i, b_nr; + unsigned int cores[buckets_no]; + unsigned int ratio[buckets_no]; get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", + fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); /** * Turbo encoding in KNL is as follows: - * [7:0] -- Base value of number of active cores of bucket 1. + * [0] -- Reserved + * [7:1] -- Base value of number of active cores of bucket 1. * [15:8] -- Base value of freq ratio of bucket 1. * [20:16] -- +ve delta of number of active cores of bucket 2. * i.e. active cores of bucket 2 = @@ -1361,29 +1363,25 @@ dump_knl_turbo_ratio_limits(void) * [60:56]-- +ve delta of number of active cores of bucket 7. * [63:61]-- -ve delta of freq ratio of bucket 7. */ - cores = msr & 0xFF; - ratio = (msr >> 8) && 0xFF; - if (ratio > 0) - fprintf(stderr, - "%d * %.0f = %.0f MHz max turbo %d active cores\n", - ratio, bclk, ratio * bclk, cores); - for (i = 16; i < 64; i = i + 8) { + b_nr = 0; + cores[b_nr] = (msr & 0xFF) >> 1; + ratio[b_nr] = (msr >> 8) & 0xFF; + + for (i = 16; i < 64; i += 8) { delta_cores = (msr >> i) & 0x1F; - delta_ratio = (msr >> (i + 5)) && 0x7; - if (!delta_cores || !delta_ratio) - return; - cores = cores + delta_cores; - ratio = ratio - delta_ratio; + delta_ratio = (msr >> (i + 5)) & 0x7; - /** -ve ratios will make successive ratio calculations - * negative. Hence return instead of carrying on. - */ - if (ratio > 0) + cores[b_nr + 1] = cores[b_nr] + delta_cores; + ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; + b_nr++; + } + + for (i = buckets_no - 1; i >= 0; i--) + if (i > 0 ? ratio[i] != ratio[i - 1] : 1) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo %d active cores\n", - ratio, bclk, ratio * bclk, cores); - } + ratio[i], bclk, ratio[i] * bclk, cores[i]); } static void @@ -1896,6 +1894,7 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) /* Nehalem compatible, but do not include turbo-ratio limit support */ case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ + case 0x57: /* PHI - Knights Landing (different MSR definition) */ return 0; default: return 1; From 75d2e44e60490ba1fee076a5f4dcfbdc8598e8c4 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 13 Feb 2016 23:41:53 -0500 Subject: [PATCH 09/20] tools/power turbostat: re-name "%Busy" field to "Busy%" some tools processing turbostat output have difficulty with items that begin with %... Reported-by: Jacob Pan Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 16 ++++++++-------- tools/power/x86/turbostat/turbostat.c | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 4d8f198f348f..7771eea3cdec 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -61,7 +61,7 @@ displays the statistics gathered since it was forked. .nf \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. \fBAVG_MHz\fP number of cycles executed divided by time elapsed. -\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. +\fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. .fi @@ -89,7 +89,7 @@ Without any parameters, turbostat displays statistics ever 5 seconds. for turbostat to fork). .nf [root@hsw]# ./turbostat - CPU Avg_MHz %Busy Bzy_MHz TSC_MHz + CPU Avg_MHz Busy% Bzy_MHz TSC_MHz - 488 12.51 3898 3498 0 0 0.01 3885 3498 4 3897 99.99 3898 3498 @@ -145,7 +145,7 @@ cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) - Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 0 4 3897 99.98 3898 3498 0 0.02 @@ -178,7 +178,7 @@ until ^C while the other CPUs are mostly idle: .nf root@hsw: turbostat cat /dev/zero > /dev/null ^C - CPU Avg_MHz %Busy Bzy_MHz TSC_MHz + CPU Avg_MHz Busy% Bzy_MHz TSC_MHz - 482 12.51 3854 3498 0 0 0.01 1960 3498 4 0 0.00 2128 3498 @@ -192,12 +192,12 @@ root@hsw: turbostat cat /dev/zero > /dev/null .fi Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit. -The first row shows the average MHz and %Busy across all the processors in the system. +The first row shows the average MHz and Busy% across all the processors in the system. Note that the Avg_MHz column reflects the total number of cycles executed -divided by the measurement interval. If the %Busy column is 100%, +divided by the measurement interval. If the Busy% column is 100%, then the processor was running at that speed the entire interval. -The Avg_MHz multiplied by the %Busy results in the Bzy_MHz -- +The Avg_MHz multiplied by the Busy% results in the Bzy_MHz -- which is the average frequency while the processor was executing -- not including any non-busy idle time. @@ -233,7 +233,7 @@ in the brand string in /proc/cpuinfo. On a system where the TSC stops in idle, TSC_MHz will drop below the processor's base frequency. -%Busy = MPERF_delta/TSC_delta +Busy% = MPERF_delta/TSC_delta Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4e5386d04709..947239b53e17 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -293,7 +293,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) /* * Example Format w/ field column widths: * - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt + * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 */ @@ -308,7 +308,7 @@ void print_header(void) if (has_aperf) outp += sprintf(outp, " Avg_MHz"); if (has_aperf) - outp += sprintf(outp, " %%Busy"); + outp += sprintf(outp, " Busy%%"); if (has_aperf) outp += sprintf(outp, " Bzy_MHz"); outp += sprintf(outp, " TSC_MHz"); @@ -511,7 +511,7 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%8.0f", 1.0 / units * t->aperf / interval_float); - /* %Busy */ + /* Busy% */ if (has_aperf) { if (!skip_c0) outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); From b7d8c1483bbf6ec9d2dd76d6a1c91a38c3f6ac35 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 13 Feb 2016 23:36:17 -0500 Subject: [PATCH 10/20] tools/power turbostat: add --out option for saving output in a file By default... Turbostat --debug gconfiguration info goes to stderr. In FORK mode, turbostat statistics go to stderr. In PERIODIC mode, turbostat statistics go to stdout. These defaults do not change, but an option "--out file" will send all output above only to the specified file. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 14 +- tools/power/x86/turbostat/turbostat.c | 281 ++++++++++++++------------ 2 files changed, 160 insertions(+), 135 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 7771eea3cdec..89a55d5e32f3 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -36,6 +36,9 @@ more than once may also enable internal turbostat debug information. .PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP +\fB--out output_file\fP turbostat output is written to the specified output_file. +The file is truncated if it already exists, and it is created if it does not exist. +.PP \fB--help\fP displays usage for the most common parameters. .PP \fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. @@ -83,10 +86,11 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. .fi .PP -.SH EXAMPLE +.SH PERIODIC EXAMPLE Without any parameters, turbostat displays statistics ever 5 seconds. -(override interval with "-i sec" option, or specify a command -for turbostat to fork). +Periodic output goes to stdout, by default, unless --out is used to specify an output file. +The 5-second interval can be changed with th "-i sec" option. +Or a command may be specified as in "FORK EXAMPLE" below. .nf [root@hsw]# ./turbostat CPU Avg_MHz Busy% Bzy_MHz TSC_MHz @@ -171,7 +175,9 @@ The --debug option adds additional columns to the measurement ouput, including C See the field definitions above. .SH FORK EXAMPLE If turbostat is invoked with a command, it will fork that command -and output the statistics gathered when the command exits. +and output the statistics gathered after the command exits. +In this case, turbostat output goes to stderr, by default. +Output can instead be saved to a file using the --out option. eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds until ^C while the other CPUs are mostly idle: diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 947239b53e17..5f9f41a3bf91 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -44,6 +44,7 @@ #include char *proc_stat = "/proc/stat"; +FILE *outf; struct timespec interval_ts = {5, 0}; unsigned int debug; unsigned int rapl_joules; @@ -652,15 +653,24 @@ done: return 0; } -void flush_stdout() +void flush_output_stdout(void) { - fputs(output_buffer, stdout); - fflush(stdout); + FILE *filep; + + if (outf == stderr) + filep = stdout; + else + filep = outf; + + fputs(output_buffer, filep); + fflush(filep); + outp = output_buffer; } -void flush_stderr() +void flush_output_stderr(void) { - fputs(output_buffer, stderr); + fputs(output_buffer, outf); + fflush(outf); outp = output_buffer; } void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -752,9 +762,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, } else { if (!aperf_mperf_unstable) { - fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); - fprintf(stderr, "* Frequency results do not cover entire interval *\n"); - fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); + fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname); + fprintf(outf, "* Frequency results do not cover entire interval *\n"); + fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n"); aperf_mperf_unstable = 1; } @@ -789,7 +799,8 @@ delta_thread(struct thread_data *new, struct thread_data *old, } if (old->mperf == 0) { - if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); + if (debug > 1) + fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); old->mperf = 1; /* divide by 0 protection */ } @@ -989,7 +1000,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) unsigned long long msr; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } @@ -1182,18 +1193,18 @@ dump_nhm_platform_info(void) get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); - fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 40) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", + fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", + fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n", ratio, bclk, ratio * bclk); get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); - fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", + fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); return; @@ -1207,16 +1218,16 @@ dump_hsw_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1229,46 +1240,46 @@ dump_ivt_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1281,46 +1292,46 @@ dump_nhm_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", + fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); return; } @@ -1338,7 +1349,7 @@ dump_knl_turbo_ratio_limits(void) get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); /** @@ -1379,7 +1390,7 @@ dump_knl_turbo_ratio_limits(void) for (i = buckets_no - 1; i >= 0; i--) if (i > 0 ? ratio[i] != ratio[i - 1] : 1) - fprintf(stderr, + fprintf(outf, "%d * %.0f = %.0f MHz max turbo %d active cores\n", ratio[i], bclk, ratio[i] * bclk, cores[i]); } @@ -1394,9 +1405,9 @@ dump_nhm_cst_cfg(void) #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); - fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", + fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", @@ -1413,41 +1424,41 @@ dump_config_tdp(void) unsigned long long msr; get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); - fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); + fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); if (msr) { - fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); - fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); - fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); - fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); + fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); + fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); + fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); + fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); } - fprintf(stderr, ")\n"); + fprintf(outf, ")\n"); get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); if (msr) { - fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); - fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); - fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); - fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); + fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); + fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); + fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); + fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); } - fprintf(stderr, ")\n"); + fprintf(outf, ")\n"); get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); - fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); if ((msr) & 0x3) - fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); - fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); - fprintf(stderr, ")\n"); + fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); + fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); + fprintf(outf, ")\n"); get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); - fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); - fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); - fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); - fprintf(stderr, ")\n"); + fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); + fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); + fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); + fprintf(outf, ")\n"); } void free_all_buffers(void) @@ -1486,7 +1497,7 @@ void free_all_buffers(void) */ FILE *fopen_or_die(const char *path, const char *mode) { - FILE *filep = fopen(path, "r"); + FILE *filep = fopen(path, mode); if (!filep) err(1, "%s: open failed", path); return filep; @@ -1740,7 +1751,7 @@ restart: for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); - flush_stdout(); + flush_output_stdout(); nanosleep(&interval_ts, NULL); retval = for_all_cpus(get_counters, EVEN_COUNTERS); if (retval < -1) { @@ -1754,7 +1765,7 @@ restart: for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); compute_average(ODD_COUNTERS); format_all_counters(ODD_COUNTERS); - flush_stdout(); + flush_output_stdout(); } } @@ -2022,7 +2033,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } @@ -2043,7 +2054,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) epb_string = "custom"; break; } - fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); + fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); return 0; } @@ -2066,14 +2077,14 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } if (get_msr(cpu, MSR_PM_ENABLE, &msr)) return 0; - fprintf(stderr, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", + fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-"); /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ @@ -2083,7 +2094,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) return 0; - fprintf(stderr, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " + fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n", cpu, msr, (unsigned int)HWP_HIGHEST_PERF(msr), @@ -2094,7 +2105,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) return 0; - fprintf(stderr, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " + fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n", cpu, msr, (unsigned int)(((msr) >> 0) & 0xff), @@ -2108,7 +2119,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) return 0; - fprintf(stderr, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " + fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n", cpu, msr, (unsigned int)(((msr) >> 0) & 0xff), @@ -2121,7 +2132,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) return 0; - fprintf(stderr, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " + fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", @@ -2130,7 +2141,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_HWP_STATUS, &msr)) return 0; - fprintf(stderr, "cpu%d: MSR_HWP_STATUS: 0x%08llx " + fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "" : "No-", @@ -2154,14 +2165,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data return 0; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } if (do_core_perf_limit_reasons) { get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); - fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); - fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", + fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", (msr & 1 << 15) ? "bit15, " : "", (msr & 1 << 14) ? "bit14, " : "", (msr & 1 << 13) ? "Transitions, " : "", @@ -2176,7 +2187,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 2) ? "bit2, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); - fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", (msr & 1 << 31) ? "bit31, " : "", (msr & 1 << 30) ? "bit30, " : "", (msr & 1 << 29) ? "Transitions, " : "", @@ -2195,8 +2206,8 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data } if (do_gfx_perf_limit_reasons) { get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); - fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); - fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", + fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", (msr & 1 << 0) ? "PROCHOT, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 4) ? "Graphics, " : "", @@ -2205,7 +2216,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 9) ? "GFXPwr, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); - fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", + fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 20) ? "Graphics, " : "", @@ -2217,15 +2228,15 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data } if (do_ring_perf_limit_reasons) { get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); - fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); - fprintf(stderr, " (Active: %s%s%s%s%s%s)", + fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(outf, " (Active: %s%s%s%s%s%s)", (msr & 1 << 0) ? "PROCHOT, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 8) ? "Amps, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); - fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", + fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 22) ? "VR-Therm, " : "", @@ -2348,7 +2359,7 @@ void rapl_probe(unsigned int family, unsigned int model) rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; if (debug) - fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); + fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); return; } @@ -2390,7 +2401,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } @@ -2399,7 +2410,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; dts = (msr >> 16) & 0x7F; - fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tcc_activation_temp - dts); #ifdef THERM_DEBUG @@ -2408,7 +2419,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); #endif } @@ -2422,7 +2433,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; resolution = (msr >> 27) & 0xF; - fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", + fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", cpu, msr, tcc_activation_temp - dts, resolution); #ifdef THERM_DEBUG @@ -2431,7 +2442,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); #endif } @@ -2441,7 +2452,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p void print_power_limit_msr(int cpu, unsigned long long msr, char *label) { - fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", + fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", cpu, label, ((msr >> 15) & 1) ? "EN" : "DIS", ((msr >> 0) & 0x7FFF) * rapl_power_units, @@ -2465,7 +2476,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } @@ -2473,7 +2484,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; if (debug) { - fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " + fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, rapl_power_units, rapl_energy_units, rapl_time_units); } @@ -2483,7 +2494,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -5; - fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, @@ -2496,11 +2507,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) return -9; - fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", + fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 63) & 1 ? "": "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); - fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", + fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", cpu, ((msr >> 47) & 1) ? "EN" : "DIS", ((msr >> 32) & 0x7FFF) * rapl_power_units, @@ -2512,7 +2523,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; - fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, @@ -2522,7 +2533,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (do_rapl & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; - fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", + fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "": "UN"); print_power_limit_msr(cpu, msr, "DRAM Limit"); @@ -2532,7 +2543,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; - fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); + fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } } if (do_rapl & RAPL_CORES) { @@ -2540,7 +2551,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; - fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "": "UN"); print_power_limit_msr(cpu, msr, "Cores Limit"); } @@ -2550,11 +2561,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PP1_POLICY, &msr)) return -8; - fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); + fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) return -9; - fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "": "UN"); print_power_limit_msr(cpu, msr, "GFX Limit"); } @@ -2680,16 +2691,16 @@ double slm_bclk(void) double freq; if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) - fprintf(stderr, "SLM BCLK: unknown\n"); + fprintf(outf, "SLM BCLK: unknown\n"); i = msr & 0xf; if (i >= SLM_BCLK_FREQS) { - fprintf(stderr, "SLM BCLK[%d] invalid\n", i); + fprintf(outf, "SLM BCLK[%d] invalid\n", i); msr = 3; } freq = slm_freq_table[i]; - fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq); + fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); return freq; } @@ -2732,13 +2743,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk cpu = t->cpu_id; if (cpu_migrate(cpu)) { - fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } if (tcc_activation_temp_override != 0) { tcc_activation_temp = tcc_activation_temp_override; - fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", + fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tcc_activation_temp); return 0; } @@ -2753,7 +2764,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk target_c_local = (msr >> 16) & 0xFF; if (debug) - fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); if (!target_c_local) @@ -2765,7 +2776,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk guess: tcc_activation_temp = TJMAX_DEFAULT; - fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", + fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tcc_activation_temp); return 0; @@ -2776,7 +2787,7 @@ void decode_misc_enable_msr(void) unsigned long long msr; if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) - fprintf(stderr, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", + fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", base_cpu, msr, msr & (1 << 3) ? "TCC" : "", msr & (1 << 16) ? "EIST" : "", @@ -2798,7 +2809,7 @@ void decode_misc_pwr_mgmt_msr(void) return; if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) - fprintf(stderr, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n", + fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n", base_cpu, msr, msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS"); @@ -2817,7 +2828,7 @@ void process_cpuid() genuine_intel = 1; if (debug) - fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", + fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); __get_cpuid(1, &fms, &ebx, &ecx, &edx); @@ -2828,9 +2839,9 @@ void process_cpuid() model += ((fms >> 16) & 0xf) << 4; if (debug) { - fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", + fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); - fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n", + fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s\n", ecx & (1 << 0) ? "SSE3" : "-", ecx & (1 << 3) ? "MONITOR" : "-", ecx & (1 << 7) ? "EIST" : "-", @@ -2879,7 +2890,7 @@ void process_cpuid() has_epb = ecx & (1 << 3); if (debug) - fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " + fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", has_aperf ? "" : "No-", do_dts ? "" : "No-", @@ -2907,7 +2918,7 @@ void process_cpuid() if (ebx_tsc != 0) { if (debug && (ebx != 0)) - fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", + fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); if (crystal_hz == 0) @@ -2923,7 +2934,7 @@ void process_cpuid() if (crystal_hz) { tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; if (debug) - fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", + fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); } } @@ -2938,7 +2949,7 @@ void process_cpuid() __get_cpuid(0x16, &base_mhz, &max_mhz, &bus_mhz, &edx); if (debug) - fprintf(stderr, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", + fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz); } @@ -2973,7 +2984,7 @@ void process_cpuid() void help() { - fprintf(stderr, + fprintf(outf, "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" "\n" "Turbostat forks the specified COMMAND and prints statistics\n" @@ -2985,6 +2996,7 @@ void help() "--help print this help message\n" "--counter msr print 32-bit counter at address \"msr\"\n" "--Counter msr print 64-bit Counter at address \"msr\"\n" + "--out file create or truncate \"file\" for all output\n" "--msr msr print 32-bit value at address \"msr\"\n" "--MSR msr print 64-bit Value at address \"msr\"\n" "--version print version information\n" @@ -3029,7 +3041,7 @@ void topology_probe() show_cpu = 1; if (debug > 1) - fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); + fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); if (cpus == NULL) @@ -3064,7 +3076,7 @@ void topology_probe() if (cpu_is_not_present(i)) { if (debug > 1) - fprintf(stderr, "cpu%d NOT PRESENT\n", i); + fprintf(outf, "cpu%d NOT PRESENT\n", i); continue; } cpus[i].core_id = get_core_id(i); @@ -3079,26 +3091,26 @@ void topology_probe() if (siblings > max_siblings) max_siblings = siblings; if (debug > 1) - fprintf(stderr, "cpu %d pkg %d core %d\n", + fprintf(outf, "cpu %d pkg %d core %d\n", i, cpus[i].physical_package_id, cpus[i].core_id); } topo.num_cores_per_pkg = max_core_id + 1; if (debug > 1) - fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", + fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.num_cores_per_pkg); if (debug && !summary_only && topo.num_cores_per_pkg > 1) show_core = 1; topo.num_packages = max_package_id + 1; if (debug > 1) - fprintf(stderr, "max_package_id %d, sizing for %d packages\n", + fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); if (debug && !summary_only && topo.num_packages > 1) show_pkg = 1; topo.num_threads_per_core = max_siblings; if (debug > 1) - fprintf(stderr, "max_siblings %d\n", max_siblings); + fprintf(outf, "max_siblings %d\n", max_siblings); free(cpus); } @@ -3207,7 +3219,7 @@ void set_base_cpu(void) err(-ENODEV, "No valid cpus found"); if (debug > 1) - fprintf(stderr, "base_cpu = %d\n", base_cpu); + fprintf(outf, "base_cpu = %d\n", base_cpu); } void turbostat_init() @@ -3274,9 +3286,10 @@ int fork_it(char **argv) for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); - flush_stderr(); - fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); + fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); + + flush_output_stderr(); return status; } @@ -3293,13 +3306,13 @@ int get_and_dump_counters(void) if (status) return status; - flush_stdout(); + flush_output_stdout(); return status; } void print_version() { - fprintf(stderr, "turbostat version 4.10 10 Dec, 2015" + fprintf(outf, "turbostat version 4.10 10 Dec, 2015" " - Len Brown \n"); } @@ -3317,6 +3330,7 @@ void cmdline(int argc, char **argv) {"Joules", no_argument, 0, 'J'}, {"MSR", required_argument, 0, 'M'}, {"msr", required_argument, 0, 'm'}, + {"out", required_argument, 0, 'o'}, {"Package", no_argument, 0, 'p'}, {"processor", no_argument, 0, 'p'}, {"Summary", no_argument, 0, 'S'}, @@ -3327,7 +3341,7 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v", + while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", long_options, &option_index)) != -1) { switch (opt) { case 'C': @@ -3351,7 +3365,7 @@ void cmdline(int argc, char **argv) double interval = strtod(optarg, NULL); if (interval < 0.001) { - fprintf(stderr, "interval %f seconds is too small\n", + fprintf(outf, "interval %f seconds is too small\n", interval); exit(2); } @@ -3369,6 +3383,9 @@ void cmdline(int argc, char **argv) case 'm': sscanf(optarg, "%x", &extra_msr_offset32); break; + case 'o': + outf = fopen_or_die(optarg, "w"); + break; case 'P': show_pkg_only++; break; @@ -3391,6 +3408,8 @@ void cmdline(int argc, char **argv) int main(int argc, char **argv) { + outf = stderr; + cmdline(argc, argv); if (debug) From 58cc30a4e6086d542302e04eea39b2a438e4c5dd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 24 Feb 2016 18:16:40 -0500 Subject: [PATCH 11/20] tools/power turbostat: fix compiler warnings Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5f9f41a3bf91..ad5bd5600467 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1987,7 +1987,7 @@ int has_config_tdp(unsigned int family, unsigned int model) } static void -dump_cstate_pstate_config_info(family, model) +dump_cstate_pstate_config_info(int family, int model) { if (!do_nhm_platform_info) return; @@ -2250,7 +2250,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ -double get_tdp(model) +double get_tdp(int model) { unsigned long long msr; @@ -2364,7 +2364,7 @@ void rapl_probe(unsigned int family, unsigned int model) return; } -void perf_limit_reasons_probe(family, model) +void perf_limit_reasons_probe(int family, int model) { if (!genuine_intel) return; @@ -2974,7 +2974,7 @@ void process_cpuid() perf_limit_reasons_probe(family, model); if (debug) - dump_cstate_pstate_config_info(); + dump_cstate_pstate_config_info(family, model); if (has_skl_msrs(family, model)) calculate_tsc_tweak(); From 36229897ba966bb0dc9e060222ff17b198252367 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 26 Feb 2016 20:51:02 -0500 Subject: [PATCH 12/20] tools/power turbostat: make fewer systems calls skip the open(2)/close(2) on each msr read by keeping the /dev/cpu/*/msr files open. The remaining read(2) is generally far fewer cycles than the removed open(2) system call. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 51 +++++++++++++++++++++------ 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ad5bd5600467..2e47c2bc3e27 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -45,6 +45,7 @@ char *proc_stat = "/proc/stat"; FILE *outf; +int *fd_percpu; struct timespec interval_ts = {5, 0}; unsigned int debug; unsigned int rapl_joules; @@ -270,23 +271,34 @@ int cpu_migrate(int cpu) else return 0; } - -int get_msr(int cpu, off_t offset, unsigned long long *msr) +int get_msr_fd(int cpu) { - ssize_t retval; char pathname[32]; int fd; + fd = fd_percpu[cpu]; + + if (fd) + return fd; + sprintf(pathname, "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); if (fd < 0) err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); - retval = pread(fd, msr, sizeof *msr, offset); - close(fd); + fd_percpu[cpu] = fd; + + return fd; +} + +int get_msr(int cpu, off_t offset, unsigned long long *msr) +{ + ssize_t retval; + + retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); if (retval != sizeof *msr) - err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); + err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset); return 0; } @@ -1453,19 +1465,30 @@ dump_config_tdp(void) fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(outf, ")\n"); - + get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(outf, ")\n"); } +void free_fd_percpu(void) +{ + int i; + + for (i = 0; i < topo.max_cpu_num; ++i) { + if (fd_percpu[i] != 0) + close(fd_percpu[i]); + } + + free(fd_percpu); +} void free_all_buffers(void) { CPU_FREE(cpu_present_set); cpu_present_set = NULL; - cpu_present_set = 0; + cpu_present_setsize = 0; CPU_FREE(cpu_affinity_set); cpu_affinity_set = NULL; @@ -1490,6 +1513,8 @@ void free_all_buffers(void) free(output_buffer); output_buffer = NULL; outp = NULL; + + free_fd_percpu(); } /* @@ -2449,7 +2474,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; } - + void print_power_limit_msr(int cpu, unsigned long long msr, char *label) { fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", @@ -3202,10 +3227,16 @@ void allocate_output_buffer() if (outp == NULL) err(-1, "calloc output buffer"); } - +void allocate_fd_percpu(void) +{ + fd_percpu = calloc(topo.max_cpu_num, sizeof(int)); + if (fd_percpu == NULL) + err(-1, "calloc fd_percpu"); +} void setup_all_buffers(void) { topology_probe(); + allocate_fd_percpu(); allocate_counters(&thread_even, &core_even, &package_even); allocate_counters(&thread_odd, &core_odd, &package_odd); allocate_output_buffer(); From 562a2d377bb9882c49debc9e1be7127a1717e242 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 26 Feb 2016 23:48:05 -0500 Subject: [PATCH 13/20] tools/power turbostat: show IRQs per CPU The new IRQ column shows how many interrupts have occurred on each CPU during the measurement inteval. This information comes from the difference between /proc/interrupts shapshots made before and after the measurement interval. The first row, the system summary, shows the sum of the IRQS for all CPUs during that interval. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 126 +++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2e47c2bc3e27..c6793268d81f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -75,6 +75,7 @@ unsigned int extra_msr_offset64; unsigned int extra_delta_offset32; unsigned int extra_delta_offset64; unsigned int aperf_mperf_multiplier = 1; +int do_irq = 1; int do_smi; double bclk; double base_hz; @@ -154,6 +155,7 @@ struct thread_data { unsigned long long extra_delta64; unsigned long long extra_msr32; unsigned long long extra_delta32; + unsigned int irq_count; unsigned int smi_count; unsigned int cpu_id; unsigned int flags; @@ -221,6 +223,9 @@ struct topo_params { struct timeval tv_even, tv_odd, tv_delta; +int *irq_column_2_cpu; /* /proc/interrupts column numbers */ +int *irqs_per_cpu; /* indexed by cpu_num */ + void setup_all_buffers(void); int cpu_is_not_present(int cpu) @@ -306,8 +311,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) /* * Example Format w/ field column widths: * - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt - * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 + * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt + * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 */ void print_header(void) @@ -338,6 +343,8 @@ void print_header(void) if (!debug) goto done; + if (do_irq) + outp += sprintf(outp, " IRQ"); if (do_smi) outp += sprintf(outp, " SMI"); @@ -429,6 +436,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, extra_msr_offset32, t->extra_msr32); outp += sprintf(outp, "msr0x%x: %016llX\n", extra_msr_offset64, t->extra_msr64); + if (do_irq) + outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); if (do_smi) outp += sprintf(outp, "SMI: %08X\n", t->smi_count); } @@ -562,6 +571,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!debug) goto done; + /* IRQ */ + if (do_irq) + outp += sprintf(outp, "%8d", t->irq_count); + /* SMI */ if (do_smi) outp += sprintf(outp, "%8d", t->smi_count); @@ -827,6 +840,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->extra_msr32 = new->extra_msr32; old->extra_msr64 = new->extra_msr64; + if (do_irq) + old->irq_count = new->irq_count - old->irq_count; + if (do_smi) old->smi_count = new->smi_count - old->smi_count; } @@ -856,10 +872,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->mperf = 0; t->c1 = 0; - t->smi_count = 0; t->extra_delta32 = 0; t->extra_delta64 = 0; + t->irq_count = 0; + t->smi_count = 0; + /* tells format_counters to dump all fields from this set */ t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; @@ -903,6 +921,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.threads.extra_delta32 += t->extra_delta32; average.threads.extra_delta64 += t->extra_delta64; + average.threads.irq_count += t->irq_count; + average.threads.smi_count += t->smi_count; + /* sum per-core values only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -1000,7 +1021,6 @@ static unsigned long long rdtsc(void) return low | ((unsigned long long)high) << 32; } - /* * get_counters(...) * migrate to cpu @@ -1027,6 +1047,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) t->mperf = t->mperf * aperf_mperf_multiplier; } + if (do_irq) + t->irq_count = irqs_per_cpu[cpu]; if (do_smi) { if (get_msr(cpu, MSR_SMI_COUNT, &msr)) return -5; @@ -1515,6 +1537,9 @@ void free_all_buffers(void) outp = NULL; free_fd_percpu(); + + free(irq_column_2_cpu); + free(irqs_per_cpu); } /* @@ -1737,6 +1762,83 @@ int mark_cpu_present(int cpu) return 0; } +/* + * snapshot_proc_interrupts() + * + * read and record summary of /proc/interrupts + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_proc_interrupts(void) +{ + static FILE *fp; + int column, retval; + + if (fp == NULL) + fp = fopen_or_die("/proc/interrupts", "r"); + else + rewind(fp); + + /* read 1st line of /proc/interrupts to get cpu* name for each column */ + for (column = 0; column < topo.num_cpus; ++column) { + int cpu_number; + + retval = fscanf(fp, " CPU%d", &cpu_number); + if (retval != 1) + break; + + if (cpu_number > topo.max_cpu_num) { + warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); + return 1; + } + + irq_column_2_cpu[column] = cpu_number; + irqs_per_cpu[cpu_number] = 0; + } + + /* read /proc/interrupt count lines and sum up irqs per cpu */ + while (1) { + int column; + char buf[64]; + + retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ + if (retval != 1) + break; + + /* read the count per cpu */ + for (column = 0; column < topo.num_cpus; ++column) { + + int cpu_number, irq_count; + + retval = fscanf(fp, " %d", &irq_count); + if (retval != 1) + break; + + cpu_number = irq_column_2_cpu[column]; + irqs_per_cpu[cpu_number] += irq_count; + + } + + while (getc(fp) != '\n') + ; /* flush interrupt description */ + + } + return 0; +} + +/* + * snapshot /proc and /sys files + * + * return 1 if configuration restart needed, else return 0 + */ +int snapshot_proc_sysfs_files(void) +{ + if (snapshot_proc_interrupts()) + return 1; + + return 0; +} + void turbostat_loop() { int retval; @@ -1745,6 +1847,7 @@ void turbostat_loop() restart: restarted++; + snapshot_proc_sysfs_files(); retval = for_all_cpus(get_counters, EVEN_COUNTERS); if (retval < -1) { exit(retval); @@ -1764,6 +1867,8 @@ restart: goto restart; } nanosleep(&interval_ts, NULL); + if (snapshot_proc_sysfs_files()) + goto restart; retval = for_all_cpus(get_counters, ODD_COUNTERS); if (retval < -1) { exit(retval); @@ -1778,6 +1883,8 @@ restart: format_all_counters(EVEN_COUNTERS); flush_output_stdout(); nanosleep(&interval_ts, NULL); + if (snapshot_proc_sysfs_files()) + goto restart; retval = for_all_cpus(get_counters, EVEN_COUNTERS); if (retval < -1) { exit(retval); @@ -3233,9 +3340,20 @@ void allocate_fd_percpu(void) if (fd_percpu == NULL) err(-1, "calloc fd_percpu"); } +void allocate_irq_buffers(void) +{ + irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); + if (irq_column_2_cpu == NULL) + err(-1, "calloc %d", topo.num_cpus); + + irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int)); + if (irqs_per_cpu == NULL) + err(-1, "calloc %d", topo.max_cpu_num); +} void setup_all_buffers(void) { topology_probe(); + allocate_irq_buffers(); allocate_fd_percpu(); allocate_counters(&thread_even, &core_even, &package_even); allocate_counters(&thread_odd, &core_odd, &package_odd); From 27d47356b6dfa92042a17a0b474f08910d4c8e8f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 27 Feb 2016 00:37:54 -0500 Subject: [PATCH 14/20] tools/power turbostat: show GFXMHz Under the column "GFXMHz", show a snapshot of this attribute: /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz This is an instantaneous snapshot of what sysfs presents at the end of the measurement interval. turbostat does not average or otherwise perform any math on this value. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 50 ++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c6793268d81f..d599f9131844 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -90,6 +90,8 @@ char *output_buffer, *outp; unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; +unsigned int do_gfx_mhz; +unsigned int gfx_cur_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; double rapl_power_units, rapl_time_units; @@ -183,6 +185,7 @@ struct pkg_data { unsigned long long pkg_any_core_c0; unsigned long long pkg_any_gfxe_c0; unsigned long long pkg_both_core_gfxe_c0; + unsigned int gfx_mhz; unsigned int package_id; unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ @@ -311,7 +314,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) /* * Example Format w/ field column widths: * - * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt + * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 */ @@ -362,6 +365,9 @@ void print_header(void) if (do_ptm) outp += sprintf(outp, " PkgTmp"); + if (do_gfx_mhz) + outp += sprintf(outp, " GFXMHz"); + if (do_skl_residency) { outp += sprintf(outp, " Totl%%C0"); outp += sprintf(outp, " Any%%C0"); @@ -608,6 +614,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_ptm) outp += sprintf(outp, "%8d", p->pkg_temp_c); + /* GFXMHz */ + if (do_gfx_mhz) + outp += sprintf(outp, "%8d", p->gfx_mhz); + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (do_skl_residency) { outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); @@ -746,6 +756,8 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pc10 = new->pc10 - old->pc10; old->pkg_temp_c = new->pkg_temp_c; + old->gfx_mhz = new->gfx_mhz; + DELTA_WRAP32(new->energy_pkg, old->energy_pkg); DELTA_WRAP32(new->energy_cores, old->energy_cores); DELTA_WRAP32(new->energy_gfx, old->energy_gfx); @@ -909,6 +921,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->rapl_pkg_perf_status = 0; p->rapl_dram_perf_status = 0; p->pkg_temp_c = 0; + + p->gfx_mhz = 0; } int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) @@ -961,6 +975,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.energy_cores += p->energy_cores; average.packages.energy_gfx += p->energy_gfx; + average.packages.gfx_mhz = p->gfx_mhz; + average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; @@ -1176,6 +1192,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -17; p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } + if (do_gfx_mhz) + p->gfx_mhz = gfx_cur_mhz; + return 0; } @@ -1825,6 +1844,30 @@ int snapshot_proc_interrupts(void) } return 0; } +/* + * snapshot_gfx_mhz() + * + * record snapshot of + * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_mhz(void) +{ + static FILE *fp; + int retval; + + if (fp == NULL) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); + else + rewind(fp); + + retval = fscanf(fp, "%d", &gfx_cur_mhz); + if (retval != 1) + err(1, "GFX MHz"); + + return 0; +} /* * snapshot /proc and /sys files @@ -1836,6 +1879,9 @@ int snapshot_proc_sysfs_files(void) if (snapshot_proc_interrupts()) return 1; + if (do_gfx_mhz) + snapshot_gfx_mhz(); + return 0; } @@ -3111,6 +3157,8 @@ void process_cpuid() if (has_skl_msrs(family, model)) calculate_tsc_tweak(); + do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK); + return; } From fdf676e51f301d207586d9bac509b8ce055bae8a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 27 Feb 2016 01:28:12 -0500 Subject: [PATCH 15/20] tools/power turbostat: show GFX%rc6 The column "GFX%c6" show the percentage of time the GPU is in the "render C6" state, rc6. Deep package C-states on several systems depend on the GPU being in RC6. This information comes from the counter /sys/class/drm/card0/power/rc6_residency_ms, as read before and after the measurement interval. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 45 +++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d599f9131844..9896619e4382 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -90,6 +90,8 @@ char *output_buffer, *outp; unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; +unsigned int do_gfx_rc6_ms; +unsigned long long gfx_cur_rc6_ms; unsigned int do_gfx_mhz; unsigned int gfx_cur_mhz; unsigned int tcc_activation_temp; @@ -185,6 +187,7 @@ struct pkg_data { unsigned long long pkg_any_core_c0; unsigned long long pkg_any_gfxe_c0; unsigned long long pkg_both_core_gfxe_c0; + unsigned long long gfx_rc6_ms; unsigned int gfx_mhz; unsigned int package_id; unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ @@ -365,6 +368,9 @@ void print_header(void) if (do_ptm) outp += sprintf(outp, " PkgTmp"); + if (do_gfx_rc6_ms) + outp += sprintf(outp, " GFX%%rc6"); + if (do_gfx_mhz) outp += sprintf(outp, " GFXMHz"); @@ -614,6 +620,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_ptm) outp += sprintf(outp, "%8d", p->pkg_temp_c); + /* GFXrc6 */ + if (do_gfx_rc6_ms) + outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float); + /* GFXMHz */ if (do_gfx_mhz) outp += sprintf(outp, "%8d", p->gfx_mhz); @@ -756,6 +766,7 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pc10 = new->pc10 - old->pc10; old->pkg_temp_c = new->pkg_temp_c; + old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; old->gfx_mhz = new->gfx_mhz; DELTA_WRAP32(new->energy_pkg, old->energy_pkg); @@ -922,6 +933,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->rapl_dram_perf_status = 0; p->pkg_temp_c = 0; + p->gfx_rc6_ms = 0; p->gfx_mhz = 0; } int sum_counters(struct thread_data *t, struct core_data *c, @@ -975,6 +987,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.energy_cores += p->energy_cores; average.packages.energy_gfx += p->energy_gfx; + average.packages.gfx_rc6_ms = p->gfx_rc6_ms; average.packages.gfx_mhz = p->gfx_mhz; average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); @@ -1192,6 +1205,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -17; p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); } + + if (do_gfx_rc6_ms) + p->gfx_rc6_ms = gfx_cur_rc6_ms; + if (do_gfx_mhz) p->gfx_mhz = gfx_cur_mhz; @@ -1844,6 +1861,29 @@ int snapshot_proc_interrupts(void) } return 0; } +/* + * snapshot_gfx_rc6_ms() + * + * record snapshot of + * /sys/class/drm/card0/power/rc6_residency_ms + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_rc6_ms(void) +{ + FILE *fp; + int retval; + + fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r"); + + retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms); + if (retval != 1) + err(1, "GFX rc6"); + + fclose(fp); + + return 0; +} /* * snapshot_gfx_mhz() * @@ -1879,6 +1919,9 @@ int snapshot_proc_sysfs_files(void) if (snapshot_proc_interrupts()) return 1; + if (do_gfx_rc6_ms) + snapshot_gfx_rc6_ms(); + if (do_gfx_mhz) snapshot_gfx_mhz(); @@ -3157,6 +3200,8 @@ void process_cpuid() if (has_skl_msrs(family, model)) calculate_tsc_tweak(); + do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK); + do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK); return; From 0102b06747c7d24e334d2b27c4b43eed693676f1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 27 Feb 2016 03:11:29 -0500 Subject: [PATCH 16/20] tools/power turbostat: detect and work around syscall jitter The accuracy of Bzy_Mhz and Busy% depend on reading the TSC, APERF, and MPERF close together in time. When there is a very short measurement interval, or a large system is profoundly idle, the changes in APERF and MPERF may be very small. They can be small enough that an expensive interrupt between reading APERF and MPERF can cause the APERF/MPERF ratio to become inaccurate, resulting in invalid calculation and display of Bzy_MHz. A dummy APERF read of APERF makes this problem much more rare. Apparently this 1st systemn call after exiting a long stretch of idle is when we typically see expensive timer interrupts that cause large jitter. For the cases that dummy APERF read fails to prevent, we compare the latency of the APERF and MPERF reads. If they differ by more than 2x, we re-issue them. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 51 ++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9896619e4382..43a6dda434ef 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1059,19 +1059,68 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int cpu = t->cpu_id; unsigned long long msr; + int aperf_mperf_retry_count = 0; if (cpu_migrate(cpu)) { fprintf(outf, "Could not migrate to CPU %d\n", cpu); return -1; } +retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ if (has_aperf) { + unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; + + /* + * The TSC, APERF and MPERF must be read together for + * APERF/MPERF and MPERF/TSC to give accurate results. + * + * Unfortunately, APERF and MPERF are read by + * individual system call, so delays may occur + * between them. If the time to read them + * varies by a large amount, we re-read them. + */ + + /* + * This initial dummy APERF read has been seen to + * reduce jitter in the subsequent reads. + */ + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) return -3; + + t->tsc = rdtsc(); /* re-read close to APERF */ + + tsc_before = t->tsc; + + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) + return -3; + + tsc_between = rdtsc(); + if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) return -4; + + tsc_after = rdtsc(); + + aperf_time = tsc_between - tsc_before; + mperf_time = tsc_after - tsc_between; + + /* + * If the system call latency to read APERF and MPERF + * differ by more than 2x, then try again. + */ + if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { + aperf_mperf_retry_count++; + if (aperf_mperf_retry_count < 5) + goto retry; + else + warnx("cpu%d jitter %lld %lld", + cpu, aperf_time, mperf_time); + } + aperf_mperf_retry_count = 0; + t->aperf = t->aperf * aperf_mperf_multiplier; t->mperf = t->mperf * aperf_mperf_multiplier; } @@ -3554,7 +3603,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.10 10 Dec, 2015" + fprintf(outf, "turbostat version 4.11 27 Feb 2016" " - Len Brown \n"); } From aa8d8cc79af16e16da04efff1c1a72b1ea4a9e7e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 11 Mar 2016 13:26:03 -0500 Subject: [PATCH 17/20] tools/power turbostat: indicate SMX and SGX support SGX presence is related to a SKL power workaround, so lets show when that is enabled. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 28 ++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 43a6dda434ef..db9c9d100042 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3052,6 +3052,17 @@ guess: return 0; } +void decode_feature_control_msr(void) +{ + unsigned long long msr; + + if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr)) + fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", + base_cpu, msr, + msr & FEATURE_CONTROL_LOCKED ? "" : "UN-", + msr & (1 << 18) ? "SGX" : ""); +} + void decode_misc_enable_msr(void) { unsigned long long msr; @@ -3111,9 +3122,10 @@ void process_cpuid() if (debug) { fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); - fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s\n", + fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", ecx & (1 << 0) ? "SSE3" : "-", ecx & (1 << 3) ? "MONITOR" : "-", + ecx & (1 << 6) ? "SMX" : "-", ecx & (1 << 7) ? "EIST" : "-", ecx & (1 << 8) ? "TM2" : "-", edx & (1 << 4) ? "TSC" : "-", @@ -3175,6 +3187,20 @@ void process_cpuid() if (debug) decode_misc_enable_msr(); + if (max_level >= 0x7) { + int has_sgx; + + ecx = 0; + + __cpuid_count(0x7, 0, eax, ebx, ecx, edx); + + has_sgx = ebx & (1 << 2); + fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-"); + + if (has_sgx) + decode_feature_control_msr(); + } + if (max_level >= 0x15) { unsigned int eax_crystal; unsigned int ebx_tsc; From 5aea2f7f645b27635b856311dee5b775d277c686 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 13 Mar 2016 03:14:35 -0400 Subject: [PATCH 18/20] tools/power turbostat: call __cpuid() instead of __get_cpuid() turbostat already checks whether calling each cpuid leavf is legal, and it doesn't look at the function return value, so call the simpler gcc intrinsic __cpuid() instead of __get_cpuid(). syntax only, no functional change Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index db9c9d100042..b34241c3c92e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3103,7 +3103,7 @@ void process_cpuid() eax = ebx = ecx = edx = 0; - __get_cpuid(0, &max_level, &ebx, &ecx, &edx); + __cpuid(0, max_level, ebx, ecx, edx); if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) genuine_intel = 1; @@ -3112,7 +3112,7 @@ void process_cpuid() fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); - __get_cpuid(1, &fms, &ebx, &ecx, &edx); + __cpuid(1, fms, ebx, ecx, edx); family = (fms >> 8) & 0xf; model = (fms >> 4) & 0xf; stepping = fms & 0xf; @@ -3143,7 +3143,7 @@ void process_cpuid() * This check is valid for both Intel and AMD. */ ebx = ecx = edx = 0; - __get_cpuid(0x80000000, &max_extended_level, &ebx, &ecx, &edx); + __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); if (max_extended_level >= 0x80000007) { @@ -3151,7 +3151,7 @@ void process_cpuid() * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 * this check is valid for both Intel and AMD */ - __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); + __cpuid(0x80000007, eax, ebx, ecx, edx); has_invariant_tsc = edx & (1 << 8); } @@ -3160,7 +3160,7 @@ void process_cpuid() * this check is valid for both Intel and AMD */ - __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); + __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); do_dts = eax & (1 << 0); do_ptm = eax & (1 << 6); @@ -3209,7 +3209,7 @@ void process_cpuid() * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz */ eax_crystal = ebx_tsc = crystal_hz = edx = 0; - __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); + __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); if (ebx_tsc != 0) { @@ -3243,7 +3243,7 @@ void process_cpuid() */ base_mhz = max_mhz = bus_mhz = edx = 0; - __get_cpuid(0x16, &base_mhz, &max_mhz, &bus_mhz, &edx); + __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); if (debug) fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz); From 6c34f160df82ae07bfff5b4c51f50622e9c2759e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 13 Mar 2016 03:21:22 -0400 Subject: [PATCH 19/20] tools/power turbostat: correct output for MSR_NHM_SNB_PKG_CST_CFG_CTL dump MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008008 (...pkg-cstate-limit=0: unlimited) should print as MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008008 (...pkg-cstate-limit=8: unlimited) Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b34241c3c92e..a551630a82b6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1532,7 +1532,7 @@ dump_nhm_cst_cfg(void) (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", (msr & (1 << 15)) ? "" : "UN", - (unsigned int)msr & 7, + (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); return; } From 685b535b2cdb9cdf354321f8af9ed17dcf19d19f Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sun, 13 Dec 2015 21:09:31 +0800 Subject: [PATCH 20/20] tools/power turbostat: bugfix: TDP MSRs print bits fixing MSR_CONFIG_TDP_NOMINAL: should print all 8 bits of base_ratio (bit 0:7) 0xFF MSR_CONFIG_TDP_LEVEL_1: should print all 15 bits of PKG_MIN_PWR_LVL1 (bit 48:62) 0x7FFF should print all 15 bits of PKG_MAX_PWR_LVL1 (bit 32:46) 0x7FFF should print all 8 bits of LVL1_RATIO (bit 16:23) 0xFF should print all 15 bits of PKG_TDP_LVL1 (bit 0:14) 0x7FFF And the same modification to MSR_CONFIG_TDP_LEVEL_2. MSR_TURBO_ACTIVATION_RATIO: should print all 8 bits of MAX_NON_TURBO_RATIO (bit 0:7) 0xFF Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a551630a82b6..ee1551b6fa01 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1544,25 +1544,25 @@ dump_config_tdp(void) get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); - fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); + fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); if (msr) { - fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); - fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); - fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); - fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); + fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); + fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); + fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); + fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); } fprintf(outf, ")\n"); get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); if (msr) { - fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); - fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); - fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); - fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); + fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); + fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); + fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); + fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); } fprintf(outf, ")\n"); @@ -1575,7 +1575,7 @@ dump_config_tdp(void) get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); - fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); + fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(outf, ")\n"); }