cpufreq: AMD "frequency sensitivity feedback" powersave bias for ondemand governor
Future AMD processors, starting with Family 16h, can provide software with feedback on how the workload may respond to frequency change -- memory-bound workloads will not benefit from higher frequency, whereas compute-bound workloads will. This patch enables this "frequency sensitivity feedback" to aid the ondemand governor to make better frequency change decisions by hooking into the powersave bias. Signed-off-by: Jacob Shin <jacob.shin@amd.com> Acked-by: Thomas Renninger <trenn@suse.de> Acked-by: Borislav Petkov <bp@suse.de> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
This commit is contained in:
parent
fb30809efa
commit
9c5320c8ea
@ -167,6 +167,27 @@ of load evaluation and helping the CPU stay at its top speed when truly
|
||||
busy, rather than shifting back and forth in speed. This tunable has no
|
||||
effect on behavior at lower speeds/lower CPU loads.
|
||||
|
||||
powersave_bias: this parameter takes a value between 0 and 1000. It
|
||||
defines the percentage (times 10) value of the target frequency that
|
||||
will be shaved off of the target. For example, when set to 100 -- 10%,
|
||||
when ondemand governor would have targeted 1000 MHz, it will target
|
||||
1000 MHz - (10% of 1000 MHz) = 900 MHz instead. This is set to 0
|
||||
(disabled) by default.
|
||||
When AMD frequency sensitivity powersave bias driver --
|
||||
drivers/cpufreq/amd_freq_sensitivity.c is loaded, this parameter
|
||||
defines the workload frequency sensitivity threshold in which a lower
|
||||
frequency is chosen instead of ondemand governor's original target.
|
||||
The frequency sensitivity is a hardware reported (on AMD Family 16h
|
||||
Processors and above) value between 0 and 100% that tells software how
|
||||
the performance of the workload running on a CPU will change when
|
||||
frequency changes. A workload with sensitivity of 0% (memory/IO-bound)
|
||||
will not perform any better on higher core frequency, whereas a
|
||||
workload with sensitivity of 100% (CPU-bound) will perform better
|
||||
the higher the frequency. When the driver is loaded, this is set to 400
|
||||
by default -- for CPUs running workloads with sensitivity value below
|
||||
40%, a lower frequency is chosen. Unloading the driver or writing 0
|
||||
will disable this feature.
|
||||
|
||||
|
||||
2.5 Conservative
|
||||
----------------
|
||||
|
@ -182,6 +182,7 @@
|
||||
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
|
||||
#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
|
||||
#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
|
||||
#define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
|
||||
|
||||
/* Virtualization flags: Linux defined, word 8 */
|
||||
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
|
||||
|
@ -39,8 +39,9 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
|
||||
{ X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
|
||||
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
|
||||
{ X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
|
||||
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
|
||||
{ X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
|
||||
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
|
||||
{ X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 },
|
||||
{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
|
||||
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
|
||||
{ X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
|
||||
|
@ -129,6 +129,23 @@ config X86_POWERNOW_K8
|
||||
|
||||
For details, take a look at <file:Documentation/cpu-freq/>.
|
||||
|
||||
config X86_AMD_FREQ_SENSITIVITY
|
||||
tristate "AMD frequency sensitivity feedback powersave bias"
|
||||
depends on CPU_FREQ_GOV_ONDEMAND && X86_ACPI_CPUFREQ && CPU_SUP_AMD
|
||||
help
|
||||
This adds AMD-specific powersave bias function to the ondemand
|
||||
governor, which allows it to make more power-conscious frequency
|
||||
change decisions based on feedback from hardware (available on AMD
|
||||
Family 16h and above).
|
||||
|
||||
Hardware feedback tells software how "sensitive" to frequency changes
|
||||
the CPUs' workloads are. CPU-bound workloads will be more sensitive
|
||||
-- they will perform better as frequency increases. Memory/IO-bound
|
||||
workloads will be less sensitive -- they will not necessarily perform
|
||||
better as frequency increases.
|
||||
|
||||
If in doubt, say N.
|
||||
|
||||
config X86_GX_SUSPMOD
|
||||
tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation"
|
||||
depends on X86_32 && PCI
|
||||
|
@ -41,6 +41,7 @@ obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o
|
||||
obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o
|
||||
obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o
|
||||
obj-$(CONFIG_X86_INTEL_PSTATE) += intel_pstate.o
|
||||
obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o
|
||||
|
||||
##################################################################################
|
||||
# ARM SoC drivers
|
||||
|
148
drivers/cpufreq/amd_freq_sensitivity.c
Normal file
148
drivers/cpufreq/amd_freq_sensitivity.c
Normal file
@ -0,0 +1,148 @@
|
||||
/*
|
||||
* amd_freq_sensitivity.c: AMD frequency sensitivity feedback powersave bias
|
||||
* for the ondemand governor.
|
||||
*
|
||||
* Copyright (C) 2013 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Author: Jacob Shin <jacob.shin@amd.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/percpu-defs.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/mod_devicetable.h>
|
||||
|
||||
#include <asm/msr.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
#include "cpufreq_governor.h"
|
||||
|
||||
/* MSRs read by this driver to obtain the frequency sensitivity counters. */
#define MSR_AMD64_FREQ_SENSITIVITY_ACTUAL	0xc0010080
#define MSR_AMD64_FREQ_SENSITIVITY_REFERENCE	0xc0010081

/*
 * Shift that exposes the top byte of the ACTUAL MSR; module init refuses to
 * load when that byte reads as zero.
 */
#define CLASS_CODE_SHIFT			56

/*
 * Sensitivity is expressed on a 0..POWERSAVE_BIAS_MAX scale.
 * POWERSAVE_BIAS_DEF is the value handed to the ondemand governor when the
 * handler is registered.
 */
#define POWERSAVE_BIAS_MAX			1000
#define POWERSAVE_BIAS_DEF			400
|
||||
|
||||
struct cpu_data_t {
|
||||
u64 actual;
|
||||
u64 reference;
|
||||
unsigned int freq_prev;
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct cpu_data_t, cpu_data);
|
||||
|
||||
static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
|
||||
unsigned int freq_next,
|
||||
unsigned int relation)
|
||||
{
|
||||
int sensitivity;
|
||||
long d_actual, d_reference;
|
||||
struct msr actual, reference;
|
||||
struct cpu_data_t *data = &per_cpu(cpu_data, policy->cpu);
|
||||
struct dbs_data *od_data = policy->governor_data;
|
||||
struct od_dbs_tuners *od_tuners = od_data->tuners;
|
||||
struct od_cpu_dbs_info_s *od_info =
|
||||
od_data->cdata->get_cpu_dbs_info_s(policy->cpu);
|
||||
|
||||
if (!od_info->freq_table)
|
||||
return freq_next;
|
||||
|
||||
rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_ACTUAL,
|
||||
&actual.l, &actual.h);
|
||||
rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_REFERENCE,
|
||||
&reference.l, &reference.h);
|
||||
actual.h &= 0x00ffffff;
|
||||
reference.h &= 0x00ffffff;
|
||||
|
||||
/* counter wrapped around, so stay on current frequency */
|
||||
if (actual.q < data->actual || reference.q < data->reference) {
|
||||
freq_next = policy->cur;
|
||||
goto out;
|
||||
}
|
||||
|
||||
d_actual = actual.q - data->actual;
|
||||
d_reference = reference.q - data->reference;
|
||||
|
||||
/* divide by 0, so stay on current frequency as well */
|
||||
if (d_reference == 0) {
|
||||
freq_next = policy->cur;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sensitivity = POWERSAVE_BIAS_MAX -
|
||||
(POWERSAVE_BIAS_MAX * (d_reference - d_actual) / d_reference);
|
||||
|
||||
clamp(sensitivity, 0, POWERSAVE_BIAS_MAX);
|
||||
|
||||
/* this workload is not CPU bound, so choose a lower freq */
|
||||
if (sensitivity < od_tuners->powersave_bias) {
|
||||
if (data->freq_prev == policy->cur)
|
||||
freq_next = policy->cur;
|
||||
|
||||
if (freq_next > policy->cur)
|
||||
freq_next = policy->cur;
|
||||
else if (freq_next < policy->cur)
|
||||
freq_next = policy->min;
|
||||
else {
|
||||
unsigned int index;
|
||||
|
||||
cpufreq_frequency_table_target(policy,
|
||||
od_info->freq_table, policy->cur - 1,
|
||||
CPUFREQ_RELATION_H, &index);
|
||||
freq_next = od_info->freq_table[index].frequency;
|
||||
}
|
||||
|
||||
data->freq_prev = freq_next;
|
||||
} else
|
||||
data->freq_prev = 0;
|
||||
|
||||
out:
|
||||
data->actual = actual.q;
|
||||
data->reference = reference.q;
|
||||
return freq_next;
|
||||
}
|
||||
|
||||
/*
 * Module entry point: verify that the hardware offers frequency sensitivity
 * feedback and, if so, install the powersave-bias handler with the default
 * threshold.
 *
 * Return: 0 on success, -ENODEV when the CPU is not an AMD part, lacks the
 * ProcFeedbackInterface feature flag, the ACTUAL MSR cannot be read, or the
 * top byte of that MSR is zero.
 */
static int __init amd_freq_sensitivity_init(void)
{
	u64 feedback;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
	    !static_cpu_has(X86_FEATURE_PROC_FEEDBACK))
		return -ENODEV;

	/* The MSR must be readable and carry a non-zero top byte. */
	if (rdmsrl_safe(MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, &feedback) ||
	    !(feedback >> CLASS_CODE_SHIFT))
		return -ENODEV;

	od_register_powersave_bias_handler(amd_powersave_bias_target,
					   POWERSAVE_BIAS_DEF);

	return 0;
}
late_initcall(amd_freq_sensitivity_init);
|
||||
|
||||
/*
 * Module exit point: remove the powersave-bias handler that was registered
 * at init time.
 */
static void __exit amd_freq_sensitivity_exit(void)
{
	od_unregister_powersave_bias_handler();
}
module_exit(amd_freq_sensitivity_exit);
|
||||
|
||||
static const struct x86_cpu_id amd_freq_sensitivity_ids[] = {
|
||||
X86_FEATURE_MATCH(X86_FEATURE_PROC_FEEDBACK),
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, amd_freq_sensitivity_ids);
|
||||
|
||||
MODULE_AUTHOR("Jacob Shin <jacob.shin@amd.com>");
|
||||
MODULE_DESCRIPTION("AMD frequency sensitivity feedback powersave bias for "
|
||||
"the ondemand governor.");
|
||||
MODULE_LICENSE("GPL");
|
Loading…
Reference in New Issue
Block a user