Merge branches 'pm-cpuidle' and 'pm-cpufreq'
Merge CPU power management updates for 6.6-rc1:

 - Rework the menu and teo cpuidle governors to avoid calling
   tick_nohz_get_sleep_length(), which is likely to become quite
   expensive going forward, too often, and to improve the decisions on
   whether or not to stop the scheduler tick in the teo governor
   (Rafael Wysocki).

 - Improve the performance of cpufreq_stats_create_table() in some
   cases (Liao Chang).

 - Fix two issues in the amd-pstate-ut cpufreq driver (Swapnil Sapkal).

 - Use the clamp() helper macro to improve code readability in
   cpufreq_verify_within_limits() (Liao Chang).

 - Set stale CPU frequency to minimum in intel_pstate (Doug Smythies).

* pm-cpuidle:
  cpuidle: teo: Avoid unnecessary variable assignments
  cpuidle: menu: Skip tick_nohz_get_sleep_length() call in some cases
  cpuidle: teo: Gather statistics regarding whether or not to stop the tick
  cpuidle: teo: Skip tick_nohz_get_sleep_length() call in some cases
  cpuidle: teo: Do not call tick_nohz_get_sleep_length() upfront
  cpuidle: teo: Drop utilized from struct teo_cpu
  cpuidle: teo: Avoid stopping the tick unnecessarily when bailing out
  cpuidle: teo: Update idle duration estimate when choosing shallower state

* pm-cpufreq:
  cpufreq: amd-pstate-ut: Fix kernel panic when loading the driver
  cpufreq: amd-pstate-ut: Remove module parameter access
  cpufreq: Use clamp() helper macro to improve the code readability
  cpufreq: intel_pstate: set stale CPU frequency to minimum
  cpufreq: stats: Improve the performance of cpufreq_stats_create_table()
commit 1201c50c1e
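The central cpuidle change in this merge is deferring the tick_nohz_get_sleep_length() call: both governors now consult their cheap history-based estimate first and query the timer subsystem only when the estimate exceeds RESIDENCY_THRESHOLD_NS. Below is a minimal userspace sketch of that control flow; all names and numbers are illustrative, not the kernel implementation.

/*
 * Sketch: pay for the expensive timer query only when the cheap
 * history-based estimate says the sleep may be long. Hypothetical
 * helper names; values are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define RESIDENCY_THRESHOLD_NS	(15 * 1000ULL)	/* 15 us, as in gov.h */

static uint64_t cheap_history_estimate_ns(void)
{
	return 8 * 1000ULL;	/* pretend recent wakeups came after ~8 us */
}

static uint64_t expensive_timer_query_ns(void)
{
	printf("expensive query performed\n");
	return 2 * 1000 * 1000ULL;	/* next timer in 2 ms */
}

int main(void)
{
	uint64_t predicted_ns = cheap_history_estimate_ns();

	/* Only query the timer if the estimate is long enough. */
	if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
		uint64_t timer_ns = expensive_timer_query_ns();

		if (timer_ns < predicted_ns)
			predicted_ns = timer_ns;
	}

	printf("predicted idle duration: %llu ns\n",
	       (unsigned long long)predicted_ns);
	return 0;
}

With an 8 us estimate, the expensive query is skipped entirely, which is the common short-idle case both governors optimize for.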
drivers/cpufreq/amd-pstate-ut.c

@@ -64,27 +64,9 @@ static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
 static bool get_shared_mem(void)
 {
 	bool result = false;
-	char path[] = "/sys/module/amd_pstate/parameters/shared_mem";
-	char buf[5] = {0};
-	struct file *filp = NULL;
-	loff_t pos = 0;
-	ssize_t ret;
-
-	if (!boot_cpu_has(X86_FEATURE_CPPC)) {
-		filp = filp_open(path, O_RDONLY, 0);
-		if (IS_ERR(filp))
-			pr_err("%s unable to open %s file!\n", __func__, path);
-		else {
-			ret = kernel_read(filp, &buf, sizeof(buf), &pos);
-			if (ret < 0)
-				pr_err("%s read %s file fail ret=%ld!\n",
-					__func__, path, (long)ret);
-			filp_close(filp, NULL);
-		}
-
-		if ('Y' == *buf)
-			result = true;
-	}
+	if (!boot_cpu_has(X86_FEATURE_CPPC))
+		result = true;
 
 	return result;
 }
@@ -158,7 +140,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 		if (ret) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
-			return;
+			goto skip_test;
 		}
 
 		nominal_perf = cppc_perf.nominal_perf;
@@ -169,7 +151,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 		if (ret) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
-			return;
+			goto skip_test;
 		}
 
 		nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
@@ -187,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32 index)
 				nominal_perf, cpudata->nominal_perf,
 				lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
 				lowest_perf, cpudata->lowest_perf);
-			return;
+			goto skip_test;
 		}
 
 		if (!((highest_perf >= nominal_perf) &&
@@ -198,11 +180,15 @@ static void amd_pstate_ut_check_perf(u32 index)
 			pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, highest_perf, nominal_perf,
 				lowest_nonlinear_perf, lowest_perf);
-			return;
+			goto skip_test;
 		}
+		cpufreq_cpu_put(policy);
 	}
 
 	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+	return;
+skip_test:
+	cpufreq_cpu_put(policy);
 }
 
 /*
@@ -230,14 +216,14 @@ static void amd_pstate_ut_check_freq(u32 index)
 			pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
 				__func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
 				cpudata->lowest_nonlinear_freq, cpudata->min_freq);
-			return;
+			goto skip_test;
 		}
 
 		if (cpudata->min_freq != policy->min) {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n",
 				__func__, cpu, cpudata->min_freq, policy->min);
-			return;
+			goto skip_test;
 		}
 
 		if (cpudata->boost_supported) {
@@ -249,16 +235,20 @@ static void amd_pstate_ut_check_freq(u32 index)
 				pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
 					__func__, cpu, policy->max, cpudata->max_freq,
 					cpudata->nominal_freq);
-				return;
+				goto skip_test;
 			}
 		} else {
 			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
 			pr_err("%s cpu%d must support boost!\n", __func__, cpu);
-			return;
+			goto skip_test;
 		}
+		cpufreq_cpu_put(policy);
 	}
 
 	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+	return;
+skip_test:
+	cpufreq_cpu_put(policy);
 }
 
 static int __init amd_pstate_ut_init(void)
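The kernel-panic fix visible above replaces early returns with goto skip_test so that the policy reference taken at the top of each loop iteration is always dropped. A standalone sketch of the same cleanup idiom follows; acquire()/release() are hypothetical stand-ins for cpufreq_cpu_get()/cpufreq_cpu_put().

/* Sketch: every early exit funnels through one label that releases
 * the resource acquired at the top. Illustrative names only. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct policy { int cpu; };

static struct policy *acquire(int cpu)
{
	struct policy *p = malloc(sizeof(*p));

	if (p)
		p->cpu = cpu;
	return p;
}

static void release(struct policy *p)
{
	free(p);
}

static bool check_cpu(int cpu)
{
	struct policy *p = acquire(cpu);

	if (!p)
		return false;

	if (cpu % 2)		/* some per-CPU validation fails */
		goto skip_test;	/* release the reference, do not leak it */

	release(p);
	return true;

skip_test:
	release(p);
	return false;
}

int main(void)
{
	printf("cpu0 ok: %d, cpu1 ok: %d\n", check_cpu(0), check_cpu(1));
	return 0;
}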
drivers/cpufreq/cpufreq_stats.c

@@ -243,7 +243,8 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 
 	/* Find valid-unique entries */
 	cpufreq_for_each_valid_entry(pos, policy->freq_table)
-		if (freq_table_get_index(stats, pos->frequency) == -1)
+		if (policy->freq_table_sorted != CPUFREQ_TABLE_UNSORTED ||
+		    freq_table_get_index(stats, pos->frequency) == -1)
 			stats->freq_table[i++] = pos->frequency;
 
 	stats->state_num = i;
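The condition added above relies on the cpufreq core having already classified the table: a table marked sorted has strictly in-order, hence unique, frequencies, so the per-entry freq_table_get_index() scan (quadratic overall) is only needed for unsorted tables. A userspace sketch of the same short-circuit, with illustrative names:

/* Sketch: skip the O(n) uniqueness scan when the table is known sorted. */
#include <stdio.h>

static int find_index(const unsigned int *t, int n, unsigned int f)
{
	for (int i = 0; i < n; i++)
		if (t[i] == f)
			return i;
	return -1;
}

int main(void)
{
	const unsigned int table[] = { 400000, 800000, 1200000, 1800000 };
	unsigned int freq_table[4];
	int sorted = 1;	/* stands in for policy->freq_table_sorted */
	int i = 0;

	for (int j = 0; j < 4; j++)
		if (sorted || find_index(freq_table, i, table[j]) == -1)
			freq_table[i++] = table[j];	/* scan skipped when sorted */

	printf("%d unique entries\n", i);
	return 0;
}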
drivers/cpufreq/intel_pstate.c

@@ -2609,6 +2609,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		intel_pstate_clear_update_util_hook(policy->cpu);
 		intel_pstate_hwp_set(policy->cpu);
 	}
+	/*
+	 * policy->cur is never updated with the intel_pstate driver, but it
+	 * is used as a stale frequency value. So, keep it within limits.
+	 */
+	policy->cur = policy->min;
 
 	mutex_unlock(&intel_pstate_limits_lock);
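Since intel_pstate never refreshes policy->cur at runtime, the value is stale by design; the hunk above pins it to policy->min whenever the limits change so it at least stays within the policy bounds. A tiny sketch of that invariant, with made-up names:

/* Sketch: keep a deliberately stale cached value inside updated limits. */
#include <stdio.h>

struct limits { unsigned int min, max, cur; };

static void set_limits(struct limits *l, unsigned int min, unsigned int max)
{
	l->min = min;
	l->max = max;
	/* cur is stale by design; keep it within the new limits */
	l->cur = l->min;
}

int main(void)
{
	struct limits l = { .min = 800, .max = 3200, .cur = 3200 };

	set_limits(&l, 1200, 2400);
	printf("cur=%u (within [%u, %u])\n", l.cur, l.min, l.max);
	return 0;
}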
drivers/cpuidle/governors/gov.h (new file, 14 lines)

@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Common definitions for cpuidle governors. */
+
+#ifndef __CPUIDLE_GOVERNOR_H
+#define __CPUIDLE_GOVERNOR_H
+
+/*
+ * Idle state target residency threshold used for deciding whether or not to
+ * check the time till the closest expected timer event.
+ */
+#define RESIDENCY_THRESHOLD_NS	(15 * NSEC_PER_USEC)
+
+#endif /* __CPUIDLE_GOVERNOR_H */
drivers/cpuidle/governors/menu.c

@@ -19,6 +19,8 @@
 #include <linux/sched/stat.h>
 #include <linux/math64.h>
 
+#include "gov.h"
+
 #define BUCKETS 12
 #define INTERVAL_SHIFT 3
 #define INTERVALS (1UL << INTERVAL_SHIFT)
@@ -166,8 +168,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static unsigned int get_typical_interval(struct menu_device *data,
-					 unsigned int predicted_us)
+static unsigned int get_typical_interval(struct menu_device *data)
 {
 	int i, divisor;
 	unsigned int min, max, thresh, avg;
@@ -195,11 +196,7 @@ again:
 		}
 	}
 
-	/*
-	 * If the result of the computation is going to be discarded anyway,
-	 * avoid the computation altogether.
-	 */
-	if (min >= predicted_us)
+	if (!max)
 		return UINT_MAX;
 
 	if (divisor == INTERVALS)
@@ -267,7 +264,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
-	unsigned int predicted_us;
 	u64 predicted_ns;
 	u64 interactivity_req;
 	unsigned int nr_iowaiters;
@@ -279,16 +275,41 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		data->needs_update = 0;
 	}
 
-	/* determine the expected residency time, round up */
-	delta = tick_nohz_get_sleep_length(&delta_tick);
-	if (unlikely(delta < 0)) {
-		delta = 0;
-		delta_tick = 0;
-	}
-	data->next_timer_ns = delta;
-
 	nr_iowaiters = nr_iowait_cpu(dev->cpu);
-	data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+	/* Find the shortest expected idle interval. */
+	predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
+	if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
+		unsigned int timer_us;
+
+		/* Determine the time till the closest timer. */
+		delta = tick_nohz_get_sleep_length(&delta_tick);
+		if (unlikely(delta < 0)) {
+			delta = 0;
+			delta_tick = 0;
+		}
+
+		data->next_timer_ns = delta;
+		data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+		/* Round up the result for half microseconds. */
+		timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
+					data->next_timer_ns *
+						data->correction_factor[data->bucket],
+				   RESOLUTION * DECAY * NSEC_PER_USEC);
+		/* Use the lowest expected idle interval to pick the idle state. */
+		predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
+	} else {
+		/*
+		 * Because the next timer event is not going to be determined
+		 * in this case, assume that without the tick the closest timer
+		 * will be in distant future and that the closest tick will occur
+		 * after 1/2 of the tick period.
+		 */
+		data->next_timer_ns = KTIME_MAX;
+		delta_tick = TICK_NSEC / 2;
+		data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+	}
 
 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
 	    ((data->next_timer_ns < drv->states[1].target_residency_ns ||
@@ -303,16 +324,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		return 0;
 	}
 
-	/* Round up the result for half microseconds. */
-	predicted_us = div_u64(data->next_timer_ns *
-			       data->correction_factor[data->bucket] +
-			       (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
-			       RESOLUTION * DECAY * NSEC_PER_USEC);
-	/* Use the lowest expected idle interval to pick the idle state. */
-	predicted_ns = (u64)min(predicted_us,
-				get_typical_interval(data, predicted_us)) *
-				NSEC_PER_USEC;
-
 	if (tick_nohz_tick_stopped()) {
 		/*
 		 * If the tick is already stopped, the cost of possible short
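The timer_us computation retained inside the new branch is fixed-point arithmetic: correction_factor is stored in RESOLUTION * DECAY units, and adding half the divisor before dividing rounds to the nearest microsecond. A small worked example using the kernel's constants; the factor value itself is invented:

/* Worked example of menu's fixed-point correction and rounding. */
#include <stdint.h>
#include <stdio.h>

#define RESOLUTION	1024
#define DECAY		8
#define NSEC_PER_USEC	1000ULL

int main(void)
{
	uint64_t next_timer_ns = 1500 * NSEC_PER_USEC;	/* timer 1500 us away */
	uint64_t factor = (RESOLUTION * DECAY) / 2;	/* i.e. "halve it" */
	uint64_t div = RESOLUTION * DECAY * NSEC_PER_USEC;
	/* Adding div/2 before the division rounds half up. */
	uint64_t timer_us = (div / 2 + next_timer_ns * factor) / div;

	printf("corrected estimate: %llu us\n",
	       (unsigned long long)timer_us);	/* prints 750 */
	return 0;
}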
drivers/cpuidle/governors/teo.c

@@ -140,6 +140,8 @@
 #include <linux/sched/topology.h>
 #include <linux/tick.h>
 
+#include "gov.h"
+
 /*
  * The number of bits to shift the CPU's capacity by in order to determine
  * the utilized threshold.
@@ -152,7 +154,6 @@
  */
 #define UTIL_THRESHOLD_SHIFT 6
 
-
 /*
  * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
  * is used for decreasing metrics on a regular basis.
@@ -186,8 +187,8 @@ struct teo_bin {
  * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
  * @next_recent_idx: Index of the next @recent_idx entry to update.
  * @recent_idx: Indices of bins corresponding to recent "intercepts".
+ * @tick_hits: Number of "hits" after TICK_NSEC.
  * @util_threshold: Threshold above which the CPU is considered utilized
- * @utilized: Whether the last sleep on the CPU happened while utilized
  */
 struct teo_cpu {
 	s64 time_span_ns;
@@ -196,8 +197,8 @@ struct teo_cpu {
 	unsigned int total;
 	int next_recent_idx;
 	int recent_idx[NR_RECENT];
+	unsigned int tick_hits;
 	unsigned long util_threshold;
-	bool utilized;
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -228,6 +229,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	int i, idx_timer = 0, idx_duration = 0;
+	s64 target_residency_ns;
 	u64 measured_ns;
 
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -268,7 +270,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * fall into.
 	 */
 	for (i = 0; i < drv->state_count; i++) {
-		s64 target_residency_ns = drv->states[i].target_residency_ns;
 		struct teo_bin *bin = &cpu_data->state_bins[i];
 
 		bin->hits -= bin->hits >> DECAY_SHIFT;
@@ -276,6 +277,8 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 		cpu_data->total += bin->hits + bin->intercepts;
 
+		target_residency_ns = drv->states[i].target_residency_ns;
+
 		if (target_residency_ns <= cpu_data->sleep_length_ns) {
 			idx_timer = i;
 			if (target_residency_ns <= measured_ns)
@@ -290,6 +293,26 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	if (cpu_data->recent_idx[i] >= 0)
 		cpu_data->state_bins[cpu_data->recent_idx[i]].recent--;
 
+	/*
+	 * If the deepest state's target residency is below the tick length,
+	 * make a record of it to help teo_select() decide whether or not
+	 * to stop the tick. This effectively adds an extra hits-only bin
+	 * beyond the last state-related one.
+	 */
+	if (target_residency_ns < TICK_NSEC) {
+		cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT;
+
+		cpu_data->total += cpu_data->tick_hits;
+
+		if (TICK_NSEC <= cpu_data->sleep_length_ns) {
+			idx_timer = drv->state_count;
+			if (TICK_NSEC <= measured_ns) {
+				cpu_data->tick_hits += PULSE;
+				goto end;
+			}
+		}
+	}
+
 	/*
	 * If the measured idle duration falls into the same bin as the sleep
	 * length, this is a "hit", so update the "hits" metric for that bin.
@@ -305,18 +328,14 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		cpu_data->recent_idx[i] = idx_duration;
 	}
 
+end:
 	cpu_data->total += PULSE;
 }
 
-static bool teo_time_ok(u64 interval_ns)
+static bool teo_state_ok(int i, struct cpuidle_driver *drv)
 {
-	return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
-}
-
-static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
-{
-	return (drv->states[idx].target_residency_ns +
-		drv->states[idx+1].target_residency_ns) / 2;
+	return !tick_nohz_tick_stopped() ||
+	       drv->states[i].target_residency_ns >= TICK_NSEC;
 }
 
 /**
@@ -356,6 +375,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+	ktime_t delta_tick = TICK_NSEC / 2;
+	unsigned int tick_intercept_sum = 0;
 	unsigned int idx_intercept_sum = 0;
 	unsigned int intercept_sum = 0;
 	unsigned int idx_recent_sum = 0;
@@ -365,7 +386,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	int constraint_idx = 0;
 	int idx0 = 0, idx = -1;
 	bool alt_intercepts, alt_recent;
-	ktime_t delta_tick;
+	bool cpu_utilized;
 	s64 duration_ns;
 	int i;
 
@@ -375,44 +396,48 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	}
 
 	cpu_data->time_span_ns = local_clock();
-
-	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
-	cpu_data->sleep_length_ns = duration_ns;
+	/*
+	 * Set the expected sleep length to infinity in case of an early
+	 * return.
+	 */
+	cpu_data->sleep_length_ns = KTIME_MAX;
 
 	/* Check if there is any choice in the first place. */
 	if (drv->state_count < 2) {
 		idx = 0;
-		goto end;
-	}
-	if (!dev->states_usage[0].disable) {
-		idx = 0;
-		if (drv->states[1].target_residency_ns > duration_ns)
-			goto end;
+		goto out_tick;
 	}
 
-	cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
+	if (!dev->states_usage[0].disable)
+		idx = 0;
+
+	cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
 	/*
	 * If the CPU is being utilized over the threshold and there are only 2
	 * states to choose from, the metrics need not be considered, so choose
	 * the shallowest non-polling state and exit.
	 */
-	if (drv->state_count < 3 && cpu_data->utilized) {
-		for (i = 0; i < drv->state_count; ++i) {
-			if (!dev->states_usage[i].disable &&
-			    !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
-				idx = i;
-				goto end;
-			}
-		}
+	if (drv->state_count < 3 && cpu_utilized) {
+		/*
+		 * If state 0 is enabled and it is not a polling one, select it
+		 * right away unless the scheduler tick has been stopped, in
+		 * which case care needs to be taken to leave the CPU in a deep
+		 * enough state in case it is not woken up any time soon after
+		 * all. If state 1 is disabled, though, state 0 must be used
+		 * anyway.
+		 */
+		if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+		    teo_state_ok(0, drv)) || dev->states_usage[1].disable) {
+			idx = 0;
+			goto out_tick;
+		}
+		/* Assume that state 1 is not a polling one and use it. */
+		idx = 1;
+		duration_ns = drv->states[1].target_residency_ns;
 		goto end;
 	}
 
-	/*
-	 * Find the deepest idle state whose target residency does not exceed
-	 * the current sleep length and the deepest idle state not deeper than
-	 * the former whose exit latency does not exceed the current latency
-	 * constraint. Compute the sums of metrics for early wakeup pattern
-	 * detection.
-	 */
+	/* Compute the sums of metrics for early wakeup pattern detection. */
 	for (i = 1; i < drv->state_count; i++) {
 		struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
 		struct cpuidle_state *s = &drv->states[i];
@@ -428,19 +453,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		if (dev->states_usage[i].disable)
 			continue;
 
-		if (idx < 0) {
-			idx = i; /* first enabled state */
-			idx0 = i;
-		}
-
-		if (s->target_residency_ns > duration_ns)
-			break;
+		if (idx < 0)
+			idx0 = i; /* first enabled state */
 
 		idx = i;
 
 		if (s->exit_latency_ns <= latency_req)
 			constraint_idx = i;
 
+		/* Save the sums for the current state. */
 		idx_intercept_sum = intercept_sum;
 		idx_hit_sum = hit_sum;
 		idx_recent_sum = recent_sum;
@@ -449,11 +470,21 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	/* Avoid unnecessary overhead. */
 	if (idx < 0) {
 		idx = 0; /* No states enabled, must use 0. */
-		goto end;
-	} else if (idx == idx0) {
+		goto out_tick;
+	}
+
+	if (idx == idx0) {
+		/*
		 * Only one idle state is enabled, so use it, but do not
		 * allow the tick to be stopped if it is shallow enough.
		 */
+		duration_ns = drv->states[idx].target_residency_ns;
 		goto end;
 	}
 
+	tick_intercept_sum = intercept_sum +
+			cpu_data->state_bins[drv->state_count-1].intercepts;
+
 	/*
	 * If the sum of the intercepts metric for all of the idle states
	 * shallower than the current candidate one (idx) is greater than the
@@ -461,13 +492,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
	 * all of the deeper states, or the sum of the numbers of recent
	 * intercepts over all of the states shallower than the candidate one
	 * is greater than a half of the number of recent events taken into
-	 * account, the CPU is likely to wake up early, so find an alternative
-	 * idle state to select.
+	 * account, a shallower idle state is likely to be a better choice.
	 */
 	alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
 	alt_recent = idx_recent_sum > NR_RECENT / 2;
 	if (alt_recent || alt_intercepts) {
-		s64 first_suitable_span_ns = duration_ns;
 		int first_suitable_idx = idx;
 
 		/*
@@ -476,44 +505,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		 * cases (both with respect to intercepts overall and with
		 * respect to the recent intercepts only) in the past.
		 *
-		 * Take the possible latency constraint and duration limitation
-		 * present if the tick has been stopped already into account.
+		 * Take the possible duration limitation present if the tick
+		 * has been stopped already into account.
		 */
 		intercept_sum = 0;
 		recent_sum = 0;
 
 		for (i = idx - 1; i >= 0; i--) {
 			struct teo_bin *bin = &cpu_data->state_bins[i];
-			s64 span_ns;
 
 			intercept_sum += bin->intercepts;
 			recent_sum += bin->recent;
 
-			span_ns = teo_middle_of_bin(i, drv);
-
 			if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
 			    (!alt_intercepts ||
 			     2 * intercept_sum > idx_intercept_sum)) {
-				if (teo_time_ok(span_ns) &&
-				    !dev->states_usage[i].disable) {
+				/*
				 * Use the current state unless it is too
				 * shallow or disabled, in which case take the
				 * first enabled state that is deep enough.
				 */
+				if (teo_state_ok(i, drv) &&
+				    !dev->states_usage[i].disable)
 					idx = i;
-					duration_ns = span_ns;
-				} else {
-					/*
-					 * The current state is too shallow or
-					 * disabled, so take the first enabled
-					 * deeper state with suitable time span.
-					 */
+				else
 					idx = first_suitable_idx;
-					duration_ns = first_suitable_span_ns;
-				}
 
 				break;
 			}
 
 			if (dev->states_usage[i].disable)
 				continue;
 
-			if (!teo_time_ok(span_ns)) {
+			if (!teo_state_ok(i, drv)) {
 				/*
				 * The current state is too shallow, but if an
				 * alternative candidate state has been found,
@@ -525,7 +549,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				break;
 			}
 
-			first_suitable_span_ns = span_ns;
 			first_suitable_idx = i;
 		}
 	}
@@ -539,31 +562,75 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 	/*
	 * If the CPU is being utilized over the threshold, choose a shallower
-	 * non-polling state to improve latency
+	 * non-polling state to improve latency, unless the scheduler tick has
+	 * been stopped already and the shallower state's target residency is
+	 * not sufficiently large.
	 */
-	if (cpu_data->utilized)
-		idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
+	if (cpu_utilized) {
+		i = teo_find_shallower_state(drv, dev, idx, KTIME_MAX, true);
+		if (teo_state_ok(i, drv))
+			idx = i;
+	}
 
+	/*
+	 * Skip the timers check if state 0 is the current candidate one,
+	 * because an immediate non-timer wakeup is expected in that case.
+	 */
+	if (!idx)
+		goto out_tick;
+
+	/*
+	 * If state 0 is a polling one, check if the target residency of
+	 * the current candidate state is low enough and skip the timers
+	 * check in that case too.
+	 */
+	if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+	    drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS)
+		goto out_tick;
+
+	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+	cpu_data->sleep_length_ns = duration_ns;
+
+	/*
+	 * If the closest expected timer is before the target residency of the
+	 * candidate state, a shallower one needs to be found.
+	 */
+	if (drv->states[idx].target_residency_ns > duration_ns) {
+		i = teo_find_shallower_state(drv, dev, idx, duration_ns, false);
+		if (teo_state_ok(i, drv))
+			idx = i;
+	}
+
+	/*
+	 * If the selected state's target residency is below the tick length
+	 * and intercepts occurring before the tick length are the majority of
+	 * total wakeup events, do not stop the tick.
+	 */
+	if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+	    tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8)
+		duration_ns = TICK_NSEC / 2;
+
 end:
 	/*
-	 * Don't stop the tick if the selected state is a polling one or if the
-	 * expected idle duration is shorter than the tick period length.
+	 * Allow the tick to be stopped unless the selected state is a polling
+	 * one or the expected idle duration is shorter than the tick period
+	 * length.
	 */
-	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	    duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
-		*stop_tick = false;
+	if ((!(drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+	    duration_ns >= TICK_NSEC) || tick_nohz_tick_stopped())
+		return idx;
 
-		/*
-		 * The tick is not going to be stopped, so if the target
-		 * residency of the state to be returned is not within the time
-		 * till the closest timer including the tick, try to correct
-		 * that.
-		 */
-		if (idx > idx0 &&
-		    drv->states[idx].target_residency_ns > delta_tick)
-			idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
-	}
+	/*
	 * The tick is not going to be stopped, so if the target residency of
	 * the state to be returned is not within the time till the closest
	 * timer including the tick, try to correct that.
	 */
+	if (idx > idx0 &&
+	    drv->states[idx].target_residency_ns > delta_tick)
+		idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
 
+out_tick:
+	*stop_tick = false;
 	return idx;
 }
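The hits/intercepts counters teo_update() maintains, including the new tick_hits one, follow a decay-then-pulse scheme: on every update each counter loses 1/2^DECAY_SHIFT of its value and the bin matching the observed wakeup gains PULSE. A self-contained sketch of that bookkeeping, simplified relative to the kernel code:

/* Sketch of teo's decaying metrics; constants mirror the kernel ones. */
#include <stdio.h>

#define PULSE		1024
#define DECAY_SHIFT	3

struct bin { unsigned int hits, intercepts; };

static void decay_and_pulse(struct bin *b, int was_hit)
{
	/* Decay both counters, then reward the bin that matched. */
	b->hits -= b->hits >> DECAY_SHIFT;
	b->intercepts -= b->intercepts >> DECAY_SHIFT;
	if (was_hit)
		b->hits += PULSE;
	else
		b->intercepts += PULSE;
}

int main(void)
{
	struct bin b = { 0, 0 };

	/* A run of hits followed by intercepts shifts the balance. */
	for (int i = 0; i < 8; i++)
		decay_and_pulse(&b, 1);
	for (int i = 0; i < 4; i++)
		decay_and_pulse(&b, 0);

	printf("hits=%u intercepts=%u\n", b.hits, b.intercepts);
	return 0;
}

Because old observations decay geometrically, recent behavior dominates the statistics, which is what lets the governor track changing wakeup patterns.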
include/linux/cpufreq.h

@@ -19,6 +19,7 @@
 #include <linux/pm_qos.h>
 #include <linux/spinlock.h>
 #include <linux/sysfs.h>
+#include <linux/minmax.h>
 
 /*********************************************************************
  *                        CPUFREQ INTERFACE                          *
@@ -467,17 +468,8 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *policy,
 						unsigned int min,
 						unsigned int max)
 {
-	if (policy->min < min)
-		policy->min = min;
-	if (policy->max < min)
-		policy->max = min;
-	if (policy->min > max)
-		policy->min = max;
-	if (policy->max > max)
-		policy->max = max;
-	if (policy->min > policy->max)
-		policy->min = policy->max;
-	return;
+	policy->max = clamp(policy->max, min, max);
+	policy->min = clamp(policy->min, min, policy->max);
 }
 
 static inline void
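The two clamp() calls are behaviorally equivalent to the removed if-chain: policy->max is clamped to [min, max] first, and policy->min is then clamped to [min, policy->max], which also covers the old min > max fixup. A quick userspace check of that equivalence, with a local clamp() macro standing in for the kernel one from <linux/minmax.h>:

/* Exhaustively compare the old if-chain with the clamp() form. */
#include <stdio.h>

#define clamp(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

struct pol { unsigned int min, max; };

static void old_verify(struct pol *p, unsigned int min, unsigned int max)
{
	if (p->min < min) p->min = min;
	if (p->max < min) p->max = min;
	if (p->min > max) p->min = max;
	if (p->max > max) p->max = max;
	if (p->min > p->max) p->min = p->max;
}

static void new_verify(struct pol *p, unsigned int min, unsigned int max)
{
	p->max = clamp(p->max, min, max);
	p->min = clamp(p->min, min, p->max);
}

int main(void)
{
	for (unsigned int a = 0; a < 5; a++)
		for (unsigned int b = 0; b < 5; b++) {
			struct pol x = { a, b }, y = { a, b };

			old_verify(&x, 1, 3);
			new_verify(&y, 1, 3);
			if (x.min != y.min || x.max != y.max)
				printf("mismatch at min=%u max=%u\n", a, b);
		}
	printf("done\n");	/* no mismatches expected */
	return 0;
}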