From a4a008e53c9e5ac8d6070bd1b2eddfc1ae0d2338 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 31 Aug 2018 11:22:29 +0300 Subject: [PATCH 01/14] intel_idle: Get rid of custom ICPU() macro Replace custom grown macro with generic INTEL_CPU_FAM6() one. No functional change intended. Signed-off-by: Andy Shevchenko Acked-by: Jacob Pan Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 75 +++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b2ccce5fb071..791b8a366e6e 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1066,46 +1066,43 @@ static const struct idle_cpu idle_cpu_dnv = { .disable_promotion_to_c1e = true, }; -#define ICPU(model, cpu) \ - { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu } - static const struct x86_cpu_id intel_idle_ids[] __initconst = { - ICPU(INTEL_FAM6_NEHALEM_EP, idle_cpu_nehalem), - ICPU(INTEL_FAM6_NEHALEM, idle_cpu_nehalem), - ICPU(INTEL_FAM6_NEHALEM_G, idle_cpu_nehalem), - ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), - ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), - ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), - ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), - ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), - ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), - ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), - ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), - ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier), - ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), - ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), - ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), - ICPU(INTEL_FAM6_HASWELL_CORE, idle_cpu_hsw), - ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), - ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), - ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), - ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), - ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), - ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), - ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), - ICPU(INTEL_FAM6_BROADWELL_XEON_D, idle_cpu_bdw), - ICPU(INTEL_FAM6_SKYLAKE_MOBILE, idle_cpu_skl), - ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, idle_cpu_skl), - ICPU(INTEL_FAM6_KABYLAKE_MOBILE, idle_cpu_skl), - ICPU(INTEL_FAM6_KABYLAKE_DESKTOP, idle_cpu_skl), - ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), - ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), - ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl), - ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, idle_cpu_bxt), - ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), + INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nehalem), + INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem), + INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem), + INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem), + INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nehalem), + INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nehalem), + INTEL_CPU_FAM6(ATOM_PINEVIEW, idle_cpu_atom), + INTEL_CPU_FAM6(ATOM_LINCROFT, idle_cpu_lincroft), + INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nehalem), + INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb), + INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snb), + INTEL_CPU_FAM6(ATOM_CEDARVIEW, idle_cpu_atom), + INTEL_CPU_FAM6(ATOM_SILVERMONT1, idle_cpu_byt), + INTEL_CPU_FAM6(ATOM_MERRIFIELD, idle_cpu_tangier), + INTEL_CPU_FAM6(ATOM_AIRMONT, idle_cpu_cht), + INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb), + INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt), + INTEL_CPU_FAM6(HASWELL_CORE, idle_cpu_hsw), + INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsw), + INTEL_CPU_FAM6(HASWELL_ULT, idle_cpu_hsw), + INTEL_CPU_FAM6(HASWELL_GT3E, idle_cpu_hsw), + INTEL_CPU_FAM6(ATOM_SILVERMONT2, idle_cpu_avn), + INTEL_CPU_FAM6(BROADWELL_CORE, idle_cpu_bdw), + INTEL_CPU_FAM6(BROADWELL_GT3E, idle_cpu_bdw), + INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdw), + INTEL_CPU_FAM6(BROADWELL_XEON_D, idle_cpu_bdw), + INTEL_CPU_FAM6(SKYLAKE_MOBILE, idle_cpu_skl), + INTEL_CPU_FAM6(SKYLAKE_DESKTOP, idle_cpu_skl), + INTEL_CPU_FAM6(KABYLAKE_MOBILE, idle_cpu_skl), + INTEL_CPU_FAM6(KABYLAKE_DESKTOP, idle_cpu_skl), + INTEL_CPU_FAM6(SKYLAKE_X, idle_cpu_skx), + INTEL_CPU_FAM6(XEON_PHI_KNL, idle_cpu_knl), + INTEL_CPU_FAM6(XEON_PHI_KNM, idle_cpu_knl), + INTEL_CPU_FAM6(ATOM_GOLDMONT, idle_cpu_bxt), + INTEL_CPU_FAM6(ATOM_GEMINI_LAKE, idle_cpu_bxt), + INTEL_CPU_FAM6(ATOM_DENVERTON, idle_cpu_dnv), {} }; From 6a5f95b5a4f4ff29e4071bc5b95f8f3a2aef046b Mon Sep 17 00:00:00 2001 From: Fieah Lim Date: Tue, 11 Sep 2018 03:59:01 +0800 Subject: [PATCH 02/14] cpuidle: Remove unnecessary wrapper cpuidle_get_last_residency() cpuidle_get_last_residency() is just a wrapper for retrieving the last_residency member of struct cpuidle_device. It's also weirdly the only wrapper function for accessing cpuidle_* struct member (by my best guess is it could be a leftover from v2.x). Anyhow, since the only two users (the ladder and menu governors) can access dev->last_residency directly, and it's more intuitive to do it that way, let's just get rid of the wrapper. This patch tidies up CPU idle code a bit without functional changes. Signed-off-by: Fieah Lim [ rjw: Changelog cleanup ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/ladder.c | 2 +- drivers/cpuidle/governors/menu.c | 2 +- include/linux/cpuidle.h | 10 ---------- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 704880a6612a..f0dddc66af26 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -80,7 +80,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state = &ldev->states[last_idx]; - last_residency = cpuidle_get_last_residency(dev) - drv->states[last_idx].exit_latency; + last_residency = dev->last_residency - drv->states[last_idx].exit_latency; /* consider promotion */ if (last_idx < drv->state_count - 1 && diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index e26a40971b26..021b08dd139c 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -514,7 +514,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) measured_us = 9 * MAX_INTERESTING / 10; } else { /* measured value */ - measured_us = cpuidle_get_last_residency(dev); + measured_us = dev->last_residency; /* Deduct exit latency */ if (measured_us > 2 * target->exit_latency) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 4325d6fdde9b..d262f620890e 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -99,16 +99,6 @@ struct cpuidle_device { DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev); -/** - * cpuidle_get_last_residency - retrieves the last state's residency time - * @dev: the target CPU - */ -static inline int cpuidle_get_last_residency(struct cpuidle_device *dev) -{ - return dev->last_residency; -} - - /**************************** * CPUIDLE DRIVER INTERFACE * ****************************/ From 7037b43e0076cce1c07c72540e219fb5db7ea01f Mon Sep 17 00:00:00 2001 From: Fieah Lim Date: Tue, 11 Sep 2018 05:47:25 +0800 Subject: [PATCH 03/14] cpuidle: enter_state: Don't needlessly calculate diff time Currently, ktime_us_delta() is invoked unconditionally to compute the idle residency of the CPU, but it only makes sense to do that if a valid idle state has been entered, so move the ktime_us_delta() invocation after the entered_state >= 0 check. While at it, merge two comment blocks in there into one and drop a space between type casting of diff. This patch has no functional changes. Signed-off-by: Fieah Lim [ rjw: Changelog cleanup, comment format fix ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 6df894d65d9e..4a97446f66d8 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -247,17 +247,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, if (!cpuidle_state_is_coupled(drv, index)) local_irq_enable(); - diff = ktime_us_delta(time_end, time_start); - if (diff > INT_MAX) - diff = INT_MAX; - - dev->last_residency = (int) diff; - if (entered_state >= 0) { - /* Update cpuidle counters */ - /* This can be moved to within driver enter routine + /* + * Update cpuidle counters + * This can be moved to within driver enter routine, * but that results in multiple copies of same code. */ + diff = ktime_us_delta(time_end, time_start); + if (diff > INT_MAX) + diff = INT_MAX; + + dev->last_residency = (int)diff; dev->states_usage[entered_state].time += dev->last_residency; dev->states_usage[entered_state].usage++; } else { From 03dba278043358bbbf4f029be169e1e73d2fbe2b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 1 Oct 2018 11:56:21 +0200 Subject: [PATCH 04/14] cpuidle: menu: Replace data->predicted_us with local variable The predicted_us field in struct menu_device is only accessed in menu_select(), so replace it with a local variable in that function. With that, stop using expected_interval instead of predicted_us to store the new predicted idle duration value if it is set to the selected state's target residency which is quite confusing. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano --- drivers/cpuidle/governors/menu.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 021b08dd139c..b6684fd89085 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -124,7 +124,6 @@ struct menu_device { int tick_wakeup; unsigned int next_timer_us; - unsigned int predicted_us; unsigned int bucket; unsigned int correction_factor[BUCKETS]; unsigned int intervals[INTERVALS]; @@ -290,6 +289,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int idx; unsigned int interactivity_req; unsigned int expected_interval; + unsigned int predicted_us; unsigned long nr_iowaiters, cpu_load; ktime_t delta_next; @@ -315,7 +315,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * operands are 32 bits. * Make sure to round up for half microseconds. */ - data->predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us * + predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); @@ -341,7 +341,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* * Use the lowest expected idle interval to pick the idle state. */ - data->predicted_us = min(data->predicted_us, expected_interval); + predicted_us = min(predicted_us, expected_interval); if (tick_nohz_tick_stopped()) { /* @@ -352,19 +352,18 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * the known time till the closest timer event for the idle * state selection. */ - if (data->predicted_us < TICK_USEC) - data->predicted_us = ktime_to_us(delta_next); + if (predicted_us < TICK_USEC) + predicted_us = ktime_to_us(delta_next); } else { /* * Use the performance multiplier and the user-configurable * latency_req to determine the maximum exit latency. */ - interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load); + interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load); if (latency_req > interactivity_req) latency_req = interactivity_req; } - expected_interval = data->predicted_us; /* * Find the idle state with the lowest power while satisfying * our constraints. @@ -378,8 +377,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, continue; if (idx == -1) idx = i; /* first enabled state */ - if (s->target_residency > data->predicted_us) { - if (data->predicted_us < TICK_USEC) + if (s->target_residency > predicted_us) { + if (predicted_us < TICK_USEC) break; if (!tick_nohz_tick_stopped()) { @@ -389,7 +388,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * tick in that case and let the governor run * again in the next iteration of the loop. */ - expected_interval = drv->states[idx].target_residency; + predicted_us = drv->states[idx].target_residency; break; } @@ -412,7 +411,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * expected idle duration so that the tick is retained * as long as that target residency is low enough. */ - expected_interval = drv->states[idx].target_residency; + predicted_us = drv->states[idx].target_residency; break; } idx = i; @@ -426,7 +425,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * expected idle duration is shorter than the tick period length. */ if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) || - expected_interval < TICK_USEC) && !tick_nohz_tick_stopped()) { + predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) { unsigned int delta_next_us = ktime_to_us(delta_next); *stop_tick = false; From 5f26bdceb9c0a5e6c696aa2899d077cd3ae93413 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Oct 2018 23:42:02 +0200 Subject: [PATCH 05/14] cpuidle: menu: Fix wakeup statistics updates for polling state If the CPU exits the "polling" state due to the time limit in the loop in poll_idle(), this is not a real wakeup and it just means that the "polling" state selection was not adequate. The governor mispredicted short idle duration, but had a more suitable state been selected, the CPU might have spent more time in it. In fact, there is no reason to expect that there would have been a wakeup event earlier than the next timer in that case. Handling such cases as regular wakeups in menu_update() may cause the menu governor to make suboptimal decisions going forward, but ignoring them altogether would not be correct either, because every time menu_select() is invoked, it makes a separate new attempt to predict the idle duration taking distinct time to the closest timer event as input and the outcomes of all those attempts should be recorded. For this reason, make menu_update() always assume that if the "polling" state was exited due to the time limit, the next proper wakeup event for the CPU would be the next timer event (not including the tick). Fixes: a37b969a61c1 "cpuidle: poll_state: Add time limit to poll_idle()" Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Lezcano --- drivers/cpuidle/governors/menu.c | 10 ++++++++++ drivers/cpuidle/poll_state.c | 6 +++++- include/linux/cpuidle.h | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b6684fd89085..8b3f9c7bf221 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -511,6 +511,16 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * duration predictor do a better job next time. */ measured_us = 9 * MAX_INTERESTING / 10; + } else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) && + dev->poll_time_limit) { + /* + * The CPU exited the "polling" state due to a time limit, so + * the idle duration prediction leading to the selection of that + * state was inaccurate. If a better prediction had been made, + * the CPU might have been woken up from idle by the next timer. + * Assume that to be the case. + */ + measured_us = data->next_timer_us; } else { /* measured value */ measured_us = dev->last_residency; diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index 3f86d23c592e..36ff5a1d9422 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -17,6 +17,8 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, { u64 time_start = local_clock(); + dev->poll_time_limit = false; + local_irq_enable(); if (!current_set_polling_and_test()) { unsigned int loop_count = 0; @@ -27,8 +29,10 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, continue; loop_count = 0; - if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT) + if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT) { + dev->poll_time_limit = true; break; + } } } current_clr_polling(); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index d262f620890e..faed7a8977e8 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -81,6 +81,7 @@ struct cpuidle_device { unsigned int registered:1; unsigned int enabled:1; unsigned int use_deepest_state:1; + unsigned int poll_time_limit:1; unsigned int cpu; int last_residency; From 23e8ceb9ce766c81d62434053aef6e7efea6fcc3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Oct 2018 23:42:56 +0200 Subject: [PATCH 06/14] cpuidle: menu: Compute first_idx when latency_req is known Since menu_select() can only set first_idx to 1 if the exit latency of the second state is not greater than the latency limit, it should first determine that limit. Thus first_idx should be computed after the "interactivity" factor has been taken into account. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewedy-by: Daniel Lezcano --- drivers/cpuidle/governors/menu.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 8b3f9c7bf221..98a39c57acec 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -322,22 +322,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, expected_interval = get_typical_interval(data); expected_interval = min(expected_interval, data->next_timer_us); - first_idx = 0; - if (drv->states[0].flags & CPUIDLE_FLAG_POLLING) { - struct cpuidle_state *s = &drv->states[1]; - unsigned int polling_threshold; - - /* - * Default to a physical idle state, not to busy polling, unless - * a timer is going to trigger really really soon. - */ - polling_threshold = max_t(unsigned int, 20, s->target_residency); - if (data->next_timer_us > polling_threshold && - latency_req > s->exit_latency && !s->disabled && - !dev->states_usage[1].disable) - first_idx = 1; - } - /* * Use the lowest expected idle interval to pick the idle state. */ @@ -364,6 +348,22 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, latency_req = interactivity_req; } + first_idx = 0; + if (drv->states[0].flags & CPUIDLE_FLAG_POLLING) { + struct cpuidle_state *s = &drv->states[1]; + unsigned int polling_threshold; + + /* + * Default to a physical idle state, not to busy polling, unless + * a timer is going to trigger really really soon. + */ + polling_threshold = max_t(unsigned int, 20, s->target_residency); + if (data->next_timer_us > polling_threshold && + latency_req > s->exit_latency && !s->disabled && + !dev->states_usage[1].disable) + first_idx = 1; + } + /* * Find the idle state with the lowest power while satisfying * our constraints. From 96c3d11df15323e7f6e55242f7bda610c4bef402 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Oct 2018 23:44:06 +0200 Subject: [PATCH 07/14] cpuidle: menu: Get rid of first_idx from menu_select() Rearrange the code in menu_select() so that the loop over idle states always starts from 0 and get rid of the first_idx variable. While at it, add two empty lines to separate conditional statements from one another. No intentional behavior changes. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Lezcano --- drivers/cpuidle/governors/menu.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 98a39c57acec..5c7fafcb1135 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -285,7 +285,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, struct menu_device *data = this_cpu_ptr(&menu_devices); int latency_req = cpuidle_governor_latency_req(dev->cpu); int i; - int first_idx; int idx; unsigned int interactivity_req; unsigned int expected_interval; @@ -348,36 +347,33 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, latency_req = interactivity_req; } - first_idx = 0; - if (drv->states[0].flags & CPUIDLE_FLAG_POLLING) { - struct cpuidle_state *s = &drv->states[1]; - unsigned int polling_threshold; - - /* - * Default to a physical idle state, not to busy polling, unless - * a timer is going to trigger really really soon. - */ - polling_threshold = max_t(unsigned int, 20, s->target_residency); - if (data->next_timer_us > polling_threshold && - latency_req > s->exit_latency && !s->disabled && - !dev->states_usage[1].disable) - first_idx = 1; - } - /* * Find the idle state with the lowest power while satisfying * our constraints. */ idx = -1; - for (i = first_idx; i < drv->state_count; i++) { + for (i = 0; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; struct cpuidle_state_usage *su = &dev->states_usage[i]; if (s->disabled || su->disable) continue; + if (idx == -1) idx = i; /* first enabled state */ + if (s->target_residency > predicted_us) { + /* + * Use a physical idle state, not busy polling, unless + * a timer is going to trigger really really soon. + */ + if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && + i == idx + 1 && latency_req > s->exit_latency && + data->next_timer_us > max_t(unsigned int, 20, + s->target_residency)) { + idx = i; + break; + } if (predicted_us < TICK_USEC) break; From eb40a380bff28f84b6583bba6786b46ef26ef548 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Oct 2018 23:45:07 +0200 Subject: [PATCH 08/14] cpuidle: menu: Do not update last_state_idx in menu_select() It is not necessary to update data->last_state_idx in menu_select() as it only is used in menu_update() which only runs when data->needs_update is set and that is set only when updating data->last_state_idx in menu_reflect(). Accordingly, drop the update of data->last_state_idx from menu_select() and get rid of the (now redundant) "out" label from it. No intentional behavior changes. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Lezcano --- drivers/cpuidle/governors/menu.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 5c7fafcb1135..968379d4f207 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -398,7 +398,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, s->target_residency <= ktime_to_us(delta_next)) idx = i; - goto out; + return idx; } if (s->exit_latency > latency_req) { /* @@ -445,10 +445,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, } } -out: - data->last_state_idx = idx; - - return data->last_state_idx; + return idx; } /** From 8b007ebec9a5d120d25c00d9b771aadf45ac5177 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Oct 2018 23:46:28 +0200 Subject: [PATCH 09/14] cpuidle: menu: Avoid computations for very close timers If the next timer event (not including the tick) is closer than the target residency of the second state or the PM QoS latency constraint is below its exit latency, state[0] will be used regardless of any other factors, so skip the computations in menu_select() then and return 0 straight away from it. Still, do that after the bucket has been determined to avoid updating the correction factor for a stale bucket. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- drivers/cpuidle/governors/menu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 968379d4f207..667606c6e284 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -309,6 +309,18 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, get_iowait_load(&nr_iowaiters, &cpu_load); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); + if (unlikely(drv->state_count <= 1) || + ((data->next_timer_us < drv->states[1].target_residency || + latency_req < drv->states[1].exit_latency) && + !drv->states[0].disabled && !dev->states_usage[0].disable)) { + /* + * In this case state[0] will be used no matter what, so return + * it right away and keep the tick running. + */ + *stop_tick = false; + return 0; + } + /* * Force the result of multiplication to be 64 bits even if both * operands are 32 bits. From 53812cdc9100e19f2e782851964355f2db5583de Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Oct 2018 23:47:43 +0200 Subject: [PATCH 10/14] cpuidle: menu: Move the latency_req == 0 special case check It is better to always update data->bucket before returning from menu_select() to avoid updating the correction factor for a stale bucket, so combine the latency_req == 0 special check with the more general check below. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- drivers/cpuidle/governors/menu.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 667606c6e284..b754296eb0c5 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -297,19 +297,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, data->needs_update = 0; } - /* Special case when user has set very strict latency requirement */ - if (unlikely(latency_req == 0)) { - *stop_tick = false; - return 0; - } - /* determine the expected residency time, round up */ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); get_iowait_load(&nr_iowaiters, &cpu_load); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); - if (unlikely(drv->state_count <= 1) || + if (unlikely(drv->state_count <= 1 || latency_req == 0) || ((data->next_timer_us < drv->states[1].target_residency || latency_req < drv->states[1].exit_latency) && !drv->states[0].disabled && !dev->states_usage[0].disable)) { From 01bad1c6896db021db82042e71c2bf1f97cc026b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Oct 2018 23:50:30 +0200 Subject: [PATCH 11/14] cpuidle: poll_state: Revise loop termination condition If need_resched() returns "false", breaking out of the loop in poll_idle() will cause a new idle state to be selected, so in fact it usually doesn't make sense to spin in it longer than the target residency of the second state. [Note that the "polling" state is used only if there is at least one "real" state defined in addition to it, so the second state is always there.] On the other hand, breaking out of it early (say in case the next state is disabled) shouldn't hurt as it is polling anyway. For this reason, make the loop in poll_idle() break if the CPU has been spinning longer than the target residency of the second state (the "polling" state can only be state[0]). Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/poll_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index 36ff5a1d9422..85792d371add 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -9,7 +9,6 @@ #include #include -#define POLL_IDLE_TIME_LIMIT (TICK_NSEC / 16) #define POLL_IDLE_RELAX_COUNT 200 static int __cpuidle poll_idle(struct cpuidle_device *dev, @@ -21,6 +20,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, local_irq_enable(); if (!current_set_polling_and_test()) { + u64 limit = (u64)drv->states[1].target_residency * NSEC_PER_USEC; unsigned int loop_count = 0; while (!need_resched()) { @@ -29,7 +29,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, continue; loop_count = 0; - if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT) { + if (local_clock() - time_start > limit) { dev->poll_time_limit = true; break; } From bde091ece2ad3f78d0896870f041bc52761ea3c1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 10 Oct 2018 14:16:38 +0200 Subject: [PATCH 12/14] cpuidle: menu: Simplify checks related to the polling state After some recent menu governor changes, the promotion of the "polling" state to a physical one is mostly controlled by the latency limit (resulting from the "interactivity" factor) and not by the time to the closest timer event, so it should be sufficient to check the exit latency of that state for this purpose (of course, its target residency still needs to be within the next timer event range for energy-efficiency). Also, the physical state the "polling" one is promoted to need not be the next one in principle (in case the next state is disabled, for example). For these reasons, simplify the checks made to decide whether or not to promote the "polling" state to a physical one and update the target idle duration when it is promoted in case the residency of the new state turns out to be above the tick boundary (in which case there is no reason to stop the tick). Tested-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b754296eb0c5..4ed8004643f8 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -371,12 +371,12 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (s->target_residency > predicted_us) { /* * Use a physical idle state, not busy polling, unless - * a timer is going to trigger really really soon. + * a timer is going to trigger soon enough. */ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && - i == idx + 1 && latency_req > s->exit_latency && - data->next_timer_us > max_t(unsigned int, 20, - s->target_residency)) { + s->exit_latency <= latency_req && + s->target_residency <= data->next_timer_us) { + predicted_us = s->target_residency; idx = i; break; } From 12b65eadf0bd41cf0dbf460f13bcd310a81afe2b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Oct 2018 13:51:45 +0200 Subject: [PATCH 13/14] cpuidle: menu: Drop redundant comparison Since the correction factor cannot be greater than RESOLUTION * DECAY, the result of the predicted_us computation in menu_select() cannot be greater than data->next_timer_us, so it is not necessary to compare the "typical interval" value coming from get_typical_interval() with data->next_timer_us separately. It is sufficient to copmare predicted_us with the return value of get_typical_interval() directly, so do that and drop the now redundant expected_interval variable. No intentional changes of behavior. Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 4ed8004643f8..3d283211ad77 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -287,7 +287,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int i; int idx; unsigned int interactivity_req; - unsigned int expected_interval; unsigned int predicted_us; unsigned long nr_iowaiters, cpu_load; ktime_t delta_next; @@ -323,14 +322,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); - - expected_interval = get_typical_interval(data); - expected_interval = min(expected_interval, data->next_timer_us); - /* * Use the lowest expected idle interval to pick the idle state. */ - predicted_us = min(predicted_us, expected_interval); + predicted_us = min(predicted_us, get_typical_interval(data)); if (tick_nohz_tick_stopped()) { /* From f1c8e410cdac5df42a7806e49efde2859a10fecd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 15 Oct 2018 13:53:25 +0200 Subject: [PATCH 14/14] cpuidle: menu: Avoid computations when result will be discarded If the minimum interval taken into account in the average computation loop in get_typical_interval() is less than the expected idle duration determined so far, the resultant average cannot be greater than that value as well and the entire return result of the function is going to be discarded anyway going forward. In that case, it is a waste of time to carry out the remaining computations in get_typical_interval(), so avoid that by returning early if the minimum interval is not below the expected idle duration. No intentional changes of behavior. Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 3d283211ad77..575a68f31761 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -196,10 +196,11 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev); * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static unsigned int get_typical_interval(struct menu_device *data) +static unsigned int get_typical_interval(struct menu_device *data, + unsigned int predicted_us) { int i, divisor; - unsigned int max, thresh, avg; + unsigned int min, max, thresh, avg; uint64_t sum, variance; thresh = UINT_MAX; /* Discard outliers above this value */ @@ -207,6 +208,7 @@ static unsigned int get_typical_interval(struct menu_device *data) again: /* First calculate the average of past intervals */ + min = UINT_MAX; max = 0; sum = 0; divisor = 0; @@ -217,8 +219,19 @@ again: divisor++; if (value > max) max = value; + + if (value < min) + min = value; } } + + /* + * If the result of the computation is going to be discarded anyway, + * avoid the computation altogether. + */ + if (min >= predicted_us) + return UINT_MAX; + if (divisor == INTERVALS) avg = sum >> INTERVAL_SHIFT; else @@ -325,7 +338,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* * Use the lowest expected idle interval to pick the idle state. */ - predicted_us = min(predicted_us, get_typical_interval(data)); + predicted_us = min(predicted_us, get_typical_interval(data, predicted_us)); if (tick_nohz_tick_stopped()) { /*