diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index c256bd7fbd78..c6bb9f1257c9 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -732,9 +732,8 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) static bool acpi_idle_fallback_to_c1(struct acpi_processor *pr) { - return IS_ENABLED(CONFIG_HOTPLUG_CPU) && num_online_cpus() > 1 && - !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED) && - !pr->flags.has_cst; + return IS_ENABLED(CONFIG_HOTPLUG_CPU) && !pr->flags.has_cst && + !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED); } static int c3_cpu_count; @@ -744,9 +743,10 @@ static DEFINE_RAW_SPINLOCK(c3_lock); * acpi_idle_enter_bm - enters C3 with proper BM handling * @pr: Target processor * @cx: Target state context + * @timer_bc: Whether or not to change timer mode to broadcast */ static void acpi_idle_enter_bm(struct acpi_processor *pr, - struct acpi_processor_cx *cx) + struct acpi_processor_cx *cx, bool timer_bc) { acpi_unlazy_tlb(smp_processor_id()); @@ -754,7 +754,8 @@ static void acpi_idle_enter_bm(struct acpi_processor *pr, * Must be done before busmaster disable as we might need to * access HPET ! */ - lapic_timer_state_broadcast(pr, cx, 1); + if (timer_bc) + lapic_timer_state_broadcast(pr, cx, 1); /* * disable bus master @@ -784,7 +785,8 @@ static void acpi_idle_enter_bm(struct acpi_processor *pr, raw_spin_unlock(&c3_lock); } - lapic_timer_state_broadcast(pr, cx, 0); + if (timer_bc) + lapic_timer_state_broadcast(pr, cx, 0); } static int acpi_idle_enter(struct cpuidle_device *dev, @@ -798,12 +800,12 @@ static int acpi_idle_enter(struct cpuidle_device *dev, return -EINVAL; if (cx->type != ACPI_STATE_C1) { - if (acpi_idle_fallback_to_c1(pr)) { + if (acpi_idle_fallback_to_c1(pr) && num_online_cpus() > 1) { index = CPUIDLE_DRIVER_STATE_START; cx = per_cpu(acpi_cstate[index], dev->cpu); } else if (cx->type == ACPI_STATE_C3 && pr->flags.bm_check) { if (cx->bm_sts_skip || !acpi_idle_bm_check()) { - acpi_idle_enter_bm(pr, cx); + acpi_idle_enter_bm(pr, cx, true); return index; } else if (drv->safe_state_index >= 0) { index = drv->safe_state_index; @@ -827,6 +829,27 @@ static int acpi_idle_enter(struct cpuidle_device *dev, return index; } +static void acpi_idle_enter_freeze(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); + + if (cx->type == ACPI_STATE_C3) { + struct acpi_processor *pr = __this_cpu_read(processors); + + if (unlikely(!pr)) + return; + + if (pr->flags.bm_check) { + acpi_idle_enter_bm(pr, cx, false); + return; + } else { + ACPI_FLUSH_CPU_CACHE(); + } + } + acpi_idle_do_entry(cx); +} + struct cpuidle_driver acpi_idle_driver = { .name = "acpi_idle", .owner = THIS_MODULE, @@ -925,6 +948,15 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) state->enter_dead = acpi_idle_play_dead; drv->safe_state_index = count; } + /* + * Halt-induced C1 is not good for ->enter_freeze, because it + * re-enables interrupts on exit. Moreover, C1 is generally not + * particularly interesting from the suspend-to-idle angle, so + * avoid C1 and the situations in which we may need to fall back + * to it altogether. + */ + if (cx->type != ACPI_STATE_C1 && !acpi_idle_fallback_to_c1(pr)) + state->enter_freeze = acpi_idle_enter_freeze; count++; if (count == CPUIDLE_STATE_MAX) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 125150dc6e81..4d534582514e 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include "cpuidle.h" @@ -32,7 +34,6 @@ LIST_HEAD(cpuidle_detected_devices); static int enabled_devices; static int off __read_mostly; static int initialized __read_mostly; -static bool use_deepest_state __read_mostly; int cpuidle_disabled(void) { @@ -66,36 +67,23 @@ int cpuidle_play_dead(void) } /** - * cpuidle_use_deepest_state - Enable/disable the "deepest idle" mode. - * @enable: Whether enable or disable the feature. - * - * If the "deepest idle" mode is enabled, cpuidle will ignore the governor and - * always use the state with the greatest exit latency (out of the states that - * are not disabled). - * - * This function can only be called after cpuidle_pause() to avoid races. - */ -void cpuidle_use_deepest_state(bool enable) -{ - use_deepest_state = enable; -} - -/** - * cpuidle_find_deepest_state - Find the state of the greatest exit latency. - * @drv: cpuidle driver for a given CPU. - * @dev: cpuidle device for a given CPU. + * cpuidle_find_deepest_state - Find deepest state meeting specific conditions. + * @drv: cpuidle driver for the given CPU. + * @dev: cpuidle device for the given CPU. + * @freeze: Whether or not the state should be suitable for suspend-to-idle. */ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev) + struct cpuidle_device *dev, bool freeze) { unsigned int latency_req = 0; - int i, ret = CPUIDLE_DRIVER_STATE_START - 1; + int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1; for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; struct cpuidle_state_usage *su = &dev->states_usage[i]; - if (s->disabled || su->disable || s->exit_latency <= latency_req) + if (s->disabled || su->disable || s->exit_latency <= latency_req + || (freeze && !s->enter_freeze)) continue; latency_req = s->exit_latency; @@ -104,6 +92,63 @@ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, return ret; } +static void enter_freeze_proper(struct cpuidle_driver *drv, + struct cpuidle_device *dev, int index) +{ + tick_freeze(); + /* + * The state used here cannot be a "coupled" one, because the "coupled" + * cpuidle mechanism enables interrupts and doing that with timekeeping + * suspended is generally unsafe. + */ + drv->states[index].enter_freeze(dev, drv, index); + WARN_ON(!irqs_disabled()); + /* + * timekeeping_resume() that will be called by tick_unfreeze() for the + * last CPU executing it calls functions containing RCU read-side + * critical sections, so tell RCU about that. + */ + RCU_NONIDLE(tick_unfreeze()); +} + +/** + * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle. + * + * If there are states with the ->enter_freeze callback, find the deepest of + * them and enter it with frozen tick. Otherwise, find the deepest state + * available and enter it normally. + */ +void cpuidle_enter_freeze(void) +{ + struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + int index; + + /* + * Find the deepest state with ->enter_freeze present, which guarantees + * that interrupts won't be enabled when it exits and allows the tick to + * be frozen safely. + */ + index = cpuidle_find_deepest_state(drv, dev, true); + if (index >= 0) { + enter_freeze_proper(drv, dev, index); + return; + } + + /* + * It is not safe to freeze the tick, find the deepest state available + * at all and try to enter it normally. + */ + index = cpuidle_find_deepest_state(drv, dev, false); + if (index >= 0) + cpuidle_enter(drv, dev, index); + else + arch_cpu_idle(); + + /* Interrupts are enabled again here. */ + local_irq_disable(); +} + /** * cpuidle_enter_state - enter the state and update stats * @dev: cpuidle device for this cpu @@ -166,9 +211,6 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (!drv || !dev || !dev->enabled) return -EBUSY; - if (unlikely(use_deepest_state)) - return cpuidle_find_deepest_state(drv, dev); - return cpuidle_curr_governor->select(drv, dev); } @@ -200,7 +242,7 @@ int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ void cpuidle_reflect(struct cpuidle_device *dev, int index) { - if (cpuidle_curr_governor->reflect && !unlikely(use_deepest_state)) + if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev, index); } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 1bc0c170f12a..b0e58522780d 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -97,6 +97,8 @@ static const struct idle_cpu *icpu; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static int intel_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); +static void intel_idle_freeze(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index); static int intel_idle_cpu_init(int cpu); static struct cpuidle_state *cpuidle_state_table; @@ -131,28 +133,32 @@ static struct cpuidle_state nehalem_cstates[] = { .flags = MWAIT2flg(0x00), .exit_latency = 3, .target_residency = 6, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C1E-NHM", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C3-NHM", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .target_residency = 80, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-NHM", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .target_residency = 800, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -164,35 +170,40 @@ static struct cpuidle_state snb_cstates[] = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C1E-SNB", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C3-SNB", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 211, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-SNB", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 104, .target_residency = 345, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C7-SNB", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 109, .target_residency = 345, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -204,42 +215,48 @@ static struct cpuidle_state byt_cstates[] = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C1E-BYT", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 15, .target_residency = 30, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6N-BYT", .desc = "MWAIT 0x58", .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, .target_residency = 275, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6S-BYT", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C7-BYT", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 1200, .target_residency = 1500, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C7S-BYT", .desc = "MWAIT 0x64", .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 10000, .target_residency = 20000, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -251,35 +268,40 @@ static struct cpuidle_state ivb_cstates[] = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C1E-IVB", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C3-IVB", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 156, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-IVB", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 80, .target_residency = 300, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C7-IVB", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 87, .target_residency = 300, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -291,28 +313,32 @@ static struct cpuidle_state ivt_cstates[] = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C1E-IVT", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, .target_residency = 80, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C3-IVT", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 156, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-IVT", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 82, .target_residency = 300, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -324,28 +350,32 @@ static struct cpuidle_state ivt_cstates_4s[] = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C1E-IVT-4S", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, .target_residency = 250, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C3-IVT-4S", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 300, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-IVT-4S", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 84, .target_residency = 400, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -357,28 +387,32 @@ static struct cpuidle_state ivt_cstates_8s[] = { .flags = MWAIT2flg(0x00), .exit_latency = 1, .target_residency = 1, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C1E-IVT-8S", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, .target_residency = 500, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C3-IVT-8S", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 59, .target_residency = 600, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-IVT-8S", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 88, .target_residency = 700, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -390,56 +424,64 @@ static struct cpuidle_state hsw_cstates[] = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C1E-HSW", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C3-HSW", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, .target_residency = 100, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-HSW", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, .target_residency = 400, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C7s-HSW", .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, .target_residency = 500, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C8-HSW", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, .target_residency = 900, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C9-HSW", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, .target_residency = 1800, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C10-HSW", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, .target_residency = 7700, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -450,56 +492,64 @@ static struct cpuidle_state bdw_cstates[] = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C1E-BDW", .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01), .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C3-BDW", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, .target_residency = 100, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-BDW", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, .target_residency = 400, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C7s-BDW", .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, .target_residency = 500, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C8-BDW", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, .target_residency = 900, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C9-BDW", .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, .target_residency = 1800, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C10-BDW", .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, .target_residency = 7700, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -511,28 +561,32 @@ static struct cpuidle_state atom_cstates[] = { .flags = MWAIT2flg(0x00), .exit_latency = 10, .target_residency = 20, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C2-ATM", .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10), .exit_latency = 20, .target_residency = 80, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C4-ATM", .desc = "MWAIT 0x30", .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .target_residency = 400, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-ATM", .desc = "MWAIT 0x52", .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 140, .target_residency = 560, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -543,14 +597,16 @@ static struct cpuidle_state avn_cstates[] = { .flags = MWAIT2flg(0x00), .exit_latency = 2, .target_residency = 2, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C6-AVN", .desc = "MWAIT 0x51", .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 15, .target_residency = 45, - .enter = &intel_idle }, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .enter = NULL } }; @@ -592,6 +648,21 @@ static int intel_idle(struct cpuidle_device *dev, return index; } +/** + * intel_idle_freeze - simplified "enter" callback routine for suspend-to-idle + * @dev: cpuidle_device + * @drv: cpuidle driver + * @index: state index + */ +static void intel_idle_freeze(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + unsigned long ecx = 1; /* break on interrupt flag */ + unsigned long eax = flg2MWAIT(drv->states[index].flags); + + mwait_idle_with_hints(eax, ecx); +} + static void __setup_broadcast_timer(void *arg) { unsigned long reason = (unsigned long)arg; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index ab70f3bc44ad..f551a9299ac9 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -50,6 +50,15 @@ struct cpuidle_state { int index); int (*enter_dead) (struct cpuidle_device *dev, int index); + + /* + * CPUs execute ->enter_freeze with the local tick or entire timekeeping + * suspended, so it must not re-enable interrupts at any point (even + * temporarily) or attempt to change states of clock event devices. + */ + void (*enter_freeze) (struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index); }; /* Idle State Flags */ @@ -141,7 +150,7 @@ extern void cpuidle_resume(void); extern int cpuidle_enable_device(struct cpuidle_device *dev); extern void cpuidle_disable_device(struct cpuidle_device *dev); extern int cpuidle_play_dead(void); -extern void cpuidle_use_deepest_state(bool enable); +extern void cpuidle_enter_freeze(void); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); #else @@ -174,7 +183,7 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } -static inline void cpuidle_use_deepest_state(bool enable) {} +static inline void cpuidle_enter_freeze(void) { } static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } #endif diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3388c1b6f7d8..5efe743ce1e8 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -201,6 +201,21 @@ struct platform_freeze_ops { */ extern void suspend_set_ops(const struct platform_suspend_ops *ops); extern int suspend_valid_only_mem(suspend_state_t state); + +/* Suspend-to-idle state machnine. */ +enum freeze_state { + FREEZE_STATE_NONE, /* Not suspended/suspending. */ + FREEZE_STATE_ENTER, /* Enter suspend-to-idle. */ + FREEZE_STATE_WAKE, /* Wake up from suspend-to-idle. */ +}; + +extern enum freeze_state __read_mostly suspend_freeze_state; + +static inline bool idle_should_freeze(void) +{ + return unlikely(suspend_freeze_state == FREEZE_STATE_ENTER); +} + extern void freeze_set_ops(const struct platform_freeze_ops *ops); extern void freeze_wake(void); @@ -228,6 +243,7 @@ extern int pm_suspend(suspend_state_t state); static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } +static inline bool idle_should_freeze(void) { return false; } static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {} static inline void freeze_wake(void) {} #endif /* !CONFIG_SUSPEND */ diff --git a/include/linux/tick.h b/include/linux/tick.h index eda850ca757a..9c085dc12ae9 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -79,6 +79,9 @@ extern void __init tick_init(void); extern int tick_is_oneshot_available(void); extern struct tick_device *tick_get_device(int cpu); +extern void tick_freeze(void); +extern void tick_unfreeze(void); + # ifdef CONFIG_HIGH_RES_TIMERS extern int tick_init_highres(void); extern int tick_program_event(ktime_t expires, int force); @@ -119,6 +122,8 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } +static inline void tick_freeze(void) { } +static inline void tick_unfreeze(void) { } static inline void tick_cancel_sched_timer(int cpu) { } static inline void tick_clock_notify(void) { } static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } @@ -226,5 +231,4 @@ static inline void tick_nohz_task_switch(struct task_struct *tsk) __tick_nohz_task_switch(tsk); } - #endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c347e3ce3a55..b7d6b3a721b1 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -37,7 +37,9 @@ const char *pm_states[PM_SUSPEND_MAX]; static const struct platform_suspend_ops *suspend_ops; static const struct platform_freeze_ops *freeze_ops; static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); -static bool suspend_freeze_wake; + +enum freeze_state __read_mostly suspend_freeze_state; +static DEFINE_SPINLOCK(suspend_freeze_lock); void freeze_set_ops(const struct platform_freeze_ops *ops) { @@ -48,22 +50,49 @@ void freeze_set_ops(const struct platform_freeze_ops *ops) static void freeze_begin(void) { - suspend_freeze_wake = false; + suspend_freeze_state = FREEZE_STATE_NONE; } static void freeze_enter(void) { - cpuidle_use_deepest_state(true); + spin_lock_irq(&suspend_freeze_lock); + if (pm_wakeup_pending()) + goto out; + + suspend_freeze_state = FREEZE_STATE_ENTER; + spin_unlock_irq(&suspend_freeze_lock); + + get_online_cpus(); cpuidle_resume(); - wait_event(suspend_freeze_wait_head, suspend_freeze_wake); + + /* Push all the CPUs into the idle loop. */ + wake_up_all_idle_cpus(); + pr_debug("PM: suspend-to-idle\n"); + /* Make the current CPU wait so it can enter the idle loop too. */ + wait_event(suspend_freeze_wait_head, + suspend_freeze_state == FREEZE_STATE_WAKE); + pr_debug("PM: resume from suspend-to-idle\n"); + cpuidle_pause(); - cpuidle_use_deepest_state(false); + put_online_cpus(); + + spin_lock_irq(&suspend_freeze_lock); + + out: + suspend_freeze_state = FREEZE_STATE_NONE; + spin_unlock_irq(&suspend_freeze_lock); } void freeze_wake(void) { - suspend_freeze_wake = true; - wake_up(&suspend_freeze_wait_head); + unsigned long flags; + + spin_lock_irqsave(&suspend_freeze_lock, flags); + if (suspend_freeze_state > FREEZE_STATE_NONE) { + suspend_freeze_state = FREEZE_STATE_WAKE; + wake_up(&suspend_freeze_wait_head); + } + spin_unlock_irqrestore(&suspend_freeze_lock, flags); } EXPORT_SYMBOL_GPL(freeze_wake); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index aaf1c1d5cf5d..94b2d7b88a27 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -104,6 +105,21 @@ static void cpuidle_idle_call(void) */ rcu_idle_enter(); + /* + * Suspend-to-idle ("freeze") is a system state in which all user space + * has been frozen, all I/O devices have been suspended and the only + * activity happens here and in iterrupts (if any). In that case bypass + * the cpuidle governor and go stratight for the deepest idle state + * available. Possibly also suspend the local tick and the entire + * timekeeping to prevent timer interrupts from kicking us out of idle + * until a proper wakeup interrupt happens. + */ + if (idle_should_freeze()) { + cpuidle_enter_freeze(); + local_irq_enable(); + goto exit_idle; + } + /* * Ask the cpuidle framework to choose a convenient idle state. * Fall back to the default arch idle method on errors. diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 7efeedf53ebd..f7c515595b42 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -394,6 +394,56 @@ void tick_resume(void) } } +static DEFINE_RAW_SPINLOCK(tick_freeze_lock); +static unsigned int tick_freeze_depth; + +/** + * tick_freeze - Suspend the local tick and (possibly) timekeeping. + * + * Check if this is the last online CPU executing the function and if so, + * suspend timekeeping. Otherwise suspend the local tick. + * + * Call with interrupts disabled. Must be balanced with %tick_unfreeze(). + * Interrupts must not be enabled before the subsequent %tick_unfreeze(). + */ +void tick_freeze(void) +{ + raw_spin_lock(&tick_freeze_lock); + + tick_freeze_depth++; + if (tick_freeze_depth == num_online_cpus()) { + timekeeping_suspend(); + } else { + tick_suspend(); + tick_suspend_broadcast(); + } + + raw_spin_unlock(&tick_freeze_lock); +} + +/** + * tick_unfreeze - Resume the local tick and (possibly) timekeeping. + * + * Check if this is the first CPU executing the function and if so, resume + * timekeeping. Otherwise resume the local tick. + * + * Call with interrupts disabled. Must be balanced with %tick_freeze(). + * Interrupts must not be enabled after the preceding %tick_freeze(). + */ +void tick_unfreeze(void) +{ + raw_spin_lock(&tick_freeze_lock); + + if (tick_freeze_depth == num_online_cpus()) + timekeeping_resume(); + else + tick_resume(); + + tick_freeze_depth--; + + raw_spin_unlock(&tick_freeze_lock); +} + /** * tick_init - initialize the tick control */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b124af259800..91db94136c10 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -230,9 +230,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) /** * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. - * @tk: The timekeeper from which we take the update - * @tkf: The fast timekeeper to update - * @tbase: The time base for the fast timekeeper (mono/raw) + * @tkr: Timekeeping readout base from which we take the update * * We want to use this from any context including NMI and tracing / * instrumenting the timekeeping code itself. @@ -244,11 +242,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) * smp_wmb(); <- Ensure that the last base[1] update is visible * tkf->seq++; * smp_wmb(); <- Ensure that the seqcount update is visible - * update(tkf->base[0], tk); + * update(tkf->base[0], tkr); * smp_wmb(); <- Ensure that the base[0] update is visible * tkf->seq++; * smp_wmb(); <- Ensure that the seqcount update is visible - * update(tkf->base[1], tk); + * update(tkf->base[1], tkr); * * The reader side does: * @@ -269,7 +267,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) * slightly wrong timestamp (a few nanoseconds). See * @ktime_get_mono_fast_ns. */ -static void update_fast_timekeeper(struct timekeeper *tk) +static void update_fast_timekeeper(struct tk_read_base *tkr) { struct tk_read_base *base = tk_fast_mono.base; @@ -277,7 +275,7 @@ static void update_fast_timekeeper(struct timekeeper *tk) raw_write_seqcount_latch(&tk_fast_mono.seq); /* Update base[0] */ - memcpy(base, &tk->tkr, sizeof(*base)); + memcpy(base, tkr, sizeof(*base)); /* Force readers back to base[0] */ raw_write_seqcount_latch(&tk_fast_mono.seq); @@ -334,6 +332,35 @@ u64 notrace ktime_get_mono_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); +/* Suspend-time cycles value for halted fast timekeeper. */ +static cycle_t cycles_at_suspend; + +static cycle_t dummy_clock_read(struct clocksource *cs) +{ + return cycles_at_suspend; +} + +/** + * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. + * @tk: Timekeeper to snapshot. + * + * It generally is unsafe to access the clocksource after timekeeping has been + * suspended, so take a snapshot of the readout base of @tk and use it as the + * fast timekeeper's readout base while suspended. It will return the same + * number of cycles every time until timekeeping is resumed at which time the + * proper readout base for the fast timekeeper will be restored automatically. + */ +static void halt_fast_timekeeper(struct timekeeper *tk) +{ + static struct tk_read_base tkr_dummy; + struct tk_read_base *tkr = &tk->tkr; + + memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); + cycles_at_suspend = tkr->read(tkr->clock); + tkr_dummy.read = dummy_clock_read; + update_fast_timekeeper(&tkr_dummy); +} + #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD static inline void update_vsyscall(struct timekeeper *tk) @@ -462,7 +489,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); - update_fast_timekeeper(tk); + update_fast_timekeeper(&tk->tkr); } /** @@ -1170,7 +1197,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) * xtime/wall_to_monotonic/jiffies/etc are * still managed by arch specific suspend/resume code. */ -static void timekeeping_resume(void) +void timekeeping_resume(void) { struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *clock = tk->tkr.clock; @@ -1251,7 +1278,7 @@ static void timekeeping_resume(void) hrtimers_resume(); } -static int timekeeping_suspend(void) +int timekeeping_suspend(void) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; @@ -1296,6 +1323,7 @@ static int timekeeping_suspend(void) } timekeeping_update(tk, TK_MIRROR); + halt_fast_timekeeper(tk); write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index adc1fc98bde3..1d91416055d5 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -16,5 +16,7 @@ extern int timekeeping_inject_offset(struct timespec *ts); extern s32 timekeeping_get_tai_offset(void); extern void timekeeping_set_tai_offset(s32 tai_offset); extern void timekeeping_clocktai(struct timespec *ts); +extern int timekeeping_suspend(void); +extern void timekeeping_resume(void); #endif