2010-03-08 22:07:30 +03:00
/*
* intel_idle . c - native hardware idle loop for modern Intel processors
*
2013-11-09 09:30:17 +04:00
* Copyright ( c ) 2013 , Intel Corporation .
2010-03-08 22:07:30 +03:00
* Len Brown < len . brown @ intel . com >
*
* This program is free software ; you can redistribute it and / or modify it
* under the terms and conditions of the GNU General Public License ,
* version 2 , as published by the Free Software Foundation .
*
* This program is distributed in the hope it will be useful , but WITHOUT
* ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License for
* more details .
*
* You should have received a copy of the GNU General Public License along with
* this program ; if not , write to the Free Software Foundation , Inc . ,
* 51 Franklin St - Fifth Floor , Boston , MA 02110 - 1301 USA .
*/
/*
* intel_idle is a cpuidle driver that loads on specific Intel processors
* in lieu of the legacy ACPI processor_idle driver . The intent is to
* make Linux more efficient on these processors , as intel_idle knows
* more than ACPI , as well as make Linux more immune to ACPI BIOS bugs .
*/
/*
* Design Assumptions
*
* All CPUs have same idle states as boot CPU
*
* Chipset BM_STS ( bus master status ) bit is a NOP
* for preventing entry into deep C-states
*/
/*
* Known limitations
*
* The driver currently initializes for_each_online_cpu ( ) upon modprobe .
* It is unaware of subsequent processors hot-added to the system.
* This means that if you boot with maxcpus = n and later online
* processors above n , those processors will use C1 only .
*
* ACPI has a .suspend hack to turn off deep C-states during suspend
* to avoid complications with the lapic timer workaround .
* Have not seen issues with suspend , but may need same workaround here .
*
* There is currently no kernel - based automatic probing / loading mechanism
* if the driver is built as a module .
*/
/* un-comment DEBUG to enable pr_debug() statements */
# define DEBUG
# include <linux/kernel.h>
# include <linux/cpuidle.h>
# include <linux/clockchips.h>
# include <trace/events/power.h>
# include <linux/sched.h>
2011-01-10 04:38:12 +03:00
# include <linux/notifier.h>
# include <linux/cpu.h>
2011-05-27 20:33:10 +04:00
# include <linux/module.h>
2012-01-26 03:09:07 +04:00
# include <asm/cpu_device_id.h>
2010-09-18 02:36:40 +04:00
# include <asm/mwait.h>
2011-01-19 04:48:27 +03:00
# include <asm/msr.h>
2010-03-08 22:07:30 +03:00
# define INTEL_IDLE_VERSION "0.4"
# define PREFIX "intel_idle: "
/*
 * The single cpuidle driver instance owned by this module.
 * Its states[] array and state_count are filled in by
 * intel_idle_cpuidle_driver_init() before registration.
 */
static struct cpuidle_driver intel_idle_driver = {
	.owner	= THIS_MODULE,
	.name	= " intel_idle ",
};
/* intel_idle.max_cstate=0 disables driver */
2013-02-02 06:35:35 +04:00
/*
 * Upper bound on the number of table entries the init loops will accept;
 * exported read-only as a module parameter.
 */
static int max_cstate = CPUIDLE_STATE_MAX - 1;

/*
 * Fourth output of cpuid(CPUID_MWAIT_LEAF), captured in intel_idle_probe().
 * Read as one 4-bit sub-state count per MWAIT C-state.
 */
static unsigned int mwait_substates;

#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF

/*
 * Bitmask of C-states in which the LAPIC timer keeps running:
 * bit 1 stands for C1, bit 2 for C2, and so on.
 * Defaults to "only C1"; probe widens it to every state when
 * X86_FEATURE_ARAT is present.
 */
static unsigned int lapic_timer_reliable_states = (1 << 1);
2010-03-08 22:07:30 +03:00
2012-01-26 03:09:07 +04:00
struct idle_cpu {
struct cpuidle_state * state_table ;
/*
* Hardware C - state auto - demotion may not always be optimal .
* Indicate which enable bits to clear here .
*/
unsigned long auto_demotion_disable_flags ;
2013-02-02 10:31:56 +04:00
bool disable_promotion_to_c1e ;
2012-01-26 03:09:07 +04:00
} ;
static const struct idle_cpu * icpu ;
2010-08-07 22:10:03 +04:00
static struct cpuidle_device __percpu * intel_idle_cpuidle_devices ;
2011-10-28 14:50:42 +04:00
static int intel_idle ( struct cpuidle_device * dev ,
struct cpuidle_driver * drv , int index ) ;
2012-07-05 17:23:25 +04:00
static int intel_idle_cpu_init ( int cpu ) ;
2010-03-08 22:07:30 +03:00
static struct cpuidle_state * cpuidle_state_table ;
2011-01-12 10:51:20 +03:00
/*
 * Set this flag for states where the HW flushes the TLB for us,
 * so that no cross-calls are needed to keep it consistent.
 * intel_idle() calls leave_mm() before entering any state that
 * carries this flag, so the flag is safe to use even when the HW
 * does not actually do the flushing.
 */
# define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
2013-02-01 04:55:37 +04:00
/*
* MWAIT takes an 8 - bit " hint " in EAX " suggesting "
* the C - state ( top nibble ) and sub - state ( bottom nibble )
* 0x00 means " MWAIT(C1) " , 0x10 means " MWAIT(C2) " etc .
*
* We store the hint at the top of our " flags " for each state .
*/
/* Extract the 8-bit MWAIT hint stored in bits 31:24 of a state's flags. */
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
/*
 * Place an 8-bit MWAIT hint into bits 31:24 of a flags word.
 * The argument is parenthesized so that expressions such as (a | b)
 * are masked as a whole, and the mask is unsigned so that hints
 * with the top bit set do not overflow a signed int when shifted.
 */
#define MWAIT2flg(eax) (((eax) & 0xFFu) << 24)
2010-03-08 22:07:30 +03:00
/*
* States are indexed by the cstate number ,
* which is also the index into the MWAIT hint array .
* Thus C0 is a dummy .
*/
2013-08-30 14:26:42 +04:00
/*
 * Idle states used by idle_cpu_nehalem.
 * Each entry carries its MWAIT hint in the top byte of .flags;
 * the list ends at the first entry whose .enter is NULL.
 */
static struct cpuidle_state nehalem_cstates[] __initdata = {
	{
		.name			= " C1-NHM ",
		.desc			= " MWAIT 0x00 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x00),
		.exit_latency		= 3,
		.target_residency	= 6,
		.enter			= intel_idle,
	},
	{
		.name			= " C1E-NHM ",
		.desc			= " MWAIT 0x01 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x01),
		.exit_latency		= 10,
		.target_residency	= 20,
		.enter			= intel_idle,
	},
	{
		.name			= " C3-NHM ",
		.desc			= " MWAIT 0x10 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x10),
		.exit_latency		= 20,
		.target_residency	= 80,
		.enter			= intel_idle,
	},
	{
		.name			= " C6-NHM ",
		.desc			= " MWAIT 0x20 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x20),
		.exit_latency		= 200,
		.target_residency	= 800,
		.enter			= intel_idle,
	},
	{
		/* end-of-table marker */
		.enter			= NULL,
	},
};
2013-08-30 14:26:42 +04:00
/*
 * Idle states used by idle_cpu_snb.
 * Each entry carries its MWAIT hint in the top byte of .flags;
 * the list ends at the first entry whose .enter is NULL.
 */
static struct cpuidle_state snb_cstates[] __initdata = {
	{
		.name			= " C1-SNB ",
		.desc			= " MWAIT 0x00 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x00),
		.exit_latency		= 2,
		.target_residency	= 2,
		.enter			= intel_idle,
	},
	{
		.name			= " C1E-SNB ",
		.desc			= " MWAIT 0x01 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x01),
		.exit_latency		= 10,
		.target_residency	= 20,
		.enter			= intel_idle,
	},
	{
		.name			= " C3-SNB ",
		.desc			= " MWAIT 0x10 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x10),
		.exit_latency		= 80,
		.target_residency	= 211,
		.enter			= intel_idle,
	},
	{
		.name			= " C6-SNB ",
		.desc			= " MWAIT 0x20 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x20),
		.exit_latency		= 104,
		.target_residency	= 345,
		.enter			= intel_idle,
	},
	{
		.name			= " C7-SNB ",
		.desc			= " MWAIT 0x30 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x30),
		.exit_latency		= 109,
		.target_residency	= 345,
		.enter			= intel_idle,
	},
	{
		/* end-of-table marker */
		.enter			= NULL,
	},
};
2013-08-30 14:26:42 +04:00
/*
 * Idle states used by idle_cpu_ivb.
 * Each entry carries its MWAIT hint in the top byte of .flags;
 * the list ends at the first entry whose .enter is NULL.
 */
static struct cpuidle_state ivb_cstates[] __initdata = {
	{
		.name			= " C1-IVB ",
		.desc			= " MWAIT 0x00 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x00),
		.exit_latency		= 1,
		.target_residency	= 1,
		.enter			= intel_idle,
	},
	{
		.name			= " C1E-IVB ",
		.desc			= " MWAIT 0x01 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x01),
		.exit_latency		= 10,
		.target_residency	= 20,
		.enter			= intel_idle,
	},
	{
		.name			= " C3-IVB ",
		.desc			= " MWAIT 0x10 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x10),
		.exit_latency		= 59,
		.target_residency	= 156,
		.enter			= intel_idle,
	},
	{
		.name			= " C6-IVB ",
		.desc			= " MWAIT 0x20 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x20),
		.exit_latency		= 80,
		.target_residency	= 300,
		.enter			= intel_idle,
	},
	{
		.name			= " C7-IVB ",
		.desc			= " MWAIT 0x30 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x30),
		.exit_latency		= 87,
		.target_residency	= 300,
		.enter			= intel_idle,
	},
	{
		/* end-of-table marker */
		.enter			= NULL,
	},
};
2013-08-30 14:26:42 +04:00
/*
 * Idle states used by idle_cpu_hsw.
 * Each entry carries its MWAIT hint in the top byte of .flags;
 * the list ends at the first entry whose .enter is NULL.
 */
static struct cpuidle_state hsw_cstates[] __initdata = {
	{
		.name			= " C1-HSW ",
		.desc			= " MWAIT 0x00 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x00),
		.exit_latency		= 2,
		.target_residency	= 2,
		.enter			= intel_idle,
	},
	{
		.name			= " C1E-HSW ",
		.desc			= " MWAIT 0x01 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x01),
		.exit_latency		= 10,
		.target_residency	= 20,
		.enter			= intel_idle,
	},
	{
		.name			= " C3-HSW ",
		.desc			= " MWAIT 0x10 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x10),
		.exit_latency		= 33,
		.target_residency	= 100,
		.enter			= intel_idle,
	},
	{
		.name			= " C6-HSW ",
		.desc			= " MWAIT 0x20 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x20),
		.exit_latency		= 133,
		.target_residency	= 400,
		.enter			= intel_idle,
	},
	{
		.name			= " C7s-HSW ",
		.desc			= " MWAIT 0x32 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x32),
		.exit_latency		= 166,
		.target_residency	= 500,
		.enter			= intel_idle,
	},
	{
		.name			= " C8-HSW ",
		.desc			= " MWAIT 0x40 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x40),
		.exit_latency		= 300,
		.target_residency	= 900,
		.enter			= intel_idle,
	},
	{
		.name			= " C9-HSW ",
		.desc			= " MWAIT 0x50 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x50),
		.exit_latency		= 600,
		.target_residency	= 1800,
		.enter			= intel_idle,
	},
	{
		.name			= " C10-HSW ",
		.desc			= " MWAIT 0x60 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x60),
		.exit_latency		= 2600,
		.target_residency	= 7700,
		.enter			= intel_idle,
	},
	{
		/* end-of-table marker */
		.enter			= NULL,
	},
};
2013-08-30 14:26:42 +04:00
/*
 * Idle states shared by idle_cpu_atom and idle_cpu_lincroft.
 * Each entry carries its MWAIT hint in the top byte of .flags;
 * the list ends at the first entry whose .enter is NULL.
 */
static struct cpuidle_state atom_cstates[] __initdata = {
	{
		.name			= " C1E-ATM ",
		.desc			= " MWAIT 0x00 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x00),
		.exit_latency		= 10,
		.target_residency	= 20,
		.enter			= intel_idle,
	},
	{
		.name			= " C2-ATM ",
		.desc			= " MWAIT 0x10 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID | MWAIT2flg(0x10),
		.exit_latency		= 20,
		.target_residency	= 80,
		.enter			= intel_idle,
	},
	{
		.name			= " C4-ATM ",
		.desc			= " MWAIT 0x30 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x30),
		.exit_latency		= 100,
		.target_residency	= 400,
		.enter			= intel_idle,
	},
	{
		.name			= " C6-ATM ",
		.desc			= " MWAIT 0x52 ",
		.flags			= CPUIDLE_FLAG_TIME_VALID |
					  CPUIDLE_FLAG_TLB_FLUSHED |
					  MWAIT2flg(0x52),
		.exit_latency		= 140,
		.target_residency	= 560,
		.enter			= intel_idle,
	},
	{
		/* end-of-table marker */
		.enter			= NULL,
	},
};
2013-11-09 09:30:17 +04:00
/*
 * Idle states used by idle_cpu_avn.
 *
 * The array is sized CPUIDLE_STATE_MAX, so the entries that are not
 * listed are zero-filled; the first of them (.enter == NULL) is what
 * ends the walk in the init loops.
 *
 * The C6 hint is 0x51, matching its description string.  It used to be
 * encoded as 0x58, i.e. sub-state 8 instead of sub-state 1, which does
 * not agree with .desc and asks for a different sub-state than intended.
 */
static struct cpuidle_state avn_cstates[CPUIDLE_STATE_MAX] = {
	{
		.name = " C1-AVN ",
		.desc = " MWAIT 0x00 ",
		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
		.exit_latency = 2,
		.target_residency = 2,
		.enter = &intel_idle },
	{
		.name = " C6-AVN ",
		.desc = " MWAIT 0x51 ",
		.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
		.exit_latency = 15,
		.target_residency = 45,
		.enter = &intel_idle },
};
2010-03-08 22:07:30 +03:00
/**
* intel_idle
* @ dev : cpuidle_device
2011-10-28 14:50:42 +04:00
* @ drv : cpuidle driver
2011-10-28 14:50:09 +04:00
* @ index : index of cpuidle state
2010-03-08 22:07:30 +03:00
*
2012-01-11 03:48:21 +04:00
* Must be called under local_irq_disable ( ) .
2010-03-08 22:07:30 +03:00
*/
2011-10-28 14:50:42 +04:00
static int intel_idle ( struct cpuidle_device * dev ,
struct cpuidle_driver * drv , int index )
2010-03-08 22:07:30 +03:00
{
unsigned long ecx = 1 ; /* break on interrupt flag */
2011-10-28 14:50:42 +04:00
struct cpuidle_state * state = & drv - > states [ index ] ;
2013-02-01 04:55:37 +04:00
unsigned long eax = flg2MWAIT ( state - > flags ) ;
2010-03-08 22:07:30 +03:00
unsigned int cstate ;
int cpu = smp_processor_id ( ) ;
cstate = ( ( ( eax ) > > MWAIT_SUBSTATE_SIZE ) & MWAIT_CSTATE_MASK ) + 1 ;
2010-10-01 05:19:07 +04:00
/*
2010-10-16 04:43:06 +04:00
* leave_mm ( ) to avoid costly and often unnecessary wakeups
* for flushing the user TLB ' s associated with the active mm .
2010-10-01 05:19:07 +04:00
*/
2010-10-16 04:43:06 +04:00
if ( state - > flags & CPUIDLE_FLAG_TLB_FLUSHED )
2010-10-01 05:19:07 +04:00
leave_mm ( cpu ) ;
2010-03-08 22:07:30 +03:00
if ( ! ( lapic_timer_reliable_states & ( 1 < < ( cstate ) ) ) )
clockevents_notify ( CLOCK_EVT_NOTIFY_BROADCAST_ENTER , & cpu ) ;
2013-09-11 14:43:13 +04:00
if ( ! current_set_polling_and_test ( ) ) {
2010-03-08 22:07:30 +03:00
__monitor ( ( void * ) & current_thread_info ( ) - > flags , 0 , 0 ) ;
smp_mb ( ) ;
if ( ! need_resched ( ) )
__mwait ( eax , ecx ) ;
}
if ( ! ( lapic_timer_reliable_states & ( 1 < < ( cstate ) ) ) )
clockevents_notify ( CLOCK_EVT_NOTIFY_BROADCAST_EXIT , & cpu ) ;
2011-10-28 14:50:09 +04:00
return index ;
2010-03-08 22:07:30 +03:00
}
2011-01-10 04:38:12 +03:00
/*
 * Switch broadcast-timer use on or off for the CPU this runs on.
 * @arg: non-zero (cast to a pointer) selects BROADCAST_ON, zero BROADCAST_OFF.
 * Intended to be run via on_each_cpu() / smp_call_function_single().
 */
static void __setup_broadcast_timer(void *arg)
{
	int this_cpu = smp_processor_id();
	unsigned long notification;

	if ((unsigned long)arg)
		notification = CLOCK_EVT_NOTIFY_BROADCAST_ON;
	else
		notification = CLOCK_EVT_NOTIFY_BROADCAST_OFF;

	clockevents_notify(notification, &this_cpu);
}
2012-07-05 17:23:25 +04:00
static int cpu_hotplug_notify ( struct notifier_block * n ,
unsigned long action , void * hcpu )
2011-01-10 04:38:12 +03:00
{
int hotcpu = ( unsigned long ) hcpu ;
2012-07-05 17:23:25 +04:00
struct cpuidle_device * dev ;
2011-01-10 04:38:12 +03:00
2013-10-23 17:44:51 +04:00
switch ( action & ~ CPU_TASKS_FROZEN ) {
2011-01-10 04:38:12 +03:00
case CPU_ONLINE :
2012-07-05 17:23:25 +04:00
if ( lapic_timer_reliable_states ! = LAPIC_TIMER_ALWAYS_RELIABLE )
smp_call_function_single ( hotcpu , __setup_broadcast_timer ,
( void * ) true , 1 ) ;
/*
* Some systems can hotplug a cpu at runtime after
* the kernel has booted , we have to initialize the
* driver in this case
*/
dev = per_cpu_ptr ( intel_idle_cpuidle_devices , hotcpu ) ;
if ( ! dev - > registered )
intel_idle_cpu_init ( hotcpu ) ;
2011-01-10 04:38:12 +03:00
break ;
}
return NOTIFY_OK ;
}
2012-07-05 17:23:25 +04:00
/* hotplug notifier block; registered at the end of intel_idle_init() */
static struct notifier_block cpu_hotplug_notifier = {
	.notifier_call	= cpu_hotplug_notify,
};
2011-01-19 04:48:27 +03:00
static void auto_demotion_disable ( void * dummy )
{
unsigned long long msr_bits ;
rdmsrl ( MSR_NHM_SNB_PKG_CST_CFG_CTL , msr_bits ) ;
2012-01-26 03:09:07 +04:00
msr_bits & = ~ ( icpu - > auto_demotion_disable_flags ) ;
2011-01-19 04:48:27 +03:00
wrmsrl ( MSR_NHM_SNB_PKG_CST_CFG_CTL , msr_bits ) ;
}
2013-02-02 10:31:56 +04:00
/*
 * Clear bit 0x2 of MSR_IA32_POWER_CTL on the CPU this runs on
 * (presumably the C1E promotion enable bit, going by the function name).
 * @dummy: unused; present to fit the cross-call callback signature.
 */
static void c1e_promotion_disable(void *dummy)
{
	unsigned long long power_ctl;

	rdmsrl(MSR_IA32_POWER_CTL, power_ctl);
	power_ctl = power_ctl & ~0x2;
	wrmsrl(MSR_IA32_POWER_CTL, power_ctl);
}
2011-01-19 04:48:27 +03:00
2012-01-26 03:09:07 +04:00
static const struct idle_cpu idle_cpu_nehalem = {
. state_table = nehalem_cstates ,
. auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE ,
2013-02-02 10:31:56 +04:00
. disable_promotion_to_c1e = true ,
2012-01-26 03:09:07 +04:00
} ;
/* atom_cstates with no MSR adjustments */
static const struct idle_cpu idle_cpu_atom = {
	.state_table			= atom_cstates,
};
/* atom_cstates, with the C6 auto-demotion bit cleared */
static const struct idle_cpu idle_cpu_lincroft = {
	.auto_demotion_disable_flags	= ATM_LNC_C6_AUTO_DEMOTE,
	.state_table			= atom_cstates,
};
static const struct idle_cpu idle_cpu_snb = {
. state_table = snb_cstates ,
2013-02-02 10:31:56 +04:00
. disable_promotion_to_c1e = true ,
2012-01-26 03:09:07 +04:00
} ;
2012-06-02 03:45:32 +04:00
static const struct idle_cpu idle_cpu_ivb = {
. state_table = ivb_cstates ,
2013-02-02 10:31:56 +04:00
. disable_promotion_to_c1e = true ,
2012-06-02 03:45:32 +04:00
} ;
2013-01-31 23:40:49 +04:00
static const struct idle_cpu idle_cpu_hsw = {
. state_table = hsw_cstates ,
2013-02-02 10:31:56 +04:00
. disable_promotion_to_c1e = true ,
2013-01-31 23:40:49 +04:00
} ;
2013-11-09 09:30:17 +04:00
/* avn_cstates, with C1E promotion turned off */
static const struct idle_cpu idle_cpu_avn = {
	.disable_promotion_to_c1e	= true,
	.state_table			= avn_cstates,
};
2012-01-26 03:09:07 +04:00
# define ICPU(model, cpu) \
{ X86_VENDOR_INTEL , 6 , model , X86_FEATURE_MWAIT , ( unsigned long ) & cpu }
static const struct x86_cpu_id intel_idle_ids [ ] = {
ICPU ( 0x1a , idle_cpu_nehalem ) ,
ICPU ( 0x1e , idle_cpu_nehalem ) ,
ICPU ( 0x1f , idle_cpu_nehalem ) ,
2012-02-16 08:13:14 +04:00
ICPU ( 0x25 , idle_cpu_nehalem ) ,
ICPU ( 0x2c , idle_cpu_nehalem ) ,
ICPU ( 0x2e , idle_cpu_nehalem ) ,
2012-01-26 03:09:07 +04:00
ICPU ( 0x1c , idle_cpu_atom ) ,
ICPU ( 0x26 , idle_cpu_lincroft ) ,
2012-02-16 08:13:14 +04:00
ICPU ( 0x2f , idle_cpu_nehalem ) ,
2012-01-26 03:09:07 +04:00
ICPU ( 0x2a , idle_cpu_snb ) ,
ICPU ( 0x2d , idle_cpu_snb ) ,
2012-06-02 03:45:32 +04:00
ICPU ( 0x3a , idle_cpu_ivb ) ,
2012-09-27 06:28:21 +04:00
ICPU ( 0x3e , idle_cpu_ivb ) ,
2013-01-31 23:40:49 +04:00
ICPU ( 0x3c , idle_cpu_hsw ) ,
ICPU ( 0x3f , idle_cpu_hsw ) ,
ICPU ( 0x45 , idle_cpu_hsw ) ,
2013-03-15 18:55:31 +04:00
ICPU ( 0x46 , idle_cpu_hsw ) ,
2013-11-09 09:30:17 +04:00
ICPU ( 0x4D , idle_cpu_avn ) ,
2012-01-26 03:09:07 +04:00
{ }
} ;
MODULE_DEVICE_TABLE ( x86cpu , intel_idle_ids ) ;
2010-03-08 22:07:30 +03:00
/*
* intel_idle_probe ( )
*/
2013-08-30 14:27:45 +04:00
static int __init intel_idle_probe ( void )
2010-03-08 22:07:30 +03:00
{
2010-05-28 10:22:03 +04:00
unsigned int eax , ebx , ecx ;
2012-01-26 03:09:07 +04:00
const struct x86_cpu_id * id ;
2010-03-08 22:07:30 +03:00
if ( max_cstate = = 0 ) {
pr_debug ( PREFIX " disabled \n " ) ;
return - EPERM ;
}
2012-01-26 03:09:07 +04:00
id = x86_match_cpu ( intel_idle_ids ) ;
if ( ! id ) {
if ( boot_cpu_data . x86_vendor = = X86_VENDOR_INTEL & &
boot_cpu_data . x86 = = 6 )
pr_debug ( PREFIX " does not run on family %d model %d \n " ,
boot_cpu_data . x86 , boot_cpu_data . x86_model ) ;
2010-03-08 22:07:30 +03:00
return - ENODEV ;
2012-01-26 03:09:07 +04:00
}
2010-03-08 22:07:30 +03:00
if ( boot_cpu_data . cpuid_level < CPUID_MWAIT_LEAF )
return - ENODEV ;
2010-05-28 10:22:03 +04:00
cpuid ( CPUID_MWAIT_LEAF , & eax , & ebx , & ecx , & mwait_substates ) ;
2010-03-08 22:07:30 +03:00
if ( ! ( ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED ) | |
2011-12-05 01:17:29 +04:00
! ( ecx & CPUID5_ECX_INTERRUPT_BREAK ) | |
! mwait_substates )
2010-03-08 22:07:30 +03:00
return - ENODEV ;
2010-05-28 10:22:03 +04:00
pr_debug ( PREFIX " MWAIT substates: 0x%x \n " , mwait_substates ) ;
2010-03-08 22:07:30 +03:00
2012-01-26 03:09:07 +04:00
icpu = ( const struct idle_cpu * ) id - > driver_data ;
cpuidle_state_table = icpu - > state_table ;
2010-03-08 22:07:30 +03:00
2010-12-02 09:19:32 +03:00
if ( boot_cpu_has ( X86_FEATURE_ARAT ) ) /* Always Reliable APIC Timer */
2011-01-10 04:38:12 +03:00
lapic_timer_reliable_states = LAPIC_TIMER_ALWAYS_RELIABLE ;
2012-07-05 17:23:25 +04:00
else
2012-01-11 03:48:19 +04:00
on_each_cpu ( __setup_broadcast_timer , ( void * ) true , 1 ) ;
2012-07-05 17:23:25 +04:00
2010-03-08 22:07:30 +03:00
pr_debug ( PREFIX " v " INTEL_IDLE_VERSION
" model 0x%X \n " , boot_cpu_data . x86_model ) ;
pr_debug ( PREFIX " lapic_timer_reliable_states 0x%x \n " ,
lapic_timer_reliable_states ) ;
return 0 ;
}
/*
* intel_idle_cpuidle_devices_uninit ( )
* unregister , free cpuidle_devices
*/
static void intel_idle_cpuidle_devices_uninit ( void )
{
int i ;
struct cpuidle_device * dev ;
for_each_online_cpu ( i ) {
dev = per_cpu_ptr ( intel_idle_cpuidle_devices , i ) ;
cpuidle_unregister_device ( dev ) ;
}
free_percpu ( intel_idle_cpuidle_devices ) ;
return ;
}
2011-10-28 14:50:42 +04:00
/*
* intel_idle_cpuidle_driver_init ( )
* allocate , initialize cpuidle_states
*/
2013-08-30 14:27:45 +04:00
static int __init intel_idle_cpuidle_driver_init ( void )
2011-10-28 14:50:42 +04:00
{
int cstate ;
struct cpuidle_driver * drv = & intel_idle_driver ;
drv - > state_count = 1 ;
2013-02-02 08:37:30 +04:00
for ( cstate = 0 ; cstate < CPUIDLE_STATE_MAX ; + + cstate ) {
int num_substates , mwait_hint , mwait_cstate , mwait_substate ;
2011-10-28 14:50:42 +04:00
2013-02-02 08:37:30 +04:00
if ( cpuidle_state_table [ cstate ] . enter = = NULL )
break ;
if ( cstate + 1 > max_cstate ) {
2011-10-28 14:50:42 +04:00
printk ( PREFIX " max_cstate %d reached \n " ,
max_cstate ) ;
break ;
}
2013-02-02 08:37:30 +04:00
mwait_hint = flg2MWAIT ( cpuidle_state_table [ cstate ] . flags ) ;
mwait_cstate = MWAIT_HINT2CSTATE ( mwait_hint ) ;
mwait_substate = MWAIT_HINT2SUBSTATE ( mwait_hint ) ;
2011-10-28 14:50:42 +04:00
/* does the state exist in CPUID.MWAIT? */
2013-02-02 08:37:30 +04:00
num_substates = ( mwait_substates > > ( ( mwait_cstate + 1 ) * 4 ) )
2011-10-28 14:50:42 +04:00
& MWAIT_SUBSTATE_MASK ;
2013-02-02 08:37:30 +04:00
/* if sub-state in table is not enumerated by CPUID */
if ( ( mwait_substate + 1 ) > num_substates )
2011-10-28 14:50:42 +04:00
continue ;
2013-02-02 08:37:30 +04:00
if ( ( ( mwait_cstate + 1 ) > 2 ) & &
2011-10-28 14:50:42 +04:00
! boot_cpu_has ( X86_FEATURE_NONSTOP_TSC ) )
mark_tsc_unstable ( " TSC halts in idle "
" states deeper than C2 " ) ;
drv - > states [ drv - > state_count ] = /* structure copy */
cpuidle_state_table [ cstate ] ;
drv - > state_count + = 1 ;
}
2012-01-26 03:09:07 +04:00
if ( icpu - > auto_demotion_disable_flags )
2012-01-11 03:48:19 +04:00
on_each_cpu ( auto_demotion_disable , NULL , 1 ) ;
2011-10-28 14:50:42 +04:00
2013-02-02 10:31:56 +04:00
if ( icpu - > disable_promotion_to_c1e ) /* each-cpu is redundant */
on_each_cpu ( c1e_promotion_disable , NULL , 1 ) ;
2011-10-28 14:50:42 +04:00
return 0 ;
}
2010-03-08 22:07:30 +03:00
/*
2012-01-18 01:40:08 +04:00
* intel_idle_cpu_init ( )
2010-03-08 22:07:30 +03:00
* allocate , initialize , register cpuidle_devices
2012-01-18 01:40:08 +04:00
* @ cpu : cpu / core to initialize
2010-03-08 22:07:30 +03:00
*/
2012-07-05 17:23:25 +04:00
static int intel_idle_cpu_init ( int cpu )
2010-03-08 22:07:30 +03:00
{
2012-01-18 01:40:08 +04:00
int cstate ;
2010-03-08 22:07:30 +03:00
struct cpuidle_device * dev ;
2012-01-18 01:40:08 +04:00
dev = per_cpu_ptr ( intel_idle_cpuidle_devices , cpu ) ;
2010-03-08 22:07:30 +03:00
2012-01-18 01:40:08 +04:00
dev - > state_count = 1 ;
2010-03-08 22:07:30 +03:00
2013-02-02 08:37:30 +04:00
for ( cstate = 0 ; cstate < CPUIDLE_STATE_MAX ; + + cstate ) {
int num_substates , mwait_hint , mwait_cstate , mwait_substate ;
2010-03-08 22:07:30 +03:00
2013-02-02 08:37:30 +04:00
if ( cpuidle_state_table [ cstate ] . enter = = NULL )
2013-08-30 14:25:10 +04:00
break ;
2013-02-02 08:37:30 +04:00
if ( cstate + 1 > max_cstate ) {
2012-03-22 03:33:43 +04:00
printk ( PREFIX " max_cstate %d reached \n " , max_cstate ) ;
2012-01-18 01:40:08 +04:00
break ;
}
2010-03-08 22:07:30 +03:00
2013-02-02 08:37:30 +04:00
mwait_hint = flg2MWAIT ( cpuidle_state_table [ cstate ] . flags ) ;
mwait_cstate = MWAIT_HINT2CSTATE ( mwait_hint ) ;
mwait_substate = MWAIT_HINT2SUBSTATE ( mwait_hint ) ;
2012-01-18 01:40:08 +04:00
/* does the state exist in CPUID.MWAIT? */
2013-02-02 08:37:30 +04:00
num_substates = ( mwait_substates > > ( ( mwait_cstate + 1 ) * 4 ) )
& MWAIT_SUBSTATE_MASK ;
/* if sub-state in table is not enumerated by CPUID */
if ( ( mwait_substate + 1 ) > num_substates )
2012-01-18 01:40:08 +04:00
continue ;
2010-03-08 22:07:30 +03:00
2012-03-22 03:33:43 +04:00
dev - > state_count + = 1 ;
}
2012-01-18 01:40:08 +04:00
dev - > cpu = cpu ;
2010-03-08 22:07:30 +03:00
2012-01-18 01:40:08 +04:00
if ( cpuidle_register_device ( dev ) ) {
pr_debug ( PREFIX " cpuidle_register_device %d failed! \n " , cpu ) ;
intel_idle_cpuidle_devices_uninit ( ) ;
return - EIO ;
2010-03-08 22:07:30 +03:00
}
2012-01-26 03:09:07 +04:00
if ( icpu - > auto_demotion_disable_flags )
2012-01-18 01:40:08 +04:00
smp_call_function_single ( cpu , auto_demotion_disable , NULL , 1 ) ;
2010-03-08 22:07:30 +03:00
return 0 ;
}
/*
 * Module entry point: probe the CPU, build and register the cpuidle
 * driver, register one cpuidle device per online CPU, and finally hook
 * CPU hotplug.
 *
 * Returns 0 on success or a negative errno.  On every failure after the
 * driver has been registered, the driver is unregistered again before
 * returning.
 */
static int __init intel_idle_init(void)
{
	int retval, i;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	retval = intel_idle_probe();
	if (retval)
		return retval;

	intel_idle_cpuidle_driver_init();
	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG PREFIX " intel_idle yielding to %s ",
			drv ? drv->name : " none ");
		return retval;
	}

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (intel_idle_cpuidle_devices == NULL) {
		/* do not leave the driver registered without any devices */
		cpuidle_unregister_driver(&intel_idle_driver);
		return -ENOMEM;
	}

	for_each_online_cpu(i) {
		retval = intel_idle_cpu_init(i);
		if (retval) {
			/* intel_idle_cpu_init() already released the devices */
			cpuidle_unregister_driver(&intel_idle_driver);
			return retval;
		}
	}
	register_cpu_notifier(&cpu_hotplug_notifier);

	return 0;
}
/*
 * Module exit: undo intel_idle_init().
 *
 * The hotplug notifier is removed first.  Its callback dereferences
 * intel_idle_cpuidle_devices, so it must not be able to run once
 * intel_idle_cpuidle_devices_uninit() has freed that allocation.
 */
static void __exit intel_idle_exit(void)
{
	unregister_cpu_notifier(&cpu_hotplug_notifier);

	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);

	/* broadcast timers were only switched on when the LAPIC timer may stop */
	if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
		on_each_cpu(__setup_broadcast_timer, (void *)false, 1);
}
/* module entry and exit points */
module_init ( intel_idle_init ) ;
module_exit ( intel_idle_exit ) ;
/* max_cstate is exposed read-only (0444); intel_idle.max_cstate=0 disables the driver */
module_param ( max_cstate , int , 0444 ) ;
/* module metadata */
MODULE_AUTHOR ( " Len Brown <len.brown@intel.com> " ) ;
MODULE_DESCRIPTION ( " Cpuidle driver for Intel Hardware v " INTEL_IDLE_VERSION ) ;
MODULE_LICENSE ( " GPL " ) ;