/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin (dmitriyz@google.com)
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
2009-06-15 17:25:27 +09:00
# include <linux/interrupt.h>
2009-04-08 12:31:19 +02:00
# include <linux/notifier.h>
# include <linux/jiffies.h>
2009-06-15 17:26:10 +09:00
# include <linux/kernel.h>
2006-09-26 10:52:42 +02:00
# include <linux/percpu.h>
2006-09-26 10:52:42 +02:00
# include <linux/sysdev.h>
2009-06-15 17:26:10 +09:00
# include <linux/types.h>
# include <linux/init.h>
# include <linux/smp.h>
2006-09-26 10:52:42 +02:00
# include <linux/cpu.h>
2009-04-08 12:31:19 +02:00
2009-06-15 17:26:10 +09:00
# include <asm/processor.h>
# include <asm/system.h>
# include <asm/apic.h>
2009-06-15 17:25:27 +09:00
# include <asm/idle.h>
# include <asm/mce.h>
2009-06-15 17:26:10 +09:00
# include <asm/msr.h>
2006-09-26 10:52:42 +02:00
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL		(300 * HZ)

/*
 * Current thermal throttling state, tracked per CPU:
 */
struct thermal_state {
	/* true while the last interrupt reported an active PROCHOT condition */
	bool			is_throttled;

	/* jiffies64 time after which the next message may be logged */
	u64			next_check;
	/* total throttle events seen on this CPU (exported via sysfs) */
	unsigned long		throttle_count;
	/* snapshot of throttle_count at the last logged message */
	unsigned long		last_throttle_count;
};

static DEFINE_PER_CPU(struct thermal_state, thermal_state);

/* Set to 1 by intel_init_thermal() once interrupt handling is enabled: */
static atomic_t therm_throt_en = ATOMIC_INIT(0);

/* Thermal LVT value saved on the BSP, used to restore the APs' LVT entry: */
static u32 lvtthmr_init __read_mostly;
2006-09-26 10:52:42 +02:00
# ifdef CONFIG_SYSFS
2009-04-08 12:31:19 +02:00
# define define_therm_throt_sysdev_one_ro(_name) \
static SYSDEV_ATTR ( _name , 0444 , therm_throt_sysdev_show_ # # _name , NULL )
# define define_therm_throt_sysdev_show_func(name) \
2009-09-22 15:50:24 +02:00
\
static ssize_t therm_throt_sysdev_show_ # # name ( \
struct sys_device * dev , \
struct sysdev_attribute * attr , \
char * buf ) \
2009-04-08 12:31:19 +02:00
{ \
unsigned int cpu = dev - > id ; \
ssize_t ret ; \
\
preempt_disable ( ) ; /* CPU hotplug */ \
if ( cpu_online ( cpu ) ) \
ret = sprintf ( buf , " %lu \n " , \
2009-09-22 15:50:24 +02:00
per_cpu ( thermal_state , cpu ) . name ) ; \
2009-04-08 12:31:19 +02:00
else \
ret = 0 ; \
preempt_enable ( ) ; \
\
return ret ; \
2006-09-26 10:52:42 +02:00
}
2009-09-22 15:50:24 +02:00
define_therm_throt_sysdev_show_func ( throttle_count ) ;
define_therm_throt_sysdev_one_ro ( throttle_count ) ;
2006-09-26 10:52:42 +02:00
static struct attribute * thermal_throttle_attrs [ ] = {
2009-09-22 15:50:24 +02:00
& attr_throttle_count . attr ,
2006-09-26 10:52:42 +02:00
NULL
} ;
static struct attribute_group thermal_throttle_attr_group = {
2009-04-08 12:31:19 +02:00
. attrs = thermal_throttle_attrs ,
. name = " thermal_throttle "
2006-09-26 10:52:42 +02:00
} ;
# endif /* CONFIG_SYSFS */
2006-09-26 10:52:42 +02:00
/***
 * therm_throt_process - Process thermal throttling event from interrupt
 * @is_throttled:	Whether the throttling condition is currently active
 *			(boolean), since the thermal interrupt normally gets
 *			called both when the thermal event begins and once
 *			the event has ended.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
static int therm_throt_process(bool is_throttled)
{
	struct thermal_state *state;
	unsigned int this_cpu;
	bool was_throttled;
	u64 now;

	this_cpu = smp_processor_id();
	now = get_jiffies_64();
	state = &per_cpu(thermal_state, this_cpu);

	/* Remember the previous state so we can detect both edges: */
	was_throttled = state->is_throttled;
	state->is_throttled = is_throttled;

	if (is_throttled)
		state->throttle_count++;

	/*
	 * Rate limit: stay silent while still inside the CHECK_INTERVAL
	 * window and new throttle events keep arriving.  (An unchanged
	 * count falls through so steady state returns 0 below without
	 * restarting a message storm.)
	 */
	if (time_before64(now, state->next_check) &&
			state->throttle_count != state->last_throttle_count)
		return 0;

	state->next_check = now + CHECK_INTERVAL;
	state->last_throttle_count = state->throttle_count;

	/* if we just entered the thermal event */
	if (is_throttled) {
		printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count);

		add_taint(TAINT_MACHINE_CHECK);
		return 1;
	}
	if (was_throttled) {
		/* Falling edge: the throttling condition just ended. */
		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu);
		return 1;
	}

	return 0;
}
2006-09-26 10:52:42 +02:00
# ifdef CONFIG_SYSFS
2009-04-08 12:31:19 +02:00
/* Add/Remove thermal_throttle interface for CPU device: */
2006-10-12 11:01:30 -07:00
static __cpuinit int thermal_throttle_add_dev ( struct sys_device * sys_dev )
2006-09-26 10:52:42 +02:00
{
2009-04-08 12:31:19 +02:00
return sysfs_create_group ( & sys_dev - > kobj ,
& thermal_throttle_attr_group ) ;
2006-09-26 10:52:42 +02:00
}
2006-10-12 11:01:30 -07:00
static __cpuinit void thermal_throttle_remove_dev ( struct sys_device * sys_dev )
2006-09-26 10:52:42 +02:00
{
2008-02-09 23:24:08 +01:00
sysfs_remove_group ( & sys_dev - > kobj , & thermal_throttle_attr_group ) ;
2006-09-26 10:52:42 +02:00
}
2009-04-08 12:31:19 +02:00
/* Mutex protecting device creation against CPU hotplug: */
static DEFINE_MUTEX(therm_cpu_lock);

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static __cpuinit int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
			      unsigned long action,
			      void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;
	int err = 0;

	sys_dev = get_cpu_sysdev(cpu);

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		/* Create the sysfs group before the CPU comes online: */
		mutex_lock(&therm_cpu_lock);
		err = thermal_throttle_add_dev(sys_dev);
		mutex_unlock(&therm_cpu_lock);
		WARN_ON(err);
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* Tear the group down on a cancelled bringup or offline: */
		mutex_lock(&therm_cpu_lock);
		thermal_throttle_remove_dev(sys_dev);
		mutex_unlock(&therm_cpu_lock);
		break;
	}
	/* Only the add path can fail; everything else reports OK. */
	return err ? NOTIFY_BAD : NOTIFY_OK;
}
2007-10-17 18:04:33 +02:00
static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
2006-09-26 10:52:42 +02:00
{
. notifier_call = thermal_throttle_cpu_callback ,
} ;
static __init int thermal_throttle_init_device ( void )
{
unsigned int cpu = 0 ;
2006-10-12 11:01:30 -07:00
int err ;
2006-09-26 10:52:42 +02:00
if ( ! atomic_read ( & therm_throt_en ) )
return 0 ;
register_hotcpu_notifier ( & thermal_throttle_cpu_notifier ) ;
# ifdef CONFIG_HOTPLUG_CPU
mutex_lock ( & therm_cpu_lock ) ;
# endif
/* connect live CPUs to sysfs */
2006-10-12 11:01:30 -07:00
for_each_online_cpu ( cpu ) {
err = thermal_throttle_add_dev ( get_cpu_sysdev ( cpu ) ) ;
WARN_ON ( err ) ;
}
2006-09-26 10:52:42 +02:00
# ifdef CONFIG_HOTPLUG_CPU
mutex_unlock ( & therm_cpu_lock ) ;
# endif
return 0 ;
}
device_initcall ( thermal_throttle_init_device ) ;
2009-06-15 17:25:27 +09:00
2006-09-26 10:52:42 +02:00
# endif /* CONFIG_SYSFS */
2009-06-15 17:25:27 +09:00
/* Thermal transition interrupt handler */
2009-06-15 17:26:36 +09:00
static void intel_thermal_interrupt ( void )
2009-06-15 17:25:27 +09:00
{
__u64 msr_val ;
rdmsrl ( MSR_IA32_THERM_STATUS , msr_val ) ;
2009-09-22 15:50:24 +02:00
if ( therm_throt_process ( ( msr_val & THERM_STATUS_PROCHOT ) ! = 0 ) )
2009-06-15 17:25:27 +09:00
mce_log_therm_throt_event ( msr_val ) ;
}
/* Fallback handler: the thermal LVT fired before a real handler was set. */
static void unexpected_thermal_interrupt(void)
{
	unsigned int cpu = smp_processor_id();

	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", cpu);
	add_taint(TAINT_MACHINE_CHECK);
}
/* Active handler; switched to intel_thermal_interrupt() during init: */
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

/* Entry point for the thermal LVT interrupt vector. */
asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
	exit_idle();
	irq_enter();
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
	irq_exit();
	/* Ack only at the end to avoid potential reentry */
	ack_APIC_irq();
}
2009-11-11 15:51:25 +08:00
/* Save the BIOS-programmed thermal LVT entry for later use on the APs. */
void __init mcheck_intel_therm_init(void)
{
	/*
	 * This function is only called on boot CPU. Save the init thermal
	 * LVT value on BSP and use that value to restore APs' thermal LVT
	 * entry BIOS programmed later
	 */
	if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
	    cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
		lvtthmr_init = apic_read(APIC_LVTTHMR);
}
2009-11-12 15:52:40 +09:00
/*
 * Set up thermal monitoring and the thermal LVT interrupt on this CPU.
 * Bails out if the hardware lacks the needed features, or if firmware
 * (SMM/BIOS) already owns the thermal vector.
 */
void intel_init_thermal(struct cpuinfo_x86 *c)
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

	/* Thermal monitoring depends on APIC, ACPI and clock modulation */
	if (!cpu_has_apic || !cpu_has(c, X86_FEATURE_ACPI) ||
	    !cpu_has(c, X86_FEATURE_ACC))
		return;

	/*
	 * First check if its enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);

	/*
	 * The initial value of thermal LVT entries on all APs always reads
	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
	 * sequence to them and LVT registers are reset to 0s except for
	 * the mask bits which are set to 1s when APs receive INIT IPI.
	 * Always restore the value that BIOS has programmed on AP based on
	 * BSP's info we saved since BIOS is always setting the same value
	 * for all threads/cores
	 */
	apic_write(APIC_LVTTHMR, lvtthmr_init);
	/* From here on, h holds the (restored) LVT entry, l MISC_ENABLE: */
	h = lvtthmr_init;

	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
		return;
	}

	/* Check whether a vector already exists */
	if (h & APIC_VECTOR_MASK) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
		       cpu, (h & APIC_VECTOR_MASK));
		return;
	}

	/* early Pentium M models use different method for enabling TM2 */
	if (cpu_has(c, X86_FEATURE_TM2)) {
		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
			rdmsr(MSR_THERM2_CTL, l, h);
			if (l & MSR_THERM2_CTL_TM_SELECT)
				tm2 = 1;
		/* otherwise l still holds MISC_ENABLE read above: */
		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
			tm2 = 1;
	}

	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	/* Enable interrupts for both temperature thresholds: */
	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	wrmsr(MSR_IA32_THERM_INTERRUPT,
	      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

	/* Install the real handler before the vector is unmasked: */
	smp_thermal_vector = intel_thermal_interrupt;

	/* Turn on TM1 in MISC_ENABLE: */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	/* Unmask the thermal vector: */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

	/* printk_once: logged a single time even though each CPU runs this: */
	printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
		    tm2 ? "TM2" : "TM1");

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}