/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>

#include "mce.h"

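/*
 * Thermal monitor interrupt handler: reads IA32_THERM_STATUS and logs
 * a thermal throttling event when therm_throt_process() asks for it.
 */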
asmlinkage void smp_thermal_interrupt(void)
{
        __u64 msr_val;

        ack_APIC_irq();

        exit_idle();
        irq_enter();

        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
        if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
                mce_log_therm_throt_event(msr_val);

        inc_irq_stat(irq_thermal_count);
        irq_exit();
}

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happened.
 * Normally we pick those up using a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

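/* Banks this CPU has claimed for CMCI handling during discovery. */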
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_SPINLOCK(cmci_discover_lock);

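/*
 * Corrected-error count threshold programmed into IA32_MCi_CTL2:
 * a value of 1 signals a CMCI for every corrected error.
 */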
#define CMCI_THRESHOLD 1

static int cmci_supported(int *banks)
{
        u64 cap;

        /*
         * Vendor check is not strictly needed, but the initial
         * initialization is vendor keyed and this
         * makes sure none of the backdoors are entered otherwise.
         */
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return 0;
        if (!cpu_has_apic || lapic_get_maxlvt() < 6)
                return 0;
        rdmsrl(MSR_IA32_MCG_CAP, cap);
        *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
        return !!(cap & MCG_CMCI_P);
}

/*
 * The interrupt handler. This is called on every event.
 * Just call the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
        machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
        mce_notify_user();
}

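/*
 * Append one "type:bank" entry to a per-CPU line of bank ownership
 * updates; the line header is printed only once.
 */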
static void print_update(char *type, int *hdr, int num)
{
        if (*hdr == 0)
                printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
        *hdr = 1;
        printk(KERN_CONT " %s:%d", type, num);
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks, int boot)
{
        unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
        unsigned long flags;
        int hdr = 0;
        int i;

        spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++) {
                u64 val;

                if (test_bit(i, owned))
                        continue;

                rdmsrl(MSR_IA32_MC0_CTL2 + i, val);

                /* Already owned by someone else? */
                if (val & CMCI_EN) {
                        if (test_and_clear_bit(i, owned) || boot)
                                print_update("SHD", &hdr, i);
                        __clear_bit(i, __get_cpu_var(mce_poll_banks));
                        continue;
                }

                val |= CMCI_EN | CMCI_THRESHOLD;
                wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
                rdmsrl(MSR_IA32_MC0_CTL2 + i, val);

                /* Did the enable bit stick? -- the bank supports CMCI */
                if (val & CMCI_EN) {
                        if (!test_and_set_bit(i, owned) || boot)
                                print_update("CMCI", &hdr, i);
                        __clear_bit(i, __get_cpu_var(mce_poll_banks));
                } else {
                        WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
                }
        }
        spin_unlock_irqrestore(&cmci_discover_lock, flags);
        if (hdr)
                printk(KERN_CONT "\n");
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
        unsigned long flags;
        int banks;

        if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
                return;
        local_irq_save(flags);
        machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
        local_irq_restore(flags);
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
        unsigned long flags;
        int i;
        int banks;
        u64 val;

        if (!cmci_supported(&banks))
                return;
        spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++) {
                if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
                        continue;
                /* Disable CMCI */
                rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
                val &= ~(CMCI_EN | CMCI_THRESHOLD_MASK);
                wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
                __clear_bit(i, __get_cpu_var(mce_banks_owned));
        }
        spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

/*
 * After a CPU went down, cycle through all the other CPUs and rediscover.
 * Must run in process context.
 */
void cmci_rediscover(int dying)
{
        int banks;
        int cpu;
        cpumask_var_t old;

        if (!cmci_supported(&banks))
                return;
        if (!alloc_cpumask_var(&old, GFP_KERNEL))
                return;
        cpumask_copy(old, &current->cpus_allowed);

        for_each_online_cpu(cpu) {
                if (cpu == dying)
                        continue;
                if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
                        continue;
                /* Recheck banks in case CPUs don't all have the same */
                if (cmci_supported(&banks))
                        cmci_discover(banks, 0);
        }

        set_cpus_allowed_ptr(current, old);
        free_cpumask_var(old);
}

/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 */
void cmci_reenable(void)
{
        int banks;

        if (cmci_supported(&banks))
                cmci_discover(banks, 0);
}

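/*
 * Set up CMCI on this CPU: install the threshold interrupt handler,
 * claim the available banks and unmask the CMCI LVT entry.
 */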
static void intel_init_cmci(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;
        mce_threshold_vector = intel_threshold_interrupt;
        cmci_discover(banks, 1);
        /*
         * For CPU #0 this runs while the APIC is still disabled, but
         * that's ok because only the vector is set up. We still do
         * another check for the banks later for CPU #0 just to make
         * sure not to miss any events.
         */
        apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR | APIC_DM_FIXED);
        cmci_recheck();
}

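/*
 * Entry point for Intel specific MCE feature setup: thermal monitoring
 * and CMCI.
 */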
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
        intel_init_thermal(c);
        intel_init_cmci();
}