2005-04-16 15:20:36 -07:00
/*
* Intel specific MCE features .
* Copyright 2004 Zwane Mwaikambo < zwane @ linuxpower . ca >
2009-02-12 13:49:36 +01:00
* Copyright ( C ) 2008 , 2009 Intel Corporation
* Author : Andi Kleen
2005-04-16 15:20:36 -07:00
*/
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the followings.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
wildly available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build test were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable easily on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
# include <linux/gfp.h>
2005-04-16 15:20:36 -07:00
# include <linux/interrupt.h>
# include <linux/percpu.h>
2009-10-07 17:09:06 +04:00
# include <linux/sched.h>
2014-03-27 21:24:36 -04:00
# include <linux/cpumask.h>
2009-06-17 08:31:15 -07:00
# include <asm/apic.h>
2005-04-16 15:20:36 -07:00
# include <asm/processor.h>
# include <asm/msr.h>
# include <asm/mce.h>
2012-08-09 11:44:51 -07:00
# include "mce-internal.h"
2009-02-12 13:49:36 +01:00
/*
* Support for Intel Corrected Machine Check Interrupts ( CMCI ) . This allows
* the CPU to raise an interrupt when a corrected machine check happened .
* Normally we pick those up using a regular polling timer .
* Also supports reliable discovery of shared banks .
*/
2013-06-25 23:58:59 +05:30
/*
* CMCI can be delivered to multiple cpus that share a machine check bank
* so we need to designate a single cpu to process errors logged in each bank
* in the interrupt handler ( otherwise we would have many races and potential
* double reporting of the same error ) .
* Note that this can change when a cpu is offlined or brought online since
* some MCA banks are shared across cpus . When a cpu is offlined , cmci_clear ( )
* disables CMCI on all banks owned by the cpu and clears this bitfield . At
* this point , cmci_rediscover ( ) kicks in and a different cpu may end up
* taking ownership of some of the shared MCA banks that were previously
* owned by the offlined cpu .
*/
2009-02-12 13:49:36 +01:00
static DEFINE_PER_CPU ( mce_banks_t , mce_banks_owned ) ;
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
/*
* CMCI storm detection backoff counter
*
* During storm , we reset this counter to INITIAL_CHECK_INTERVAL in case we ' ve
* encountered an error . If not , we decrement it by one . We signal the end of
* the CMCI storm when it reaches 0.
*/
static DEFINE_PER_CPU ( int , cmci_backoff_cnt ) ;
2009-02-12 13:49:36 +01:00
/*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other .
*/
2014-08-05 22:57:19 +02:00
static DEFINE_RAW_SPINLOCK ( cmci_discover_lock ) ;
2009-02-12 13:49:36 +01:00
2012-08-09 11:44:51 -07:00
/*
* NOTE(review): CMCI_THRESHOLD is not referenced in the visible part of this
* file; presumably it is the corrected-error count programmed into a bank
* before a CMCI is raised - confirm against its user.
*
* CMCI_POLL_INTERVAL ( 30 * HZ ) is the timer interval returned by
* cmci_intel_adjust_timer ( ) for a CPU whose storm state is ACTIVE with an
* exhausted backoff counter , or SUBSIDED .
*/
# define CMCI_THRESHOLD 1
# define CMCI_POLL_INTERVAL (30 * HZ)
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
/*
* CMCI_STORM_INTERVAL is used for two things in this file : it is the length
* of the window in which cmci_storm_detect ( ) counts interrupts , and it is
* the timer interval used while a storm is active on this CPU .
*/
# define CMCI_STORM_INTERVAL (HZ)
2012-08-09 11:44:51 -07:00
/*
* A storm is declared by cmci_storm_detect ( ) once more than this many
* CMCIs were counted inside one CMCI_STORM_INTERVAL window .
*/
# define CMCI_STORM_THRESHOLD 15
/* jiffies value at which the current counting window was started . */
static DEFINE_PER_CPU ( unsigned long , cmci_time_stamp ) ;
/* Number of CMCIs counted on this CPU inside the current window . */
static DEFINE_PER_CPU ( unsigned int , cmci_storm_cnt ) ;
/* Per-CPU storm state ; holds one of the CMCI_STORM_* values below . */
static DEFINE_PER_CPU ( unsigned int , cmci_storm_state ) ;
enum {
/* No storm on this CPU ; the interrupt handler polls the banks itself . */
CMCI_STORM_NONE ,
/* Set by cmci_storm_detect ( ) after CMCI was disabled on the owned banks . */
CMCI_STORM_ACTIVE ,
/*
* Set by cmci_intel_adjust_timer ( ) when the backoff counter ran out ; the
* CPU stays here until cmci_storm_on_cpus drops to zero .
*/
CMCI_STORM_SUBSIDED ,
} ;
/*
* Global count of CPUs in CMCI_STORM_ACTIVE state . Incremented in
* cmci_storm_detect ( ) , decremented in cmci_intel_adjust_timer ( ) and
* mce_intel_hcpu_update ( ) .
*/
static atomic_t cmci_storm_on_cpus ;
2009-02-12 13:49:36 +01:00
2009-02-24 13:19:02 -08:00
/*
* Check whether CMCI can be used on the current CPU .
*
* @banks : output ; receives the bank count taken from MCG_CAP , capped at
* MAX_NR_BANKS . It is written only when all of the early checks below have
* passed , so it is left untouched on the early "return 0" paths .
*
* Returns 1 if MCG_CAP has MCG_CMCI_P set , 0 otherwise ( including when CMCI
* is disabled via mca_cfg , the vendor is not Intel , or the local APIC check
* fails ) .
*/
static int cmci_supported ( int * banks )
2009-02-12 13:49:36 +01:00
{
u64 cap ;
2012-10-15 20:25:17 +02:00
/* Configuration switches that turn CMCI off . */
if ( mca_cfg . cmci_disabled | | mca_cfg . ignore_ce )
2009-06-11 16:06:07 +09:00
return 0 ;
2009-02-12 13:49:36 +01:00
/*
* Vendor check is not strictly needed , but the initial
* initialization is vendor keyed and this
* makes sure none of the backdoors are entered otherwise .
*/
if ( boot_cpu_data . x86_vendor ! = X86_VENDOR_INTEL )
return 0 ;
/*
* NOTE(review): presumably the CMCI entry needs a local APIC with a max
* LVT index of at least 6 - confirm against the APIC documentation .
*/
if ( ! cpu_has_apic | | lapic_get_maxlvt ( ) < 6 )
return 0 ;
rdmsrl ( MSR_IA32_MCG_CAP , cap ) ;
/* The low byte of MCG_CAP is taken as the number of banks . */
* banks = min_t ( unsigned , MAX_NR_BANKS , cap & 0xff ) ;
return ! ! ( cap & MCG_CMCI_P ) ;
}
2015-06-04 18:55:23 +02:00
/*
 * Tell whether LMCE may be used on this CPU.
 *
 * Returns false when LMCE is disabled through mca_cfg, when MCG_CAP does
 * not advertise both MCG_SER_P and MCG_LMCE_P, or when
 * IA32_FEATURE_CONTROL does not have both the LOCKED and the LMCE bits set.
 * Returns true otherwise.
 */
static bool lmce_supported(void)
{
	/* LMCE depends on recovery support, so both capability bits are needed. */
	const u64 cap_needed = MCG_SER_P | MCG_LMCE_P;
	/*
	 * BIOS has to opt in by setting bit 20 in IA32_FEATURE_CONTROL;
	 * without that, touching MCG_EXT_CTL generates a #GP fault.
	 */
	const u64 ctl_needed = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE;
	u64 msr;

	if (mca_cfg.lmce_disabled)
		return false;

	rdmsrl(MSR_IA32_MCG_CAP, msr);
	if ((msr & cap_needed) != cap_needed)
		return false;

	rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);

	return (msr & ctl_needed) == ctl_needed;
}
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
/*
* Poll the machine check banks owned by this CPU while it is not in
* CMCI_STORM_NONE state ( that is , ACTIVE or SUBSIDED ) .
*
* Returns false without polling when this CPU is in CMCI_STORM_NONE state .
* Otherwise the owned banks are polled with MCP_TIMESTAMP and the per-CPU
* cmci_backoff_cnt is either reset to INITIAL_CHECK_INTERVAL ( the poll
* reported an error ) or decremented by one ( it did not ) ; true is returned .
*/
bool mce_intel_cmci_poll ( void )
2012-08-09 11:44:51 -07:00
{
if ( __this_cpu_read ( cmci_storm_state ) = = CMCI_STORM_NONE )
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
return false ;
/*
* Reset the counter if we ' ve logged an error in the last poll
* during the storm .
*/
if ( machine_check_poll ( MCP_TIMESTAMP , this_cpu_ptr ( & mce_banks_owned ) ) )
this_cpu_write ( cmci_backoff_cnt , INITIAL_CHECK_INTERVAL ) ;
else
this_cpu_dec ( cmci_backoff_cnt ) ;
return true ;
2012-08-09 11:44:51 -07:00
}
void mce_intel_hcpu_update ( unsigned long cpu )
{
if ( per_cpu ( cmci_storm_state , cpu ) = = CMCI_STORM_ACTIVE )
atomic_dec ( & cmci_storm_on_cpus ) ;
per_cpu ( cmci_storm_state , cpu ) = CMCI_STORM_NONE ;
}
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
/*
* Pick the next timer interval for this CPU from its CMCI storm state .
*
* @interval : the interval the caller would use otherwise ; it is returned
* unchanged when this CPU is in CMCI_STORM_NONE state .
*
* Behaviour by state :
* - ACTIVE with cmci_backoff_cnt above zero : call mce_notify_irq ( ) and
*   return CMCI_STORM_INTERVAL .
* - ACTIVE with the backoff counter exhausted : move to SUBSIDED , decrement
*   the global cmci_storm_on_cpus count ( printing a notice when it reaches
*   zero ) and continue with the SUBSIDED handling .
* - SUBSIDED : when no CPU is counted in cmci_storm_on_cpus any more , move
*   to NONE and call cmci_reenable ( ) and cmci_recheck ( ) . In either case
*   return CMCI_POLL_INTERVAL .
*
* NOTE(review): the caller is not visible here ; presumably this runs from
* the MCE poll timer on the local CPU - confirm .
*/
unsigned long cmci_intel_adjust_timer ( unsigned long interval )
2012-08-09 11:44:51 -07:00
{
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
if ( ( this_cpu_read ( cmci_backoff_cnt ) > 0 ) & &
( __this_cpu_read ( cmci_storm_state ) = = CMCI_STORM_ACTIVE ) ) {
mce_notify_irq ( ) ;
return CMCI_STORM_INTERVAL ;
}
2012-08-09 11:44:51 -07:00
switch ( __this_cpu_read ( cmci_storm_state ) ) {
case CMCI_STORM_ACTIVE :
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
2012-08-09 11:44:51 -07:00
/*
* We switch back to interrupt mode once the poll timer has
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
* silenced itself . That means no events recorded and the timer
* interval is back to our poll interval .
2012-08-09 11:44:51 -07:00
*/
__this_cpu_write ( cmci_storm_state , CMCI_STORM_SUBSIDED ) ;
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
/* The CPU that brings the global count down to zero prints the notice . */
if ( ! atomic_sub_return ( 1 , & cmci_storm_on_cpus ) )
2012-08-09 11:44:51 -07:00
pr_notice ( " CMCI storm subsided: switching to interrupt mode \n " ) ;
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
2012-08-09 11:44:51 -07:00
/* FALLTHROUGH */
case CMCI_STORM_SUBSIDED :
/*
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
* We wait for all CPUs to go back to SUBSIDED state . When that
* happens we switch back to interrupt mode .
2012-08-09 11:44:51 -07:00
*/
if ( ! atomic_read ( & cmci_storm_on_cpus ) ) {
__this_cpu_write ( cmci_storm_state , CMCI_STORM_NONE ) ;
cmci_reenable ( ) ;
cmci_recheck ( ) ;
}
return CMCI_POLL_INTERVAL ;
default :
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
/* We have shiny weather. Let the poll do whatever it thinks. */
2012-08-09 11:44:51 -07:00
return interval ;
}
}
2014-03-27 21:24:36 -04:00
/*
* Clear MCI_CTL2_CMCI_EN in the MCx_CTL2 MSR of every bank that is set in
* the mce_banks_owned bitmap of the current CPU .
*
* The whole walk runs with cmci_discover_lock held and the interrupt state
* saved by raw_spin_lock_irqsave ( ) . The ownership bitmap itself is not
* modified here .
*/
static void cmci_storm_disable_banks ( void )
{
unsigned long flags , * owned ;
int bank ;
u64 val ;
2014-08-05 22:57:19 +02:00
raw_spin_lock_irqsave ( & cmci_discover_lock , flags ) ;
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
owned = this_cpu_ptr ( mce_banks_owned ) ;
2014-03-27 21:24:36 -04:00
/* Read-modify-write so that only the CMCI enable bit is changed . */
for_each_set_bit ( bank , owned , MAX_NR_BANKS ) {
rdmsrl ( MSR_IA32_MCx_CTL2 ( bank ) , val ) ;
val & = ~ MCI_CTL2_CMCI_EN ;
wrmsrl ( MSR_IA32_MCx_CTL2 ( bank ) , val ) ;
}
2014-08-05 22:57:19 +02:00
raw_spin_unlock_irqrestore ( & cmci_discover_lock , flags ) ;
2014-03-27 21:24:36 -04:00
}
2012-08-09 11:44:51 -07:00
/*
* Count one CMCI on this CPU and decide whether a storm is in progress .
*
* Returns true when this CPU is already in a state other than
* CMCI_STORM_NONE , or when this call pushed the per-window count above
* CMCI_STORM_THRESHOLD . Returns false otherwise .
*
* On detecting a new storm the function , in this order : disables CMCI on
* the banks owned by this CPU , marks the CPU CMCI_STORM_ACTIVE , increments
* the global cmci_storm_on_cpus count , kicks the timer with
* CMCI_STORM_INTERVAL , sets cmci_backoff_cnt to INITIAL_CHECK_INTERVAL and
* prints a notice if this is the first CPU counted as being in a storm .
*/
static bool cmci_storm_detect ( void )
{
unsigned int cnt = __this_cpu_read ( cmci_storm_cnt ) ;
unsigned long ts = __this_cpu_read ( cmci_time_stamp ) ;
unsigned long now = jiffies ;
int r ;
/* Already in ( or winding down from ) a storm : nothing to count . */
if ( __this_cpu_read ( cmci_storm_state ) ! = CMCI_STORM_NONE )
return true ;
/*
* Still inside the window that started at ts : count this interrupt .
* Otherwise start a new window at the current jiffies value .
*/
if ( time_before_eq ( now , ts + CMCI_STORM_INTERVAL ) ) {
cnt + + ;
} else {
cnt = 1 ;
__this_cpu_write ( cmci_time_stamp , now ) ;
}
__this_cpu_write ( cmci_storm_cnt , cnt ) ;
if ( cnt < = CMCI_STORM_THRESHOLD )
return false ;
2014-03-27 21:24:36 -04:00
cmci_storm_disable_banks ( ) ;
2012-08-09 11:44:51 -07:00
__this_cpu_write ( cmci_storm_state , CMCI_STORM_ACTIVE ) ;
/* r is the number of CPUs counted as ACTIVE , including this one . */
r = atomic_add_return ( 1 , & cmci_storm_on_cpus ) ;
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
mce_timer_kick ( CMCI_STORM_INTERVAL ) ;
this_cpu_write ( cmci_backoff_cnt , INITIAL_CHECK_INTERVAL ) ;
2012-08-09 11:44:51 -07:00
if ( r = = 1 )
pr_notice ( " CMCI storm detected: switching to poll mode \n " ) ;
return true ;
}
2009-02-12 13:49:36 +01:00
/*
* The interrupt handler . This is called on every event .
*
* cmci_storm_detect ( ) is consulted first ; when it reports a storm the
* handler returns without polling . Otherwise the poller is called directly
* on the banks owned by this CPU ( with MCP_TIMESTAMP ) to log any events .
*
* This could in theory increase the threshold under high load ,
* but does not for now .
*/
static void intel_threshold_interrupt ( void )
{
2012-08-09 11:44:51 -07:00
if ( cmci_storm_detect ( ) )
return ;
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
machine_check_poll ( MCP_TIMESTAMP , this_cpu_ptr ( & mce_banks_owned ) ) ;
2009-02-12 13:49:36 +01:00
}
/*
* Enable CMCI ( Corrected Machine Check Interrupt ) for available MCE banks
* on this CPU . Use the algorithm recommended in the SDM to discover shared
* banks .
*/
2012-08-09 10:59:21 -07:00
static void cmci_discover ( int banks )
2009-02-12 13:49:36 +01:00
{
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
unsigned long * owned = ( void * ) this_cpu_ptr ( & mce_banks_owned ) ;
2009-05-08 17:28:40 +09:00
unsigned long flags ;
2009-02-12 13:49:36 +01:00
int i ;
2012-09-27 10:08:00 -07:00
int bios_wrong_thresh = 0 ;
2009-02-12 13:49:36 +01:00
2014-08-05 22:57:19 +02:00
raw_spin_lock_irqsave ( & cmci_discover_lock , flags ) ;
2009-02-12 13:49:36 +01:00
for ( i = 0 ; i < banks ; i + + ) {
u64 val ;
2012-09-27 10:08:00 -07:00
int bios_zero_thresh = 0 ;
2009-02-12 13:49:36 +01:00
if ( test_bit ( i , owned ) )
continue ;
2013-07-01 21:08:47 +05:30
/* Skip banks in firmware first mode */
if ( test_bit ( i , mce_banks_ce_disabled ) )
continue ;
2009-07-09 00:31:44 +02:00
rdmsrl ( MSR_IA32_MCx_CTL2 ( i ) , val ) ;
2009-02-12 13:49:36 +01:00
/* Already owned by someone else? */
2010-06-08 14:09:08 +08:00
if ( val & MCI_CTL2_CMCI_EN ) {
2012-08-09 10:59:21 -07:00
clear_bit ( i , owned ) ;
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
__clear_bit ( i , this_cpu_ptr ( mce_poll_banks ) ) ;
2009-02-12 13:49:36 +01:00
continue ;
}
2012-10-17 12:05:33 +02:00
if ( ! mca_cfg . bios_cmci_threshold ) {
2012-09-27 10:08:00 -07:00
val & = ~ MCI_CTL2_CMCI_THRESHOLD_MASK ;
val | = CMCI_THRESHOLD ;
} else if ( ! ( val & MCI_CTL2_CMCI_THRESHOLD_MASK ) ) {
/*
* If bios_cmci_threshold boot option was specified
* but the threshold is zero , we ' ll try to initialize
* it to 1.
*/
bios_zero_thresh = 1 ;
val | = CMCI_THRESHOLD ;
}
val | = MCI_CTL2_CMCI_EN ;
2009-07-09 00:31:44 +02:00
wrmsrl ( MSR_IA32_MCx_CTL2 ( i ) , val ) ;
rdmsrl ( MSR_IA32_MCx_CTL2 ( i ) , val ) ;
2009-02-12 13:49:36 +01:00
/* Did the enable bit stick? -- the bank supports CMCI */
2010-06-08 14:09:08 +08:00
if ( val & MCI_CTL2_CMCI_EN ) {
2012-08-09 10:59:21 -07:00
set_bit ( i , owned ) ;
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
__clear_bit ( i , this_cpu_ptr ( mce_poll_banks ) ) ;
2012-09-27 10:08:00 -07:00
/*
* We are able to set thresholds for some banks that
* had a threshold of 0. This means the BIOS has not
* set the thresholds properly or does not work with
* this boot option . Note down now and report later .
*/
2012-10-17 12:05:33 +02:00
if ( mca_cfg . bios_cmci_threshold & & bios_zero_thresh & &
2012-09-27 10:08:00 -07:00
( val & MCI_CTL2_CMCI_THRESHOLD_MASK ) )
bios_wrong_thresh = 1 ;
2009-02-12 13:49:36 +01:00
} else {
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
WARN_ON ( ! test_bit ( i , this_cpu_ptr ( mce_poll_banks ) ) ) ;
2009-02-12 13:49:36 +01:00
}
}
2014-08-05 22:57:19 +02:00
raw_spin_unlock_irqrestore ( & cmci_discover_lock , flags ) ;
2012-10-17 12:05:33 +02:00
if ( mca_cfg . bios_cmci_threshold & & bios_wrong_thresh ) {
2012-09-27 10:08:00 -07:00
pr_info_once (
" bios_cmci_threshold: Some banks do not have valid thresholds set \n " ) ;
pr_info_once (
" bios_cmci_threshold: Make sure your BIOS supports this boot option \n " ) ;
}
2009-02-12 13:49:36 +01:00
}
/*
* Just in case we missed an event during initialization check
* all the CMCI owned banks .
*/
2009-02-24 13:19:02 -08:00
void cmci_recheck ( void )
2009-02-12 13:49:36 +01:00
{
unsigned long flags ;
int banks ;
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
if ( ! mce_available ( raw_cpu_ptr ( & cpu_info ) ) | | ! cmci_supported ( & banks ) )
2009-02-12 13:49:36 +01:00
return ;
x86/MCE/intel: Cleanup CMCI storm logic
Initially, this started with the yet another report about a race
condition in the CMCI storm adaptive period length thing. Yes, we have
to admit, it is fragile and error prone. So let's simplify it.
The simpler logic is: now, after we enter storm mode, we go straight to
polling with CMCI_STORM_INTERVAL, i.e. once a second. We remain in storm
mode as long as we see errors being logged while polling.
Theoretically, if we see an uninterrupted error stream, we will remain
in storm mode indefinitely and keep polling the MSRs.
However, when the storm is actually a burst of errors, once we have
logged them all, we back out of it after ~5 mins of polling and no more
errors logged.
If we encounter an error during those 5 minutes, we reset the polling
interval to 5 mins.
Making machine_check_poll() return a bool and denoting whether it has
seen an error or not lets us simplify a bunch of code and move the storm
handling private to mce_intel.c.
Some minor cleanups while at it.
Reported-by: Calvin Owens <calvinowens@fb.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1417746575-23299-1-git-send-email-calvinowens@fb.com
Signed-off-by: Borislav Petkov <bp@suse.de>
2015-01-13 15:08:51 +01:00
2009-02-12 13:49:36 +01:00
local_irq_save ( flags ) ;
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
machine_check_poll ( MCP_TIMESTAMP , this_cpu_ptr ( & mce_banks_owned ) ) ;
2009-02-12 13:49:36 +01:00
local_irq_restore ( flags ) ;
}
2013-07-01 21:08:47 +05:30
/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank ( int bank )
{
u64 val ;
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
if ( ! test_bit ( bank , this_cpu_ptr ( mce_banks_owned ) ) )
2013-07-01 21:08:47 +05:30
return ;
rdmsrl ( MSR_IA32_MCx_CTL2 ( bank ) , val ) ;
val & = ~ MCI_CTL2_CMCI_EN ;
wrmsrl ( MSR_IA32_MCx_CTL2 ( bank ) , val ) ;
x86: Replace __get_cpu_var uses
__get_cpu_var() is used for multiple purposes in the kernel source. One of
them is address calculation via the form &__get_cpu_var(x). This calculates
the address for the instance of the percpu variable of the current processor
based on an offset.
Other use cases are for storing and retrieving data from the current
processors percpu area. __get_cpu_var() can be used as an lvalue when
writing data or on the right side of an assignment.
__get_cpu_var() is defined as :
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
__get_cpu_var() always only does an address determination. However, store
and retrieve operations could use a segment prefix (or global register on
other platforms) to avoid the address calculation.
this_cpu_write() and this_cpu_read() can directly take an offset into a
percpu area and use optimized assembly code to read and write per cpu
variables.
This patch converts __get_cpu_var into either an explicit address
calculation using this_cpu_ptr() or into a use of this_cpu operations that
use the offset. Thereby address calculations are avoided and less registers
are used when code is generated.
Transformations done to __get_cpu_var()
1. Determine the address of the percpu instance of the current processor.
DEFINE_PER_CPU(int, y);
int *x = &__get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(&y);
2. Same as #1 but this time an array structure is involved.
DEFINE_PER_CPU(int, y[20]);
int *x = __get_cpu_var(y);
Converts to
int *x = this_cpu_ptr(y);
3. Retrieve the content of the current processors instance of a per cpu
variable.
DEFINE_PER_CPU(int, y);
int x = __get_cpu_var(y)
Converts to
int x = __this_cpu_read(y);
4. Retrieve the content of a percpu struct
DEFINE_PER_CPU(struct mystruct, y);
struct mystruct x = __get_cpu_var(y);
Converts to
memcpy(&x, this_cpu_ptr(&y), sizeof(x));
5. Assignment to a per cpu variable
DEFINE_PER_CPU(int, y)
__get_cpu_var(y) = x;
Converts to
__this_cpu_write(y, x);
6. Increment/Decrement etc of a per cpu variable
DEFINE_PER_CPU(int, y);
__get_cpu_var(y)++
Converts to
__this_cpu_inc(y)
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
2014-08-17 12:30:40 -05:00
__clear_bit ( bank , this_cpu_ptr ( mce_banks_owned ) ) ;
2013-07-01 21:08:47 +05:30
}
2009-02-12 13:49:36 +01:00
/*
* Disable CMCI on this CPU for all banks it owns when it goes down .
* This allows other CPUs to claim the banks on rediscovery .
*/
2009-02-24 13:19:02 -08:00
void cmci_clear ( void )
2009-02-12 13:49:36 +01:00
{
2009-05-08 17:28:40 +09:00
unsigned long flags ;
2009-02-12 13:49:36 +01:00
int i ;
int banks ;
if ( ! cmci_supported ( & banks ) )
return ;
2014-08-05 22:57:19 +02:00
raw_spin_lock_irqsave ( & cmci_discover_lock , flags ) ;
2013-07-01 21:08:47 +05:30
for ( i = 0 ; i < banks ; i + + )
__cmci_disable_bank ( i ) ;
2014-08-05 22:57:19 +02:00
raw_spin_unlock_irqrestore ( & cmci_discover_lock , flags ) ;
2009-02-12 13:49:36 +01:00
}
2013-03-20 15:31:29 +05:30
/*
 * Callback that cmci_rediscover() passes to on_each_cpu(): rerun bank
 * discovery on the CPU executing it.
 *
 * @arg: unused.
 */
static void cmci_rediscover_work_func(void *arg)
{
	int banks;

	/* Recheck banks in case CPUs don't all have the same */
	if (cmci_supported(&banks))
		cmci_discover(banks);
}
2013-03-20 15:31:29 +05:30
/* After a CPU went down cycle through all the others and rediscover */
void cmci_rediscover ( void )
2009-02-12 13:49:36 +01:00
{
2013-03-20 15:31:29 +05:30
int banks ;
2009-02-12 13:49:36 +01:00
if ( ! cmci_supported ( & banks ) )
return ;
2013-03-20 15:31:29 +05:30
on_each_cpu ( cmci_rediscover_work_func , NULL , 1 ) ;
2009-02-12 13:49:36 +01:00
}
/*
 * Reenable CMCI on this CPU in case a CPU down failed.
 *
 * Reruns cmci_discover() when cmci_supported() reports support.
 */
void cmci_reenable(void)
{
	int banks;

	if (cmci_supported(&banks))
		cmci_discover(banks);
}
2013-07-01 21:08:47 +05:30
void cmci_disable_bank ( int bank )
{
int banks ;
unsigned long flags ;
if ( ! cmci_supported ( & banks ) )
return ;
2014-08-05 22:57:19 +02:00
raw_spin_lock_irqsave ( & cmci_discover_lock , flags ) ;
2013-07-01 21:08:47 +05:30
__cmci_disable_bank ( bank ) ;
2014-08-05 22:57:19 +02:00
raw_spin_unlock_irqrestore ( & cmci_discover_lock , flags ) ;
2013-07-01 21:08:47 +05:30
}
2009-03-16 17:07:33 +09:00
static void intel_init_cmci ( void )
2009-02-12 13:49:36 +01:00
{
int banks ;
if ( ! cmci_supported ( & banks ) )
return ;
mce_threshold_vector = intel_threshold_interrupt ;
2012-08-09 10:59:21 -07:00
cmci_discover ( banks ) ;
2009-02-12 13:49:36 +01:00
/*
* For CPU # 0 this runs with still disabled APIC , but that ' s
* ok because only the vector is set up . We still do another
* check for the banks later for CPU # 0 just to make sure
* to not miss any events .
*/
apic_write ( APIC_LVTCMCI , THRESHOLD_APIC_VECTOR | APIC_DM_FIXED ) ;
cmci_recheck ( ) ;
}
2015-08-12 18:29:40 +02:00
static void intel_init_lmce ( void )
2015-06-04 18:55:23 +02:00
{
u64 val ;
if ( ! lmce_supported ( ) )
return ;
rdmsrl ( MSR_IA32_MCG_EXT_CTL , val ) ;
if ( ! ( val & MCG_EXT_CTL_LMCE_EN ) )
wrmsrl ( MSR_IA32_MCG_EXT_CTL , val | MCG_EXT_CTL_LMCE_EN ) ;
}
2015-08-12 18:29:40 +02:00
/*
 * Clear MCG_EXT_CTL_LMCE_EN in MSR_IA32_MCG_EXT_CTL when lmce_supported()
 * says so. Unlike intel_init_lmce(), the MSR is written back
 * unconditionally.
 */
static void intel_clear_lmce(void)
{
	u64 val;

	if (!lmce_supported())
		return;

	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
	val &= ~MCG_EXT_CTL_LMCE_EN;
	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}
2009-02-20 23:35:51 -08:00
/*
 * Initialize the Intel-specific MCE features handled in this file:
 * thermal, CMCI and LMCE, in that order.
 *
 * @c: CPU descriptor; forwarded to intel_init_thermal() only.
 */
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
	intel_init_thermal(c);
	intel_init_cmci();
	intel_init_lmce();
}
2015-08-12 18:29:40 +02:00
/*
 * Undo the Intel-specific MCE feature setup; at present this only clears
 * LMCE via intel_clear_lmce().
 *
 * @c: CPU descriptor; not used by this function.
 */
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}