Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (80 commits) x86, mce: Add boot options for corrected errors x86, mce: Fix mce printing x86, mce: fix for mce counters x86, mce: support action-optional machine checks x86, mce: define MCE_VECTOR x86, mce: rename mce_notify_user to mce_notify_irq x86: fix panic with interrupts off (needed for MCE) x86, mce: export MCE severities coverage via debugfs x86, mce: implement new status bits x86, mce: print header/footer only once for multiple MCEs x86, mce: default to panic timeout for machine checks x86, mce: improve mce_get_rip x86, mce: make non Monarch panic message "Fatal machine check" too x86, mce: switch x86 machine check handler to Monarch election. x86, mce: implement panic synchronization x86, mce: implement bootstrapping for machine check wakeups x86, mce: check early in exception handler if panic is needed x86, mce: add table driven machine check grading x86, mce: remove TSC print heuristic x86, mce: log corrected errors when panicing ...
This commit is contained in:
commit
a2ee2981ae
@ -48,6 +48,7 @@ o procps 3.2.0 # ps --version
|
||||
o oprofile 0.9 # oprofiled --version
|
||||
o udev 081 # udevinfo -V
|
||||
o grub 0.93 # grub --version
|
||||
o mcelog 0.6
|
||||
|
||||
Kernel compilation
|
||||
==================
|
||||
@ -276,6 +277,16 @@ before running exportfs or mountd. It is recommended that all NFS
|
||||
services be protected from the internet-at-large by a firewall where
|
||||
that is possible.
|
||||
|
||||
mcelog
|
||||
------
|
||||
|
||||
In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility
|
||||
as a regular cronjob similar to the x86-64 kernel to process and log
|
||||
machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check
|
||||
events are errors reported by the CPU. Processing them is strongly encouraged.
|
||||
All x86-64 kernels since 2.6.4 require the mcelog utility to
|
||||
process machine checks.
|
||||
|
||||
Getting updated software
|
||||
========================
|
||||
|
||||
@ -365,6 +376,10 @@ FUSE
|
||||
----
|
||||
o <http://sourceforge.net/projects/fuse>
|
||||
|
||||
mcelog
|
||||
------
|
||||
o <ftp://ftp.kernel.org/pub/linux/utils/cpu/mce/mcelog/>
|
||||
|
||||
Networking
|
||||
**********
|
||||
|
||||
|
@ -437,3 +437,13 @@ Why: Superseded by tdfxfb. I2C/DDC support used to live in a separate
|
||||
driver but this caused driver conflicts.
|
||||
Who: Jean Delvare <khali@linux-fr.org>
|
||||
Krzysztof Helt <krzysztof.h1@wp.pl>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: CONFIG_X86_OLD_MCE
|
||||
When: 2.6.32
|
||||
Why: Remove the old legacy 32bit machine check code. This has been
|
||||
superseded by the newer machine check code from the 64bit port,
|
||||
but the old version has been kept around for easier testing. Note this
|
||||
doesn't impact the old P5 and WinChip machine check handlers.
|
||||
Who: Andi Kleen <andi@firstfloor.org>
|
||||
|
@ -5,21 +5,51 @@ only the AMD64 specific ones are listed here.
|
||||
|
||||
Machine check
|
||||
|
||||
mce=off disable machine check
|
||||
mce=bootlog Enable logging of machine checks left over from booting.
|
||||
Disabled by default on AMD because some BIOS leave bogus ones.
|
||||
If your BIOS doesn't do that it's a good idea to enable though
|
||||
to make sure you log even machine check events that result
|
||||
in a reboot. On Intel systems it is enabled by default.
|
||||
Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
|
||||
|
||||
mce=off
|
||||
Disable machine check
|
||||
mce=no_cmci
|
||||
Disable CMCI(Corrected Machine Check Interrupt) that
|
||||
Intel processor supports. Usually this disablement is
|
||||
not recommended, but it might be handy if your hardware
|
||||
is misbehaving.
|
||||
Note that you'll get more problems without CMCI than with
|
||||
due to the shared banks, i.e. you might get duplicated
|
||||
error logs.
|
||||
mce=dont_log_ce
|
||||
Don't make logs for corrected errors. All events reported
|
||||
as corrected are silently cleared by OS.
|
||||
This option will be useful if you have no interest in any
|
||||
of corrected errors.
|
||||
mce=ignore_ce
|
||||
Disable features for corrected errors, e.g. polling timer
|
||||
and CMCI. All events reported as corrected are not cleared
|
||||
by OS and remained in its error banks.
|
||||
Usually this disablement is not recommended, however if
|
||||
there is an agent checking/clearing corrected errors
|
||||
(e.g. BIOS or hardware monitoring applications), conflicting
|
||||
with OS's error handling, and you cannot deactivate the agent,
|
||||
then this option will be a help.
|
||||
mce=bootlog
|
||||
Enable logging of machine checks left over from booting.
|
||||
Disabled by default on AMD because some BIOS leave bogus ones.
|
||||
If your BIOS doesn't do that it's a good idea to enable though
|
||||
to make sure you log even machine check events that result
|
||||
in a reboot. On Intel systems it is enabled by default.
|
||||
mce=nobootlog
|
||||
Disable boot machine check logging.
|
||||
mce=tolerancelevel (number)
|
||||
mce=tolerancelevel[,monarchtimeout] (number,number)
|
||||
tolerance levels:
|
||||
0: always panic on uncorrected errors, log corrected errors
|
||||
1: panic or SIGBUS on uncorrected errors, log corrected errors
|
||||
2: SIGBUS or log uncorrected errors, log corrected errors
|
||||
3: never panic or SIGBUS, log all errors (for testing only)
|
||||
Default is 1
|
||||
Can be also set using sysfs which is preferable.
|
||||
monarchtimeout:
|
||||
Sets the time in us to wait for other CPUs on machine checks. 0
|
||||
to disable.
|
||||
|
||||
nomce (for compatibility with i386): same as mce=off
|
||||
|
||||
|
@ -41,7 +41,9 @@ check_interval
|
||||
the polling interval. When the poller stops finding MCEs, it
|
||||
triggers an exponential backoff (poll less often) on the polling
|
||||
interval. The check_interval variable is both the initial and
|
||||
maximum polling interval.
|
||||
maximum polling interval. 0 means no polling for corrected machine
|
||||
check errors (but some corrected errors might be still reported
|
||||
in other ways)
|
||||
|
||||
tolerant
|
||||
Tolerance level. When a machine check exception occurs for a non
|
||||
@ -67,6 +69,10 @@ trigger
|
||||
Program to run when a machine check event is detected.
|
||||
This is an alternative to running mcelog regularly from cron
|
||||
and allows to detect events faster.
|
||||
monarch_timeout
|
||||
How long to wait for the other CPUs to machine check too on a
|
||||
exception. 0 to disable waiting for other CPUs.
|
||||
Unit: us
|
||||
|
||||
TBD document entries for AMD threshold interrupt configuration
|
||||
|
||||
|
@ -789,10 +789,26 @@ config X86_MCE
|
||||
to disable it. MCE support simply ignores non-MCE processors like
|
||||
the 386 and 486, so nearly everyone can say Y here.
|
||||
|
||||
config X86_OLD_MCE
|
||||
depends on X86_32 && X86_MCE
|
||||
bool "Use legacy machine check code (will go away)"
|
||||
default n
|
||||
select X86_ANCIENT_MCE
|
||||
---help---
|
||||
Use the old i386 machine check code. This is merely intended for
|
||||
testing in a transition period. Try this if you run into any machine
|
||||
check related software problems, but report the problem to
|
||||
linux-kernel. When in doubt say no.
|
||||
|
||||
config X86_NEW_MCE
|
||||
depends on X86_MCE
|
||||
bool
|
||||
default y if (!X86_OLD_MCE && X86_32) || X86_64
|
||||
|
||||
config X86_MCE_INTEL
|
||||
def_bool y
|
||||
prompt "Intel MCE features"
|
||||
depends on X86_64 && X86_MCE && X86_LOCAL_APIC
|
||||
depends on X86_NEW_MCE && X86_LOCAL_APIC
|
||||
---help---
|
||||
Additional support for intel specific MCE features such as
|
||||
the thermal monitor.
|
||||
@ -800,19 +816,36 @@ config X86_MCE_INTEL
|
||||
config X86_MCE_AMD
|
||||
def_bool y
|
||||
prompt "AMD MCE features"
|
||||
depends on X86_64 && X86_MCE && X86_LOCAL_APIC
|
||||
depends on X86_NEW_MCE && X86_LOCAL_APIC
|
||||
---help---
|
||||
Additional support for AMD specific MCE features such as
|
||||
the DRAM Error Threshold.
|
||||
|
||||
config X86_ANCIENT_MCE
|
||||
def_bool n
|
||||
depends on X86_32
|
||||
prompt "Support for old Pentium 5 / WinChip machine checks"
|
||||
---help---
|
||||
Include support for machine check handling on old Pentium 5 or WinChip
|
||||
systems. These typically need to be enabled explicitely on the command
|
||||
line.
|
||||
|
||||
config X86_MCE_THRESHOLD
|
||||
depends on X86_MCE_AMD || X86_MCE_INTEL
|
||||
bool
|
||||
default y
|
||||
|
||||
config X86_MCE_INJECT
|
||||
depends on X86_NEW_MCE
|
||||
tristate "Machine check injector support"
|
||||
---help---
|
||||
Provide support for injecting machine checks for testing purposes.
|
||||
If you don't know what a machine check is and you don't do kernel
|
||||
QA it is safe to say n.
|
||||
|
||||
config X86_MCE_NONFATAL
|
||||
tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
|
||||
depends on X86_32 && X86_MCE
|
||||
depends on X86_OLD_MCE
|
||||
---help---
|
||||
Enabling this feature starts a timer that triggers every 5 seconds which
|
||||
will look at the machine check registers to see if anything happened.
|
||||
@ -825,11 +858,15 @@ config X86_MCE_NONFATAL
|
||||
|
||||
config X86_MCE_P4THERMAL
|
||||
bool "check for P4 thermal throttling interrupt."
|
||||
depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
|
||||
depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
|
||||
---help---
|
||||
Enabling this feature will cause a message to be printed when the P4
|
||||
enters thermal throttling.
|
||||
|
||||
config X86_THERMAL_VECTOR
|
||||
def_bool y
|
||||
depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
|
||||
|
||||
config VM86
|
||||
bool "Enable VM86 support" if EMBEDDED
|
||||
default y
|
||||
|
@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
|
||||
BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
|
||||
BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
|
||||
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
|
||||
BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
|
||||
|
||||
BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
|
||||
smp_invalidate_interrupt)
|
||||
@ -52,8 +53,16 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
|
||||
BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_MCE_P4THERMAL
|
||||
#ifdef CONFIG_X86_THERMAL_VECTOR
|
||||
BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_MCE_THRESHOLD
|
||||
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -22,7 +22,7 @@ typedef struct {
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
unsigned int irq_thermal_count;
|
||||
# ifdef CONFIG_X86_64
|
||||
# ifdef CONFIG_X86_MCE_THRESHOLD
|
||||
unsigned int irq_threshold_count;
|
||||
# endif
|
||||
#endif
|
||||
|
@ -34,6 +34,7 @@ extern void perf_pending_interrupt(void);
|
||||
extern void spurious_interrupt(void);
|
||||
extern void thermal_interrupt(void);
|
||||
extern void reschedule_interrupt(void);
|
||||
extern void mce_self_interrupt(void);
|
||||
|
||||
extern void invalidate_interrupt(void);
|
||||
extern void invalidate_interrupt0(void);
|
||||
@ -46,6 +47,7 @@ extern void invalidate_interrupt6(void);
|
||||
extern void invalidate_interrupt7(void);
|
||||
|
||||
extern void irq_move_cleanup_interrupt(void);
|
||||
extern void reboot_interrupt(void);
|
||||
extern void threshold_interrupt(void);
|
||||
|
||||
extern void call_function_interrupt(void);
|
||||
|
@ -25,6 +25,7 @@
|
||||
*/
|
||||
|
||||
#define NMI_VECTOR 0x02
|
||||
#define MCE_VECTOR 0x12
|
||||
|
||||
/*
|
||||
* IDT vectors usable for external interrupt sources start
|
||||
@ -87,13 +88,8 @@
|
||||
#define CALL_FUNCTION_VECTOR 0xfc
|
||||
#define CALL_FUNCTION_SINGLE_VECTOR 0xfb
|
||||
#define THERMAL_APIC_VECTOR 0xfa
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* 0xf8 - 0xf9 : free */
|
||||
#else
|
||||
# define THRESHOLD_APIC_VECTOR 0xf9
|
||||
# define UV_BAU_MESSAGE 0xf8
|
||||
#endif
|
||||
#define THRESHOLD_APIC_VECTOR 0xf9
|
||||
#define REBOOT_VECTOR 0xf8
|
||||
|
||||
/* f0-f7 used for spreading out TLB flushes: */
|
||||
#define INVALIDATE_TLB_VECTOR_END 0xf7
|
||||
@ -117,6 +113,13 @@
|
||||
*/
|
||||
#define LOCAL_PENDING_VECTOR 0xec
|
||||
|
||||
#define UV_BAU_MESSAGE 0xec
|
||||
|
||||
/*
|
||||
* Self IPI vector for machine checks
|
||||
*/
|
||||
#define MCE_SELF_VECTOR 0xeb
|
||||
|
||||
/*
|
||||
* First APIC vector available to drivers: (vectors 0x30-0xee) we
|
||||
* start at 0x31(0x41) to spread out vectors evenly between priority
|
||||
|
@ -1,8 +1,6 @@
|
||||
#ifndef _ASM_X86_MCE_H
|
||||
#define _ASM_X86_MCE_H
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <asm/ioctls.h>
|
||||
|
||||
@ -10,21 +8,35 @@
|
||||
* Machine Check support for x86
|
||||
*/
|
||||
|
||||
#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
|
||||
#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
|
||||
#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
|
||||
#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
|
||||
#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */
|
||||
#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
|
||||
#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
|
||||
#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
|
||||
#define MCG_EXT_CNT_SHIFT 16
|
||||
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
|
||||
#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
|
||||
|
||||
#define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */
|
||||
#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */
|
||||
#define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */
|
||||
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
|
||||
#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
|
||||
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
|
||||
|
||||
#define MCI_STATUS_VAL (1UL<<63) /* valid error */
|
||||
#define MCI_STATUS_OVER (1UL<<62) /* previous errors lost */
|
||||
#define MCI_STATUS_UC (1UL<<61) /* uncorrected error */
|
||||
#define MCI_STATUS_EN (1UL<<60) /* error enabled */
|
||||
#define MCI_STATUS_MISCV (1UL<<59) /* misc error reg. valid */
|
||||
#define MCI_STATUS_ADDRV (1UL<<58) /* addr reg. valid */
|
||||
#define MCI_STATUS_PCC (1UL<<57) /* processor context corrupt */
|
||||
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
|
||||
#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
|
||||
#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
|
||||
#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
|
||||
#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
|
||||
#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
|
||||
#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
|
||||
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
|
||||
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
|
||||
|
||||
/* MISC register defines */
|
||||
#define MCM_ADDR_SEGOFF 0 /* segment offset */
|
||||
#define MCM_ADDR_LINEAR 1 /* linear address */
|
||||
#define MCM_ADDR_PHYS 2 /* physical address */
|
||||
#define MCM_ADDR_MEM 3 /* memory address */
|
||||
#define MCM_ADDR_GENERIC 7 /* generic */
|
||||
|
||||
/* Fields are zero when not available */
|
||||
struct mce {
|
||||
@ -34,13 +46,19 @@ struct mce {
|
||||
__u64 mcgstatus;
|
||||
__u64 ip;
|
||||
__u64 tsc; /* cpu time stamp counter */
|
||||
__u64 res1; /* for future extension */
|
||||
__u64 res2; /* dito. */
|
||||
__u64 time; /* wall time_t when error was detected */
|
||||
__u8 cpuvendor; /* cpu vendor as encoded in system.h */
|
||||
__u8 pad1;
|
||||
__u16 pad2;
|
||||
__u32 cpuid; /* CPUID 1 EAX */
|
||||
__u8 cs; /* code segment */
|
||||
__u8 bank; /* machine check bank */
|
||||
__u8 cpu; /* cpu that raised the error */
|
||||
__u8 cpu; /* cpu number; obsolete; use extcpu now */
|
||||
__u8 finished; /* entry is valid */
|
||||
__u32 pad;
|
||||
__u32 extcpu; /* linux cpu number that detected the error */
|
||||
__u32 socketid; /* CPU socket ID */
|
||||
__u32 apicid; /* CPU initial apic ID */
|
||||
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -57,7 +75,7 @@ struct mce_log {
|
||||
unsigned len; /* = MCE_LOG_LEN */
|
||||
unsigned next;
|
||||
unsigned flags;
|
||||
unsigned pad0;
|
||||
unsigned recordlen; /* length of struct mce */
|
||||
struct mce entry[MCE_LOG_LEN];
|
||||
};
|
||||
|
||||
@ -82,19 +100,16 @@ struct mce_log {
|
||||
#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
|
||||
#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
extern int mce_disabled;
|
||||
#else /* CONFIG_X86_32 */
|
||||
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/percpu.h>
|
||||
|
||||
void mce_setup(struct mce *m);
|
||||
void mce_log(struct mce *m);
|
||||
DECLARE_PER_CPU(struct sys_device, device_mce);
|
||||
DECLARE_PER_CPU(struct sys_device, mce_dev);
|
||||
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
|
||||
|
||||
/*
|
||||
@ -104,6 +119,8 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
|
||||
#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
|
||||
|
||||
#ifdef CONFIG_X86_MCE_INTEL
|
||||
extern int mce_cmci_disabled;
|
||||
extern int mce_ignore_ce;
|
||||
void mce_intel_feature_init(struct cpuinfo_x86 *c);
|
||||
void cmci_clear(void);
|
||||
void cmci_reenable(void);
|
||||
@ -123,13 +140,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
|
||||
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
|
||||
#endif
|
||||
|
||||
extern int mce_available(struct cpuinfo_x86 *c);
|
||||
int mce_available(struct cpuinfo_x86 *c);
|
||||
|
||||
DECLARE_PER_CPU(unsigned, mce_exception_count);
|
||||
DECLARE_PER_CPU(unsigned, mce_poll_count);
|
||||
|
||||
void mce_log_therm_throt_event(__u64 status);
|
||||
|
||||
extern atomic_t mce_entry;
|
||||
|
||||
extern void do_machine_check(struct pt_regs *, long);
|
||||
void do_machine_check(struct pt_regs *, long);
|
||||
|
||||
typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
|
||||
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
|
||||
@ -139,14 +159,16 @@ enum mcp_flags {
|
||||
MCP_UC = (1 << 1), /* log uncorrected errors */
|
||||
MCP_DONTLOG = (1 << 2), /* only clear, don't log */
|
||||
};
|
||||
extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
|
||||
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
|
||||
|
||||
extern int mce_notify_user(void);
|
||||
int mce_notify_irq(void);
|
||||
void mce_notify_process(void);
|
||||
|
||||
#endif /* !CONFIG_X86_32 */
|
||||
DECLARE_PER_CPU(struct mce, injectm);
|
||||
extern struct file_operations mce_chrdev_ops;
|
||||
|
||||
#ifdef CONFIG_X86_MCE
|
||||
extern void mcheck_init(struct cpuinfo_x86 *c);
|
||||
void mcheck_init(struct cpuinfo_x86 *c);
|
||||
#else
|
||||
#define mcheck_init(c) do { } while (0)
|
||||
#endif
|
||||
|
@ -207,7 +207,14 @@
|
||||
|
||||
#define MSR_IA32_THERM_CONTROL 0x0000019a
|
||||
#define MSR_IA32_THERM_INTERRUPT 0x0000019b
|
||||
|
||||
#define THERM_INT_LOW_ENABLE (1 << 0)
|
||||
#define THERM_INT_HIGH_ENABLE (1 << 1)
|
||||
|
||||
#define MSR_IA32_THERM_STATUS 0x0000019c
|
||||
|
||||
#define THERM_STATUS_PROCHOT (1 << 0)
|
||||
|
||||
#define MSR_IA32_MISC_ENABLE 0x000001a0
|
||||
|
||||
/* MISC_ENABLE bits: architectural */
|
||||
|
@ -899,7 +899,7 @@ void clear_local_APIC(void)
|
||||
}
|
||||
|
||||
/* lets not touch this if we didn't frob it */
|
||||
#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
|
||||
#ifdef CONFIG_X86_THERMAL_VECTOR
|
||||
if (maxlvt >= 5) {
|
||||
v = apic_read(APIC_LVTTHMR);
|
||||
apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
|
||||
@ -2017,7 +2017,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
|
||||
apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
|
||||
apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
|
||||
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
|
||||
#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
|
||||
#ifdef CONFIG_X86_THERMAL_VECTOR
|
||||
if (maxlvt >= 5)
|
||||
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
|
||||
#endif
|
||||
|
@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
|
||||
|
||||
static inline int mce_in_progress(void)
|
||||
{
|
||||
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
|
||||
#if defined(CONFIG_X86_NEW_MCE)
|
||||
return atomic_read(&mce_entry) > 0;
|
||||
#endif
|
||||
return 0;
|
||||
|
@ -1,7 +1,11 @@
|
||||
obj-y = mce_$(BITS).o therm_throt.o
|
||||
obj-y = mce.o therm_throt.o
|
||||
|
||||
obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
|
||||
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
|
||||
obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
|
||||
obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
|
||||
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
|
||||
obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o
|
||||
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o
|
||||
obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
|
||||
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
|
||||
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
|
||||
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
|
||||
|
@ -2,11 +2,10 @@
|
||||
* Athlon specific Machine Check Exception Reporting
|
||||
* (C) Copyright 2002 Dave Jones <davej@redhat.com>
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
@ -15,12 +14,12 @@
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* Machine Check Handler For AMD Athlon/Duron */
|
||||
/* Machine Check Handler For AMD Athlon/Duron: */
|
||||
static void k7_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
int recover = 1;
|
||||
u32 alow, ahigh, high, low;
|
||||
u32 mcgstl, mcgsth;
|
||||
int recover = 1;
|
||||
int i;
|
||||
|
||||
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
@ -32,15 +31,19 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
|
||||
|
||||
for (i = 1; i < nr_mce_banks; i++) {
|
||||
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
|
||||
if (high&(1<<31)) {
|
||||
if (high & (1<<31)) {
|
||||
char misc[20];
|
||||
char addr[24];
|
||||
misc[0] = addr[0] = '\0';
|
||||
|
||||
misc[0] = '\0';
|
||||
addr[0] = '\0';
|
||||
|
||||
if (high & (1<<29))
|
||||
recover |= 1;
|
||||
if (high & (1<<25))
|
||||
recover |= 2;
|
||||
high &= ~(1<<31);
|
||||
|
||||
if (high & (1<<27)) {
|
||||
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
|
||||
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
|
||||
@ -49,27 +52,31 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
|
||||
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
|
||||
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
|
||||
}
|
||||
|
||||
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
|
||||
smp_processor_id(), i, high, low, misc, addr);
|
||||
/* Clear it */
|
||||
|
||||
/* Clear it: */
|
||||
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
|
||||
/* Serialize */
|
||||
/* Serialize: */
|
||||
wmb();
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
}
|
||||
|
||||
if (recover&2)
|
||||
if (recover & 2)
|
||||
panic("CPU context corrupt");
|
||||
if (recover&1)
|
||||
if (recover & 1)
|
||||
panic("Unable to continue");
|
||||
|
||||
printk(KERN_EMERG "Attempting to continue.\n");
|
||||
|
||||
mcgstl &= ~(1<<2);
|
||||
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
}
|
||||
|
||||
|
||||
/* AMD K7 machine check is Intel like */
|
||||
/* AMD K7 machine check is Intel like: */
|
||||
void amd_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
@ -79,21 +86,26 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
|
||||
return;
|
||||
|
||||
machine_check_vector = k7_machine_check;
|
||||
/* Make sure the vector pointer is visible before we enable MCEs: */
|
||||
wmb();
|
||||
|
||||
printk(KERN_INFO "Intel machine check architecture supported.\n");
|
||||
|
||||
rdmsr(MSR_IA32_MCG_CAP, l, h);
|
||||
if (l & (1<<8)) /* Control register present ? */
|
||||
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
nr_mce_banks = l & 0xff;
|
||||
|
||||
/* Clear status for MC index 0 separately, we don't touch CTL,
|
||||
* as some K7 Athlons cause spurious MCEs when its enabled. */
|
||||
/*
|
||||
* Clear status for MC index 0 separately, we don't touch CTL,
|
||||
* as some K7 Athlons cause spurious MCEs when its enabled:
|
||||
*/
|
||||
if (boot_cpu_data.x86 == 6) {
|
||||
wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
|
||||
i = 1;
|
||||
} else
|
||||
i = 0;
|
||||
|
||||
for (; i < nr_mce_banks; i++) {
|
||||
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
|
||||
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
|
||||
|
127
arch/x86/kernel/cpu/mcheck/mce-inject.c
Normal file
127
arch/x86/kernel/cpu/mcheck/mce-inject.c
Normal file
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Machine check injection support.
|
||||
* Copyright 2008 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*
|
||||
* Authors:
|
||||
* Andi Kleen
|
||||
* Ying Huang
|
||||
*/
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/smp.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
/* Update fake mce registers on current CPU. */
|
||||
static void inject_mce(struct mce *m)
|
||||
{
|
||||
struct mce *i = &per_cpu(injectm, m->extcpu);
|
||||
|
||||
/* Make sure noone reads partially written injectm */
|
||||
i->finished = 0;
|
||||
mb();
|
||||
m->finished = 0;
|
||||
/* First set the fields after finished */
|
||||
i->extcpu = m->extcpu;
|
||||
mb();
|
||||
/* Now write record in order, finished last (except above) */
|
||||
memcpy(i, m, sizeof(struct mce));
|
||||
/* Finally activate it */
|
||||
mb();
|
||||
i->finished = 1;
|
||||
}
|
||||
|
||||
struct delayed_mce {
|
||||
struct timer_list timer;
|
||||
struct mce m;
|
||||
};
|
||||
|
||||
/* Inject mce on current CPU */
|
||||
static void raise_mce(unsigned long data)
|
||||
{
|
||||
struct delayed_mce *dm = (struct delayed_mce *)data;
|
||||
struct mce *m = &dm->m;
|
||||
int cpu = m->extcpu;
|
||||
|
||||
inject_mce(m);
|
||||
if (m->status & MCI_STATUS_UC) {
|
||||
struct pt_regs regs;
|
||||
memset(®s, 0, sizeof(struct pt_regs));
|
||||
regs.ip = m->ip;
|
||||
regs.cs = m->cs;
|
||||
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
|
||||
do_machine_check(®s, 0);
|
||||
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
|
||||
} else {
|
||||
mce_banks_t b;
|
||||
memset(&b, 0xff, sizeof(mce_banks_t));
|
||||
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
|
||||
machine_check_poll(0, &b);
|
||||
mce_notify_irq();
|
||||
printk(KERN_INFO "Finished machine check poll on CPU %d\n",
|
||||
cpu);
|
||||
}
|
||||
kfree(dm);
|
||||
}
|
||||
|
||||
/* Error injection interface */
|
||||
static ssize_t mce_write(struct file *filp, const char __user *ubuf,
|
||||
size_t usize, loff_t *off)
|
||||
{
|
||||
struct delayed_mce *dm;
|
||||
struct mce m;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
/*
|
||||
* There are some cases where real MSR reads could slip
|
||||
* through.
|
||||
*/
|
||||
if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
|
||||
return -EIO;
|
||||
|
||||
if ((unsigned long)usize > sizeof(struct mce))
|
||||
usize = sizeof(struct mce);
|
||||
if (copy_from_user(&m, ubuf, usize))
|
||||
return -EFAULT;
|
||||
|
||||
if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
|
||||
return -EINVAL;
|
||||
|
||||
dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
|
||||
if (!dm)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Need to give user space some time to set everything up,
|
||||
* so do it a jiffie or two later everywhere.
|
||||
* Should we use a hrtimer here for better synchronization?
|
||||
*/
|
||||
memcpy(&dm->m, &m, sizeof(struct mce));
|
||||
setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
|
||||
dm->timer.expires = jiffies + 2;
|
||||
add_timer_on(&dm->timer, m.extcpu);
|
||||
return usize;
|
||||
}
|
||||
|
||||
static int inject_init(void)
|
||||
{
|
||||
printk(KERN_INFO "Machine check injector initialized\n");
|
||||
mce_chrdev_ops.write = mce_write;
|
||||
return 0;
|
||||
}
|
||||
|
||||
module_init(inject_init);
|
||||
/*
|
||||
* Cannot tolerate unloading currently because we cannot
|
||||
* guarantee all openers of mce_chrdev will get a reference to us.
|
||||
*/
|
||||
MODULE_LICENSE("GPL");
|
15
arch/x86/kernel/cpu/mcheck/mce-internal.h
Normal file
15
arch/x86/kernel/cpu/mcheck/mce-internal.h
Normal file
@ -0,0 +1,15 @@
|
||||
#include <asm/mce.h>
|
||||
|
||||
enum severity_level {
|
||||
MCE_NO_SEVERITY,
|
||||
MCE_KEEP_SEVERITY,
|
||||
MCE_SOME_SEVERITY,
|
||||
MCE_AO_SEVERITY,
|
||||
MCE_UC_SEVERITY,
|
||||
MCE_AR_SEVERITY,
|
||||
MCE_PANIC_SEVERITY,
|
||||
};
|
||||
|
||||
int mce_severity(struct mce *a, int tolerant, char **msg);
|
||||
|
||||
extern int mce_ser;
|
218
arch/x86/kernel/cpu/mcheck/mce-severity.c
Normal file
218
arch/x86/kernel/cpu/mcheck/mce-severity.c
Normal file
@ -0,0 +1,218 @@
|
||||
/*
|
||||
* MCE grading rules.
|
||||
* Copyright 2008, 2009 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; version 2
|
||||
* of the License.
|
||||
*
|
||||
* Author: Andi Kleen
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include "mce-internal.h"
|
||||
|
||||
/*
|
||||
* Grade an mce by severity. In general the most severe ones are processed
|
||||
* first. Since there are quite a lot of combinations test the bits in a
|
||||
* table-driven way. The rules are simply processed in order, first
|
||||
* match wins.
|
||||
*
|
||||
* Note this is only used for machine check exceptions, the corrected
|
||||
* errors use much simpler rules. The exceptions still check for the corrected
|
||||
* errors, but only to leave them alone for the CMCI handler (except for
|
||||
* panic situations)
|
||||
*/
|
||||
|
||||
enum context { IN_KERNEL = 1, IN_USER = 2 };
|
||||
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
|
||||
|
||||
static struct severity {
|
||||
u64 mask;
|
||||
u64 result;
|
||||
unsigned char sev;
|
||||
unsigned char mcgmask;
|
||||
unsigned char mcgres;
|
||||
unsigned char ser;
|
||||
unsigned char context;
|
||||
unsigned char covered;
|
||||
char *msg;
|
||||
} severities[] = {
|
||||
#define KERNEL .context = IN_KERNEL
|
||||
#define USER .context = IN_USER
|
||||
#define SER .ser = SER_REQUIRED
|
||||
#define NOSER .ser = NO_SER
|
||||
#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
|
||||
#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
|
||||
#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
|
||||
#define MCGMASK(x, res, s, m, r...) \
|
||||
{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
|
||||
#define MASK(x, y, s, m, r...) \
|
||||
{ .mask = x, .result = y, SEV(s), .msg = m, ## r }
|
||||
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
|
||||
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
|
||||
#define MCACOD 0xffff
|
||||
|
||||
BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
|
||||
BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
|
||||
BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
|
||||
/* When MCIP is not set something is very confused */
|
||||
MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
|
||||
/* Neither return not error IP -- no chance to recover -> PANIC */
|
||||
MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
|
||||
"Neither restart nor error IP"),
|
||||
MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
|
||||
KERNEL),
|
||||
BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
|
||||
MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
|
||||
"Spurious not enabled", SER),
|
||||
|
||||
/* ignore OVER for UCNA */
|
||||
MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
|
||||
"Uncorrected no action required", SER),
|
||||
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
|
||||
"Illegal combination (UCNA with AR=1)", SER),
|
||||
MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
|
||||
|
||||
/* AR add known MCACODs here */
|
||||
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
|
||||
"Action required with lost events", SER),
|
||||
MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
|
||||
"Action required; unknown MCACOD", SER),
|
||||
|
||||
/* known AO MCACODs: */
|
||||
MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
|
||||
"Action optional: memory scrubbing error", SER),
|
||||
MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
|
||||
"Action optional: last level cache writeback error", SER),
|
||||
|
||||
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
|
||||
"Action optional unknown MCACOD", SER),
|
||||
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
|
||||
"Action optional with lost events", SER),
|
||||
BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
|
||||
BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
|
||||
BITSET(0, SOME, "No match") /* always matches. keep at end */
|
||||
};
|
||||
|
||||
/*
|
||||
* If the EIPV bit is set, it means the saved IP is the
|
||||
* instruction which caused the MCE.
|
||||
*/
|
||||
static int error_context(struct mce *m)
|
||||
{
|
||||
if (m->mcgstatus & MCG_STATUS_EIPV)
|
||||
return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
|
||||
/* Unknown, assume kernel */
|
||||
return IN_KERNEL;
|
||||
}
|
||||
|
||||
int mce_severity(struct mce *a, int tolerant, char **msg)
|
||||
{
|
||||
enum context ctx = error_context(a);
|
||||
struct severity *s;
|
||||
|
||||
for (s = severities;; s++) {
|
||||
if ((a->status & s->mask) != s->result)
|
||||
continue;
|
||||
if ((a->mcgstatus & s->mcgmask) != s->mcgres)
|
||||
continue;
|
||||
if (s->ser == SER_REQUIRED && !mce_ser)
|
||||
continue;
|
||||
if (s->ser == NO_SER && mce_ser)
|
||||
continue;
|
||||
if (s->context && ctx != s->context)
|
||||
continue;
|
||||
if (msg)
|
||||
*msg = s->msg;
|
||||
s->covered = 1;
|
||||
if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
|
||||
if (panic_on_oops || tolerant < 1)
|
||||
return MCE_PANIC_SEVERITY;
|
||||
}
|
||||
return s->sev;
|
||||
}
|
||||
}
|
||||
|
||||
static void *s_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
if (*pos >= ARRAY_SIZE(severities))
|
||||
return NULL;
|
||||
return &severities[*pos];
|
||||
}
|
||||
|
||||
static void *s_next(struct seq_file *f, void *data, loff_t *pos)
|
||||
{
|
||||
if (++(*pos) >= ARRAY_SIZE(severities))
|
||||
return NULL;
|
||||
return &severities[*pos];
|
||||
}
|
||||
|
||||
static void s_stop(struct seq_file *f, void *data)
|
||||
{
|
||||
}
|
||||
|
||||
static int s_show(struct seq_file *f, void *data)
|
||||
{
|
||||
struct severity *ser = data;
|
||||
seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct seq_operations severities_seq_ops = {
|
||||
.start = s_start,
|
||||
.next = s_next,
|
||||
.stop = s_stop,
|
||||
.show = s_show,
|
||||
};
|
||||
|
||||
static int severities_coverage_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return seq_open(file, &severities_seq_ops);
|
||||
}
|
||||
|
||||
static ssize_t severities_coverage_write(struct file *file,
|
||||
const char __user *ubuf,
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < ARRAY_SIZE(severities); i++)
|
||||
severities[i].covered = 0;
|
||||
return count;
|
||||
}
|
||||
|
||||
static const struct file_operations severities_coverage_fops = {
|
||||
.open = severities_coverage_open,
|
||||
.release = seq_release,
|
||||
.read = seq_read,
|
||||
.write = severities_coverage_write,
|
||||
};
|
||||
|
||||
static int __init severities_debugfs_init(void)
|
||||
{
|
||||
struct dentry *dmce = NULL, *fseverities_coverage = NULL;
|
||||
|
||||
dmce = debugfs_create_dir("mce", NULL);
|
||||
if (dmce == NULL)
|
||||
goto err_out;
|
||||
fseverities_coverage = debugfs_create_file("severities-coverage",
|
||||
0444, dmce, NULL,
|
||||
&severities_coverage_fops);
|
||||
if (fseverities_coverage == NULL)
|
||||
goto err_out;
|
||||
|
||||
return 0;
|
||||
|
||||
err_out:
|
||||
if (fseverities_coverage)
|
||||
debugfs_remove(fseverities_coverage);
|
||||
if (dmce)
|
||||
debugfs_remove(dmce);
|
||||
return -ENOMEM;
|
||||
}
|
||||
late_initcall(severities_debugfs_init);
|
1964
arch/x86/kernel/cpu/mcheck/mce.c
Normal file
1964
arch/x86/kernel/cpu/mcheck/mce.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,14 +1,38 @@
|
||||
#include <linux/init.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#ifdef CONFIG_X86_OLD_MCE
|
||||
void amd_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_ANCIENT_MCE
|
||||
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void winchip_mcheck_init(struct cpuinfo_x86 *c);
|
||||
extern int mce_p5_enable;
|
||||
static inline int mce_p5_enabled(void) { return mce_p5_enable; }
|
||||
static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
|
||||
#else
|
||||
static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
|
||||
static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
|
||||
static inline int mce_p5_enabled(void) { return 0; }
|
||||
static inline void enable_p5_mce(void) { }
|
||||
#endif
|
||||
|
||||
/* Call the installed machine check handler for this CPU setup. */
|
||||
extern void (*machine_check_vector)(struct pt_regs *, long error_code);
|
||||
|
||||
#ifdef CONFIG_X86_OLD_MCE
|
||||
|
||||
extern int nr_mce_banks;
|
||||
|
||||
void intel_set_thermal_handler(void);
|
||||
|
||||
#else
|
||||
|
||||
static inline void intel_set_thermal_handler(void) { }
|
||||
|
||||
#endif
|
||||
|
||||
void intel_init_thermal(struct cpuinfo_x86 *c);
|
||||
|
@ -1,76 +0,0 @@
|
||||
/*
|
||||
* mce.c - x86 Machine Check Exception Reporting
|
||||
* (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com>
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/thread_info.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
int mce_disabled;
|
||||
int nr_mce_banks;
|
||||
|
||||
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
|
||||
|
||||
/* Handle unconfigured int18 (should never happen) */
|
||||
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
|
||||
}
|
||||
|
||||
/* Call the installed machine check handler for this CPU setup. */
|
||||
void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
|
||||
|
||||
/* This has to be run for each processor */
|
||||
void mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (mce_disabled == 1)
|
||||
return;
|
||||
|
||||
switch (c->x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
amd_mcheck_init(c);
|
||||
break;
|
||||
|
||||
case X86_VENDOR_INTEL:
|
||||
if (c->x86 == 5)
|
||||
intel_p5_mcheck_init(c);
|
||||
if (c->x86 == 6)
|
||||
intel_p6_mcheck_init(c);
|
||||
if (c->x86 == 15)
|
||||
intel_p4_mcheck_init(c);
|
||||
break;
|
||||
|
||||
case X86_VENDOR_CENTAUR:
|
||||
if (c->x86 == 5)
|
||||
winchip_mcheck_init(c);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int __init mcheck_disable(char *str)
|
||||
{
|
||||
mce_disabled = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __init mcheck_enable(char *str)
|
||||
{
|
||||
mce_disabled = -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("nomce", mcheck_disable);
|
||||
__setup("mce", mcheck_enable);
|
File diff suppressed because it is too large
Load Diff
@ -13,22 +13,22 @@
|
||||
*
|
||||
* All MC4_MISCi registers are shared between multi-cores
|
||||
*/
|
||||
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/kobject.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sysdev.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/apic.h>
|
||||
#include <asm/idle.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/percpu.h>
|
||||
#include <asm/idle.h>
|
||||
|
||||
#define PFX "mce_threshold: "
|
||||
#define VERSION "version 1.1.1"
|
||||
@ -48,26 +48,26 @@
|
||||
#define MCG_XBLK_ADDR 0xC0000400
|
||||
|
||||
struct threshold_block {
|
||||
unsigned int block;
|
||||
unsigned int bank;
|
||||
unsigned int cpu;
|
||||
u32 address;
|
||||
u16 interrupt_enable;
|
||||
u16 threshold_limit;
|
||||
struct kobject kobj;
|
||||
struct list_head miscj;
|
||||
unsigned int block;
|
||||
unsigned int bank;
|
||||
unsigned int cpu;
|
||||
u32 address;
|
||||
u16 interrupt_enable;
|
||||
u16 threshold_limit;
|
||||
struct kobject kobj;
|
||||
struct list_head miscj;
|
||||
};
|
||||
|
||||
/* defaults used early on boot */
|
||||
static struct threshold_block threshold_defaults = {
|
||||
.interrupt_enable = 0,
|
||||
.threshold_limit = THRESHOLD_MAX,
|
||||
.interrupt_enable = 0,
|
||||
.threshold_limit = THRESHOLD_MAX,
|
||||
};
|
||||
|
||||
struct threshold_bank {
|
||||
struct kobject *kobj;
|
||||
struct threshold_block *blocks;
|
||||
cpumask_var_t cpus;
|
||||
struct kobject *kobj;
|
||||
struct threshold_block *blocks;
|
||||
cpumask_var_t cpus;
|
||||
};
|
||||
static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
|
||||
|
||||
@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void);
|
||||
*/
|
||||
|
||||
struct thresh_restart {
|
||||
struct threshold_block *b;
|
||||
int reset;
|
||||
u16 old_limit;
|
||||
struct threshold_block *b;
|
||||
int reset;
|
||||
u16 old_limit;
|
||||
};
|
||||
|
||||
/* must be called with correct cpu affinity */
|
||||
@ -110,6 +110,7 @@ static void threshold_restart_bank(void *_tr)
|
||||
} else if (tr->old_limit) { /* change limit w/o reset */
|
||||
int new_count = (mci_misc_hi & THRESHOLD_MAX) +
|
||||
(tr->old_limit - tr->b->threshold_limit);
|
||||
|
||||
mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
|
||||
(new_count & THRESHOLD_MAX);
|
||||
}
|
||||
@ -125,11 +126,11 @@ static void threshold_restart_bank(void *_tr)
|
||||
/* cpu init entry point, called from mce.c with preempt off */
|
||||
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int bank, block;
|
||||
unsigned int cpu = smp_processor_id();
|
||||
u8 lvt_off;
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
unsigned int bank, block;
|
||||
struct thresh_restart tr;
|
||||
u8 lvt_off;
|
||||
|
||||
for (bank = 0; bank < NR_BANKS; ++bank) {
|
||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||
@ -140,8 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
if (!address)
|
||||
break;
|
||||
address += MCG_XBLK_ADDR;
|
||||
}
|
||||
else
|
||||
} else
|
||||
++address;
|
||||
|
||||
if (rdmsr_safe(address, &low, &high))
|
||||
@ -193,9 +193,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||
*/
|
||||
static void amd_threshold_interrupt(void)
|
||||
{
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
unsigned int bank, block;
|
||||
struct mce m;
|
||||
u32 low = 0, high = 0, address = 0;
|
||||
|
||||
mce_setup(&m);
|
||||
|
||||
@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void)
|
||||
if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
|
||||
continue;
|
||||
for (block = 0; block < NR_BLOCKS; ++block) {
|
||||
if (block == 0)
|
||||
if (block == 0) {
|
||||
address = MSR_IA32_MC0_MISC + bank * 4;
|
||||
else if (block == 1) {
|
||||
} else if (block == 1) {
|
||||
address = (low & MASK_BLKPTR_LO) >> 21;
|
||||
if (!address)
|
||||
break;
|
||||
address += MCG_XBLK_ADDR;
|
||||
}
|
||||
else
|
||||
} else {
|
||||
++address;
|
||||
}
|
||||
|
||||
if (rdmsr_safe(address, &low, &high))
|
||||
break;
|
||||
@ -229,8 +229,10 @@ static void amd_threshold_interrupt(void)
|
||||
(high & MASK_LOCKED_HI))
|
||||
continue;
|
||||
|
||||
/* Log the machine check that caused the threshold
|
||||
event. */
|
||||
/*
|
||||
* Log the machine check that caused the threshold
|
||||
* event.
|
||||
*/
|
||||
machine_check_poll(MCP_TIMESTAMP,
|
||||
&__get_cpu_var(mce_poll_banks));
|
||||
|
||||
@ -254,48 +256,52 @@ static void amd_threshold_interrupt(void)
|
||||
|
||||
struct threshold_attr {
|
||||
struct attribute attr;
|
||||
ssize_t(*show) (struct threshold_block *, char *);
|
||||
ssize_t(*store) (struct threshold_block *, const char *, size_t count);
|
||||
ssize_t (*show) (struct threshold_block *, char *);
|
||||
ssize_t (*store) (struct threshold_block *, const char *, size_t count);
|
||||
};
|
||||
|
||||
#define SHOW_FIELDS(name) \
|
||||
static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
|
||||
{ \
|
||||
return sprintf(buf, "%lx\n", (unsigned long) b->name); \
|
||||
#define SHOW_FIELDS(name) \
|
||||
static ssize_t show_ ## name(struct threshold_block *b, char *buf) \
|
||||
{ \
|
||||
return sprintf(buf, "%lx\n", (unsigned long) b->name); \
|
||||
}
|
||||
SHOW_FIELDS(interrupt_enable)
|
||||
SHOW_FIELDS(threshold_limit)
|
||||
|
||||
static ssize_t store_interrupt_enable(struct threshold_block *b,
|
||||
const char *buf, size_t count)
|
||||
static ssize_t
|
||||
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
|
||||
{
|
||||
char *end;
|
||||
struct thresh_restart tr;
|
||||
unsigned long new = simple_strtoul(buf, &end, 0);
|
||||
if (end == buf)
|
||||
unsigned long new;
|
||||
|
||||
if (strict_strtoul(buf, 0, &new) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
b->interrupt_enable = !!new;
|
||||
|
||||
tr.b = b;
|
||||
tr.reset = 0;
|
||||
tr.old_limit = 0;
|
||||
tr.b = b;
|
||||
tr.reset = 0;
|
||||
tr.old_limit = 0;
|
||||
|
||||
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
|
||||
|
||||
return end - buf;
|
||||
return size;
|
||||
}
|
||||
|
||||
static ssize_t store_threshold_limit(struct threshold_block *b,
|
||||
const char *buf, size_t count)
|
||||
static ssize_t
|
||||
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
|
||||
{
|
||||
char *end;
|
||||
struct thresh_restart tr;
|
||||
unsigned long new = simple_strtoul(buf, &end, 0);
|
||||
if (end == buf)
|
||||
unsigned long new;
|
||||
|
||||
if (strict_strtoul(buf, 0, &new) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (new > THRESHOLD_MAX)
|
||||
new = THRESHOLD_MAX;
|
||||
if (new < 1)
|
||||
new = 1;
|
||||
|
||||
tr.old_limit = b->threshold_limit;
|
||||
b->threshold_limit = new;
|
||||
tr.b = b;
|
||||
@ -303,12 +309,12 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
|
||||
|
||||
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
|
||||
|
||||
return end - buf;
|
||||
return size;
|
||||
}
|
||||
|
||||
struct threshold_block_cross_cpu {
|
||||
struct threshold_block *tb;
|
||||
long retval;
|
||||
struct threshold_block *tb;
|
||||
long retval;
|
||||
};
|
||||
|
||||
static void local_error_count_handler(void *_tbcc)
|
||||
@ -338,16 +344,13 @@ static ssize_t store_error_count(struct threshold_block *b,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \
|
||||
.attr = {.name = __stringify(_name), .mode = _mode }, \
|
||||
.show = _show, \
|
||||
.store = _store, \
|
||||
#define RW_ATTR(val) \
|
||||
static struct threshold_attr val = { \
|
||||
.attr = {.name = __stringify(val), .mode = 0644 }, \
|
||||
.show = show_## val, \
|
||||
.store = store_## val, \
|
||||
};
|
||||
|
||||
#define RW_ATTR(name) \
|
||||
static struct threshold_attr name = \
|
||||
THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
|
||||
|
||||
RW_ATTR(interrupt_enable);
|
||||
RW_ATTR(threshold_limit);
|
||||
RW_ATTR(error_count);
|
||||
@ -359,15 +362,17 @@ static struct attribute *default_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
#define to_block(k) container_of(k, struct threshold_block, kobj)
|
||||
#define to_attr(a) container_of(a, struct threshold_attr, attr)
|
||||
#define to_block(k) container_of(k, struct threshold_block, kobj)
|
||||
#define to_attr(a) container_of(a, struct threshold_attr, attr)
|
||||
|
||||
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
|
||||
{
|
||||
struct threshold_block *b = to_block(kobj);
|
||||
struct threshold_attr *a = to_attr(attr);
|
||||
ssize_t ret;
|
||||
|
||||
ret = a->show ? a->show(b, buf) : -EIO;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -377,18 +382,20 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
|
||||
struct threshold_block *b = to_block(kobj);
|
||||
struct threshold_attr *a = to_attr(attr);
|
||||
ssize_t ret;
|
||||
|
||||
ret = a->store ? a->store(b, buf, count) : -EIO;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct sysfs_ops threshold_ops = {
|
||||
.show = show,
|
||||
.store = store,
|
||||
.show = show,
|
||||
.store = store,
|
||||
};
|
||||
|
||||
static struct kobj_type threshold_ktype = {
|
||||
.sysfs_ops = &threshold_ops,
|
||||
.default_attrs = default_attrs,
|
||||
.sysfs_ops = &threshold_ops,
|
||||
.default_attrs = default_attrs,
|
||||
};
|
||||
|
||||
static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
|
||||
@ -396,9 +403,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
|
||||
unsigned int block,
|
||||
u32 address)
|
||||
{
|
||||
int err;
|
||||
u32 low, high;
|
||||
struct threshold_block *b = NULL;
|
||||
u32 low, high;
|
||||
int err;
|
||||
|
||||
if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
|
||||
return 0;
|
||||
@ -421,20 +428,21 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
|
||||
if (!b)
|
||||
return -ENOMEM;
|
||||
|
||||
b->block = block;
|
||||
b->bank = bank;
|
||||
b->cpu = cpu;
|
||||
b->address = address;
|
||||
b->interrupt_enable = 0;
|
||||
b->threshold_limit = THRESHOLD_MAX;
|
||||
b->block = block;
|
||||
b->bank = bank;
|
||||
b->cpu = cpu;
|
||||
b->address = address;
|
||||
b->interrupt_enable = 0;
|
||||
b->threshold_limit = THRESHOLD_MAX;
|
||||
|
||||
INIT_LIST_HEAD(&b->miscj);
|
||||
|
||||
if (per_cpu(threshold_banks, cpu)[bank]->blocks)
|
||||
if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
|
||||
list_add(&b->miscj,
|
||||
&per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
|
||||
else
|
||||
} else {
|
||||
per_cpu(threshold_banks, cpu)[bank]->blocks = b;
|
||||
}
|
||||
|
||||
err = kobject_init_and_add(&b->kobj, &threshold_ktype,
|
||||
per_cpu(threshold_banks, cpu)[bank]->kobj,
|
||||
@ -447,8 +455,9 @@ recurse:
|
||||
if (!address)
|
||||
return 0;
|
||||
address += MCG_XBLK_ADDR;
|
||||
} else
|
||||
} else {
|
||||
++address;
|
||||
}
|
||||
|
||||
err = allocate_threshold_blocks(cpu, bank, ++block, address);
|
||||
if (err)
|
||||
@ -500,13 +509,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
|
||||
if (!b)
|
||||
goto out;
|
||||
|
||||
err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
|
||||
err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
|
||||
b->kobj, name);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
cpumask_copy(b->cpus, cpu_core_mask(cpu));
|
||||
per_cpu(threshold_banks, cpu)[bank] = b;
|
||||
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
@ -522,7 +532,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
|
||||
goto out;
|
||||
}
|
||||
|
||||
b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
|
||||
b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
|
||||
if (!b->kobj)
|
||||
goto out_free;
|
||||
|
||||
@ -542,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
|
||||
if (i == cpu)
|
||||
continue;
|
||||
|
||||
err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
|
||||
err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
|
||||
b->kobj, name);
|
||||
if (err)
|
||||
goto out;
|
||||
@ -605,15 +615,13 @@ static void deallocate_threshold_block(unsigned int cpu,
|
||||
|
||||
static void threshold_remove_bank(unsigned int cpu, int bank)
|
||||
{
|
||||
int i = 0;
|
||||
struct threshold_bank *b;
|
||||
char name[32];
|
||||
int i = 0;
|
||||
|
||||
b = per_cpu(threshold_banks, cpu)[bank];
|
||||
|
||||
if (!b)
|
||||
return;
|
||||
|
||||
if (!b->blocks)
|
||||
goto free_out;
|
||||
|
||||
@ -622,8 +630,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
|
||||
#ifdef CONFIG_SMP
|
||||
/* sibling symlink */
|
||||
if (shared_bank[bank] && b->blocks->cpu != cpu) {
|
||||
sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
|
||||
sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
|
||||
per_cpu(threshold_banks, cpu)[bank] = NULL;
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -633,7 +642,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
|
||||
if (i == cpu)
|
||||
continue;
|
||||
|
||||
sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
|
||||
sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
|
||||
per_cpu(threshold_banks, i)[bank] = NULL;
|
||||
}
|
||||
|
||||
@ -659,12 +668,9 @@ static void threshold_remove_device(unsigned int cpu)
|
||||
}
|
||||
|
||||
/* get notified when a cpu comes on/off */
|
||||
static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
|
||||
unsigned int cpu)
|
||||
static void __cpuinit
|
||||
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
|
||||
{
|
||||
if (cpu >= NR_CPUS)
|
||||
return;
|
||||
|
||||
switch (action) {
|
||||
case CPU_ONLINE:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
@ -686,11 +692,12 @@ static __init int threshold_init_device(void)
|
||||
/* to hit CPUs online before the notifier is up */
|
||||
for_each_online_cpu(lcpu) {
|
||||
int err = threshold_create_device(lcpu);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
threshold_cpu_callback = amd_64_threshold_cpu_callback;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
device_initcall(threshold_init_device);
|
||||
|
74
arch/x86/kernel/cpu/mcheck/mce_intel.c
Normal file
74
arch/x86/kernel/cpu/mcheck/mce_intel.c
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Common code for Intel machine checks
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/therm_throt.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
int tm2 = 0;
|
||||
u32 l, h;
|
||||
|
||||
/* Thermal monitoring depends on ACPI and clock modulation*/
|
||||
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
|
||||
return;
|
||||
|
||||
/*
|
||||
* First check if its enabled already, in which case there might
|
||||
* be some SMM goo which handles it, so we can't even put a handler
|
||||
* since it might be delivered via SMI already:
|
||||
*/
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
h = apic_read(APIC_LVTTHMR);
|
||||
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
|
||||
printk(KERN_DEBUG
|
||||
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
|
||||
return;
|
||||
}
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
|
||||
tm2 = 1;
|
||||
|
||||
/* Check whether a vector already exists */
|
||||
if (h & APIC_VECTOR_MASK) {
|
||||
printk(KERN_DEBUG
|
||||
"CPU%d: Thermal LVT vector (%#x) already installed\n",
|
||||
cpu, (h & APIC_VECTOR_MASK));
|
||||
return;
|
||||
}
|
||||
|
||||
/* We'll mask the thermal vector in the lapic till we're ready: */
|
||||
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
|
||||
apic_write(APIC_LVTTHMR, h);
|
||||
|
||||
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT,
|
||||
l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
|
||||
|
||||
intel_set_thermal_handler();
|
||||
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
|
||||
|
||||
/* Unmask the thermal vector: */
|
||||
l = apic_read(APIC_LVTTHMR);
|
||||
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
|
||||
printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
|
||||
cpu, tm2 ? "TM2" : "TM1");
|
||||
|
||||
/* enable thermal throttle processing */
|
||||
atomic_set(&therm_throt_en, 1);
|
||||
}
|
@ -16,6 +16,8 @@
|
||||
#include <asm/idle.h>
|
||||
#include <asm/therm_throt.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
asmlinkage void smp_thermal_interrupt(void)
|
||||
{
|
||||
__u64 msr_val;
|
||||
@ -26,67 +28,13 @@ asmlinkage void smp_thermal_interrupt(void)
|
||||
irq_enter();
|
||||
|
||||
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
|
||||
if (therm_throt_process(msr_val & 1))
|
||||
if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
|
||||
mce_log_therm_throt_event(msr_val);
|
||||
|
||||
inc_irq_stat(irq_thermal_count);
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
static void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
int tm2 = 0;
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_ACPI))
|
||||
return;
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_ACC))
|
||||
return;
|
||||
|
||||
/* first check if TM1 is already enabled by the BIOS, in which
|
||||
* case there might be some SMM goo which handles it, so we can't even
|
||||
* put a handler since it might be delivered via SMI already.
|
||||
*/
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
h = apic_read(APIC_LVTTHMR);
|
||||
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
|
||||
printk(KERN_DEBUG
|
||||
"CPU%d: Thermal monitoring handled by SMI\n", cpu);
|
||||
return;
|
||||
}
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
|
||||
tm2 = 1;
|
||||
|
||||
if (h & APIC_VECTOR_MASK) {
|
||||
printk(KERN_DEBUG
|
||||
"CPU%d: Thermal LVT vector (%#x) already "
|
||||
"installed\n", cpu, (h & APIC_VECTOR_MASK));
|
||||
return;
|
||||
}
|
||||
|
||||
h = THERMAL_APIC_VECTOR;
|
||||
h |= (APIC_DM_FIXED | APIC_LVT_MASKED);
|
||||
apic_write(APIC_LVTTHMR, h);
|
||||
|
||||
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
|
||||
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
|
||||
|
||||
l = apic_read(APIC_LVTTHMR);
|
||||
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
|
||||
cpu, tm2 ? "TM2" : "TM1");
|
||||
|
||||
/* enable thermal throttle processing */
|
||||
atomic_set(&therm_throt_en, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Support for Intel Correct Machine Check Interrupts. This allows
|
||||
* the CPU to raise an interrupt when a corrected machine check happened.
|
||||
@ -108,6 +56,9 @@ static int cmci_supported(int *banks)
|
||||
{
|
||||
u64 cap;
|
||||
|
||||
if (mce_cmci_disabled || mce_ignore_ce)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Vendor check is not strictly needed, but the initial
|
||||
* initialization is vendor keyed and this
|
||||
@ -131,7 +82,7 @@ static int cmci_supported(int *banks)
|
||||
static void intel_threshold_interrupt(void)
|
||||
{
|
||||
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
|
||||
mce_notify_user();
|
||||
mce_notify_irq();
|
||||
}
|
||||
|
||||
static void print_update(char *type, int *hdr, int num)
|
||||
@ -247,7 +198,7 @@ void cmci_rediscover(int dying)
|
||||
return;
|
||||
cpumask_copy(old, ¤t->cpus_allowed);
|
||||
|
||||
for_each_online_cpu (cpu) {
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cpu == dying)
|
||||
continue;
|
||||
if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
|
||||
|
@ -6,15 +6,14 @@
|
||||
* This file contains routines to check for non-fatal MCEs every 15s
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
@ -22,9 +21,9 @@
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
static int firstbank;
|
||||
static int firstbank;
|
||||
|
||||
#define MCE_RATE 15*HZ /* timer rate is 15s */
|
||||
#define MCE_RATE (15*HZ) /* timer rate is 15s */
|
||||
|
||||
static void mce_checkregs(void *info)
|
||||
{
|
||||
@ -34,23 +33,24 @@ static void mce_checkregs(void *info)
|
||||
for (i = firstbank; i < nr_mce_banks; i++) {
|
||||
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
|
||||
|
||||
if (high & (1<<31)) {
|
||||
printk(KERN_INFO "MCE: The hardware reports a non "
|
||||
"fatal, correctable incident occurred on "
|
||||
"CPU %d.\n",
|
||||
if (!(high & (1<<31)))
|
||||
continue;
|
||||
|
||||
printk(KERN_INFO "MCE: The hardware reports a non fatal, "
|
||||
"correctable incident occurred on CPU %d.\n",
|
||||
smp_processor_id());
|
||||
printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
|
||||
|
||||
/*
|
||||
* Scrub the error so we don't pick it up in MCE_RATE
|
||||
* seconds time.
|
||||
*/
|
||||
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
|
||||
printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
|
||||
|
||||
/* Serialize */
|
||||
wmb();
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
/*
|
||||
* Scrub the error so we don't pick it up in MCE_RATE
|
||||
* seconds time:
|
||||
*/
|
||||
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
|
||||
|
||||
/* Serialize: */
|
||||
wmb();
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
}
|
||||
|
||||
@ -77,16 +77,17 @@ static int __init init_nonfatal_mce_checker(void)
|
||||
|
||||
/* Some Athlons misbehave when we frob bank 0 */
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
|
||||
boot_cpu_data.x86 == 6)
|
||||
firstbank = 1;
|
||||
boot_cpu_data.x86 == 6)
|
||||
firstbank = 1;
|
||||
else
|
||||
firstbank = 0;
|
||||
firstbank = 0;
|
||||
|
||||
/*
|
||||
* Check for non-fatal errors every MCE_RATE s
|
||||
*/
|
||||
schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
|
||||
printk(KERN_INFO "Machine check exception polling timer started.\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
module_init(init_nonfatal_mce_checker);
|
||||
|
@ -2,18 +2,17 @@
|
||||
* P4 specific Machine Check Exception Reporting
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/therm_throt.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/apic.h>
|
||||
|
||||
#include <asm/therm_throt.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
@ -36,6 +35,7 @@ static int mce_num_extended_msrs;
|
||||
|
||||
|
||||
#ifdef CONFIG_X86_MCE_P4THERMAL
|
||||
|
||||
static void unexpected_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
|
||||
@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
|
||||
/* P4/Xeon Thermal transition interrupt handler */
|
||||
/* P4/Xeon Thermal transition interrupt handler: */
|
||||
static void intel_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
__u64 msr_val;
|
||||
@ -51,11 +51,12 @@ static void intel_thermal_interrupt(struct pt_regs *regs)
|
||||
ack_APIC_irq();
|
||||
|
||||
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
|
||||
therm_throt_process(msr_val & 0x1);
|
||||
therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
|
||||
}
|
||||
|
||||
/* Thermal interrupt handler for this CPU setup */
|
||||
static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
|
||||
/* Thermal interrupt handler for this CPU setup: */
|
||||
static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
|
||||
unexpected_thermal_interrupt;
|
||||
|
||||
void smp_thermal_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
@ -65,67 +66,15 @@ void smp_thermal_interrupt(struct pt_regs *regs)
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
/* P4/Xeon Thermal regulation detect and init */
|
||||
static void intel_init_thermal(struct cpuinfo_x86 *c)
|
||||
void intel_set_thermal_handler(void)
|
||||
{
|
||||
u32 l, h;
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
/* Thermal monitoring */
|
||||
if (!cpu_has(c, X86_FEATURE_ACPI))
|
||||
return; /* -ENODEV */
|
||||
|
||||
/* Clock modulation */
|
||||
if (!cpu_has(c, X86_FEATURE_ACC))
|
||||
return; /* -ENODEV */
|
||||
|
||||
/* first check if its enabled already, in which case there might
|
||||
* be some SMM goo which handles it, so we can't even put a handler
|
||||
* since it might be delivered via SMI already -zwanem.
|
||||
*/
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
h = apic_read(APIC_LVTTHMR);
|
||||
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
|
||||
printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
|
||||
cpu);
|
||||
return; /* -EBUSY */
|
||||
}
|
||||
|
||||
/* check whether a vector already exists, temporarily masked? */
|
||||
if (h & APIC_VECTOR_MASK) {
|
||||
printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
|
||||
"installed\n",
|
||||
cpu, (h & APIC_VECTOR_MASK));
|
||||
return; /* -EBUSY */
|
||||
}
|
||||
|
||||
/* The temperature transition interrupt handler setup */
|
||||
h = THERMAL_APIC_VECTOR; /* our delivery vector */
|
||||
h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
|
||||
apic_write(APIC_LVTTHMR, h);
|
||||
|
||||
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
|
||||
wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
|
||||
|
||||
/* ok we're good to go... */
|
||||
vendor_thermal_interrupt = intel_thermal_interrupt;
|
||||
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
|
||||
wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
|
||||
|
||||
l = apic_read(APIC_LVTTHMR);
|
||||
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
|
||||
printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
|
||||
|
||||
/* enable thermal throttle processing */
|
||||
atomic_set(&therm_throt_en, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_X86_MCE_P4THERMAL */
|
||||
|
||||
|
||||
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
|
||||
static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
|
||||
static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
|
||||
{
|
||||
u32 h;
|
||||
|
||||
@ -143,9 +92,9 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
|
||||
|
||||
static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
int recover = 1;
|
||||
u32 alow, ahigh, high, low;
|
||||
u32 mcgstl, mcgsth;
|
||||
int recover = 1;
|
||||
int i;
|
||||
|
||||
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
@ -157,7 +106,9 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
|
||||
if (mce_num_extended_msrs > 0) {
|
||||
struct intel_mce_extended_msrs dbg;
|
||||
|
||||
intel_get_extended_msrs(&dbg);
|
||||
|
||||
printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
|
||||
"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
|
||||
"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
|
||||
@ -171,6 +122,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
if (high & (1<<31)) {
|
||||
char misc[20];
|
||||
char addr[24];
|
||||
|
||||
misc[0] = addr[0] = '\0';
|
||||
if (high & (1<<29))
|
||||
recover |= 1;
|
||||
@ -196,6 +148,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
panic("Unable to continue");
|
||||
|
||||
printk(KERN_EMERG "Attempting to continue.\n");
|
||||
|
||||
/*
|
||||
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
|
||||
* recoverable/continuable.This will allow BIOS to look at the MSRs
|
||||
@ -217,7 +170,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
}
|
||||
|
||||
|
||||
void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
|
@ -2,11 +2,10 @@
|
||||
* P5 specific Machine Check Exception Reporting
|
||||
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
@ -15,39 +14,58 @@
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* Machine check handler for Pentium class Intel */
|
||||
/* By default disabled */
|
||||
int mce_p5_enable;
|
||||
|
||||
/* Machine check handler for Pentium class Intel CPUs: */
|
||||
static void pentium_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
u32 loaddr, hi, lotype;
|
||||
|
||||
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
|
||||
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
|
||||
printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
|
||||
if (lotype&(1<<5))
|
||||
printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
|
||||
|
||||
printk(KERN_EMERG
|
||||
"CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n",
|
||||
smp_processor_id(), loaddr, lotype);
|
||||
|
||||
if (lotype & (1<<5)) {
|
||||
printk(KERN_EMERG
|
||||
"CPU#%d: Possible thermal failure (CPU on fire ?).\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
|
||||
/* Set up machine check reporting for processors with Intel style MCE */
|
||||
/* Set up machine check reporting for processors with Intel style MCE: */
|
||||
void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
|
||||
/*Check for MCE support */
|
||||
/* Check for MCE support: */
|
||||
if (!cpu_has(c, X86_FEATURE_MCE))
|
||||
return;
|
||||
|
||||
/* Default P5 to off as its often misconnected */
|
||||
#ifdef CONFIG_X86_OLD_MCE
|
||||
/* Default P5 to off as its often misconnected: */
|
||||
if (mce_disabled != -1)
|
||||
return;
|
||||
#endif
|
||||
|
||||
machine_check_vector = pentium_machine_check;
|
||||
/* Make sure the vector pointer is visible before we enable MCEs: */
|
||||
wmb();
|
||||
|
||||
/* Read registers before enabling */
|
||||
/* Read registers before enabling: */
|
||||
rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
|
||||
rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
|
||||
printk(KERN_INFO "Intel old style machine check architecture supported.\n");
|
||||
printk(KERN_INFO
|
||||
"Intel old style machine check architecture supported.\n");
|
||||
|
||||
/* Enable MCE */
|
||||
/* Enable MCE: */
|
||||
set_in_cr4(X86_CR4_MCE);
|
||||
printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
|
||||
printk(KERN_INFO
|
||||
"Intel old style machine check reporting enabled on CPU#%d.\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
@ -2,11 +2,10 @@
|
||||
* P6 specific Machine Check Exception Reporting
|
||||
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
@ -18,9 +17,9 @@
|
||||
/* Machine Check Handler For PII/PIII */
|
||||
static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
int recover = 1;
|
||||
u32 alow, ahigh, high, low;
|
||||
u32 mcgstl, mcgsth;
|
||||
int recover = 1;
|
||||
int i;
|
||||
|
||||
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
@ -35,12 +34,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
if (high & (1<<31)) {
|
||||
char misc[20];
|
||||
char addr[24];
|
||||
misc[0] = addr[0] = '\0';
|
||||
|
||||
misc[0] = '\0';
|
||||
addr[0] = '\0';
|
||||
|
||||
if (high & (1<<29))
|
||||
recover |= 1;
|
||||
if (high & (1<<25))
|
||||
recover |= 2;
|
||||
high &= ~(1<<31);
|
||||
|
||||
if (high & (1<<27)) {
|
||||
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
|
||||
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
|
||||
@ -49,6 +52,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
|
||||
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
|
||||
}
|
||||
|
||||
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
|
||||
smp_processor_id(), i, high, low, misc, addr);
|
||||
}
|
||||
@ -63,16 +67,17 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
/*
|
||||
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
|
||||
* recoverable/continuable.This will allow BIOS to look at the MSRs
|
||||
* for errors if the OS could not log the error.
|
||||
* for errors if the OS could not log the error:
|
||||
*/
|
||||
for (i = 0; i < nr_mce_banks; i++) {
|
||||
unsigned int msr;
|
||||
|
||||
msr = MSR_IA32_MC0_STATUS+i*4;
|
||||
rdmsr(msr, low, high);
|
||||
if (high & (1<<31)) {
|
||||
/* Clear it */
|
||||
/* Clear it: */
|
||||
wrmsr(msr, 0UL, 0UL);
|
||||
/* Serialize */
|
||||
/* Serialize: */
|
||||
wmb();
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
@ -81,7 +86,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
}
|
||||
|
||||
/* Set up machine check reporting for processors with Intel style MCE */
|
||||
/* Set up machine check reporting for processors with Intel style MCE: */
|
||||
void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
@ -97,6 +102,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
|
||||
|
||||
/* Ok machine check is available */
|
||||
machine_check_vector = intel_machine_check;
|
||||
/* Make sure the vector pointer is visible before we enable MCEs: */
|
||||
wmb();
|
||||
|
||||
printk(KERN_INFO "Intel machine check architecture supported.\n");
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*
|
||||
* Thermal throttle event support code (such as syslog messaging and rate
|
||||
* limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
|
||||
*
|
||||
* This allows consistent reporting of CPU thermal throttle events.
|
||||
*
|
||||
* Maintains a counter in /sys that keeps track of the number of thermal
|
||||
@ -13,43 +13,43 @@
|
||||
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
|
||||
* Inspired by Ross Biro's and Al Borchers' counter code.
|
||||
*/
|
||||
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/sysdev.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/jiffies.h>
|
||||
|
||||
#include <asm/therm_throt.h>
|
||||
|
||||
/* How long to wait between reporting thermal events */
|
||||
#define CHECK_INTERVAL (300 * HZ)
|
||||
#define CHECK_INTERVAL (300 * HZ)
|
||||
|
||||
static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
|
||||
static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
|
||||
atomic_t therm_throt_en = ATOMIC_INIT(0);
|
||||
|
||||
atomic_t therm_throt_en = ATOMIC_INIT(0);
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
#define define_therm_throt_sysdev_one_ro(_name) \
|
||||
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
|
||||
#define define_therm_throt_sysdev_one_ro(_name) \
|
||||
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
|
||||
|
||||
#define define_therm_throt_sysdev_show_func(name) \
|
||||
static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
|
||||
struct sysdev_attribute *attr, \
|
||||
char *buf) \
|
||||
{ \
|
||||
unsigned int cpu = dev->id; \
|
||||
ssize_t ret; \
|
||||
\
|
||||
preempt_disable(); /* CPU hotplug */ \
|
||||
if (cpu_online(cpu)) \
|
||||
ret = sprintf(buf, "%lu\n", \
|
||||
per_cpu(thermal_throttle_##name, cpu)); \
|
||||
else \
|
||||
ret = 0; \
|
||||
preempt_enable(); \
|
||||
\
|
||||
return ret; \
|
||||
#define define_therm_throt_sysdev_show_func(name) \
|
||||
static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
|
||||
struct sysdev_attribute *attr, \
|
||||
char *buf) \
|
||||
{ \
|
||||
unsigned int cpu = dev->id; \
|
||||
ssize_t ret; \
|
||||
\
|
||||
preempt_disable(); /* CPU hotplug */ \
|
||||
if (cpu_online(cpu)) \
|
||||
ret = sprintf(buf, "%lu\n", \
|
||||
per_cpu(thermal_throttle_##name, cpu)); \
|
||||
else \
|
||||
ret = 0; \
|
||||
preempt_enable(); \
|
||||
\
|
||||
return ret; \
|
||||
}
|
||||
|
||||
define_therm_throt_sysdev_show_func(count);
|
||||
@ -61,8 +61,8 @@ static struct attribute *thermal_throttle_attrs[] = {
|
||||
};
|
||||
|
||||
static struct attribute_group thermal_throttle_attr_group = {
|
||||
.attrs = thermal_throttle_attrs,
|
||||
.name = "thermal_throttle"
|
||||
.attrs = thermal_throttle_attrs,
|
||||
.name = "thermal_throttle"
|
||||
};
|
||||
#endif /* CONFIG_SYSFS */
|
||||
|
||||
@ -110,10 +110,11 @@ int therm_throt_process(int curr)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
/* Add/Remove thermal_throttle interface for CPU device */
|
||||
/* Add/Remove thermal_throttle interface for CPU device: */
|
||||
static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
|
||||
{
|
||||
return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
|
||||
return sysfs_create_group(&sys_dev->kobj,
|
||||
&thermal_throttle_attr_group);
|
||||
}
|
||||
|
||||
static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
|
||||
@ -121,19 +122,21 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
|
||||
sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
|
||||
}
|
||||
|
||||
/* Mutex protecting device creation against CPU hotplug */
|
||||
/* Mutex protecting device creation against CPU hotplug: */
|
||||
static DEFINE_MUTEX(therm_cpu_lock);
|
||||
|
||||
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
|
||||
static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
static __cpuinit int
|
||||
thermal_throttle_cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
unsigned int cpu = (unsigned long)hcpu;
|
||||
struct sys_device *sys_dev;
|
||||
int err = 0;
|
||||
|
||||
sys_dev = get_cpu_sysdev(cpu);
|
||||
|
||||
switch (action) {
|
||||
case CPU_UP_PREPARE:
|
||||
case CPU_UP_PREPARE_FROZEN:
|
||||
|
@ -17,7 +17,7 @@ static void default_threshold_interrupt(void)
|
||||
|
||||
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
|
||||
|
||||
asmlinkage void mce_threshold_interrupt(void)
|
||||
asmlinkage void smp_threshold_interrupt(void)
|
||||
{
|
||||
exit_idle();
|
||||
irq_enter();
|
||||
|
@ -2,11 +2,10 @@
|
||||
* IDT Winchip specific Machine Check Exception Reporting
|
||||
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
@ -14,7 +13,7 @@
|
||||
|
||||
#include "mce.h"
|
||||
|
||||
/* Machine check handler for WinChip C6 */
|
||||
/* Machine check handler for WinChip C6: */
|
||||
static void winchip_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
|
||||
@ -25,12 +24,18 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code)
|
||||
void winchip_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 lo, hi;
|
||||
|
||||
machine_check_vector = winchip_machine_check;
|
||||
/* Make sure the vector pointer is visible before we enable MCEs: */
|
||||
wmb();
|
||||
|
||||
rdmsr(MSR_IDT_FCR1, lo, hi);
|
||||
lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */
|
||||
lo &= ~(1<<4); /* Enable MCE */
|
||||
wrmsr(MSR_IDT_FCR1, lo, hi);
|
||||
|
||||
set_in_cr4(X86_CR4_MCE);
|
||||
printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
|
||||
|
||||
printk(KERN_INFO
|
||||
"Winchip machine check reporting enabled on CPU#0.\n");
|
||||
}
|
||||
|
@ -963,6 +963,8 @@ END(\sym)
|
||||
#ifdef CONFIG_SMP
|
||||
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
|
||||
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
|
||||
apicinterrupt REBOOT_VECTOR \
|
||||
reboot_interrupt smp_reboot_interrupt
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_UV
|
||||
@ -994,10 +996,15 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
|
||||
#endif
|
||||
|
||||
apicinterrupt THRESHOLD_APIC_VECTOR \
|
||||
threshold_interrupt mce_threshold_interrupt
|
||||
threshold_interrupt smp_threshold_interrupt
|
||||
apicinterrupt THERMAL_APIC_VECTOR \
|
||||
thermal_interrupt smp_thermal_interrupt
|
||||
|
||||
#ifdef CONFIG_X86_MCE
|
||||
apicinterrupt MCE_SELF_VECTOR \
|
||||
mce_self_interrupt smp_mce_self_interrupt
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
|
||||
call_function_single_interrupt smp_call_function_single_interrupt
|
||||
@ -1379,7 +1386,7 @@ errorentry xen_stack_segment do_stack_segment
|
||||
errorentry general_protection do_general_protection
|
||||
errorentry page_fault do_page_fault
|
||||
#ifdef CONFIG_X86_MCE
|
||||
paranoidzeroentry machine_check do_machine_check
|
||||
paranoidzeroentry machine_check *machine_check_vector(%rip)
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <asm/io_apic.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/idle.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/hw_irq.h>
|
||||
|
||||
atomic_t irq_err_count;
|
||||
@ -96,12 +97,22 @@ static int show_other_interrupts(struct seq_file *p, int prec)
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
|
||||
seq_printf(p, " Thermal event interrupts\n");
|
||||
# ifdef CONFIG_X86_64
|
||||
# ifdef CONFIG_X86_MCE_THRESHOLD
|
||||
seq_printf(p, "%*s: ", prec, "THR");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
|
||||
seq_printf(p, " Threshold APIC interrupts\n");
|
||||
# endif
|
||||
#endif
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
seq_printf(p, "%*s: ", prec, "MCE");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
|
||||
seq_printf(p, " Machine check exceptions\n");
|
||||
seq_printf(p, "%*s: ", prec, "MCP");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
|
||||
seq_printf(p, " Machine check polls\n");
|
||||
#endif
|
||||
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
|
||||
#if defined(CONFIG_X86_IO_APIC)
|
||||
@ -185,9 +196,13 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
|
||||
#endif
|
||||
#ifdef CONFIG_X86_MCE
|
||||
sum += irq_stats(cpu)->irq_thermal_count;
|
||||
# ifdef CONFIG_X86_64
|
||||
# ifdef CONFIG_X86_MCE_THRESHOLD
|
||||
sum += irq_stats(cpu)->irq_threshold_count;
|
||||
# endif
|
||||
#endif
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
sum += per_cpu(mce_exception_count, cpu);
|
||||
sum += per_cpu(mce_poll_count, cpu);
|
||||
#endif
|
||||
return sum;
|
||||
}
|
||||
|
@ -173,6 +173,9 @@ static void __init smp_intr_init(void)
|
||||
/* Low priority IPI to cleanup after moving an irq */
|
||||
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
|
||||
set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
|
||||
|
||||
/* IPI used for rebooting/stopping */
|
||||
alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
|
||||
#endif
|
||||
#endif /* CONFIG_SMP */
|
||||
}
|
||||
|
@ -24,11 +24,11 @@
|
||||
#include <asm/ucontext.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <asm/proto.h>
|
||||
#include <asm/ia32_unistd.h>
|
||||
#include <asm/mce.h>
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#include <asm/syscall.h>
|
||||
@ -856,10 +856,10 @@ static void do_signal(struct pt_regs *regs)
|
||||
void
|
||||
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
|
||||
{
|
||||
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
/* notify userspace of pending MCEs */
|
||||
if (thread_info_flags & _TIF_MCE_NOTIFY)
|
||||
mce_notify_user();
|
||||
mce_notify_process();
|
||||
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
|
||||
|
||||
/* deal with pending signal delivery */
|
||||
|
@ -150,14 +150,40 @@ void native_send_call_func_ipi(const struct cpumask *mask)
|
||||
* this function calls the 'stop' function on all other CPUs in the system.
|
||||
*/
|
||||
|
||||
asmlinkage void smp_reboot_interrupt(void)
|
||||
{
|
||||
ack_APIC_irq();
|
||||
irq_enter();
|
||||
stop_this_cpu(NULL);
|
||||
irq_exit();
|
||||
}
|
||||
|
||||
static void native_smp_send_stop(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long wait;
|
||||
|
||||
if (reboot_force)
|
||||
return;
|
||||
|
||||
smp_call_function(stop_this_cpu, NULL, 0);
|
||||
/*
|
||||
* Use an own vector here because smp_call_function
|
||||
* does lots of things not suitable in a panic situation.
|
||||
* On most systems we could also use an NMI here,
|
||||
* but there are a few systems around where NMI
|
||||
* is problematic so stay with an non NMI for now
|
||||
* (this implies we cannot stop CPUs spinning with irq off
|
||||
* currently)
|
||||
*/
|
||||
if (num_online_cpus() > 1) {
|
||||
apic->send_IPI_allbutself(REBOOT_VECTOR);
|
||||
|
||||
/* Don't wait longer than a second */
|
||||
wait = USEC_PER_SEC;
|
||||
while (num_online_cpus() > 1 && wait--)
|
||||
udelay(1);
|
||||
}
|
||||
|
||||
local_irq_save(flags);
|
||||
disable_local_APIC();
|
||||
local_irq_restore(flags);
|
||||
|
@ -798,15 +798,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
|
||||
|
||||
return new_kesp;
|
||||
}
|
||||
#else
|
||||
#endif
|
||||
|
||||
asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
|
||||
{
|
||||
}
|
||||
|
||||
asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
|
||||
asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 'math_state_restore()' saves the current math information in the
|
||||
|
@ -757,6 +757,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
|
||||
wake_up_idle_cpu(cpu);
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(add_timer_on);
|
||||
|
||||
/**
|
||||
* del_timer - deactive a timer.
|
||||
|
Loading…
x
Reference in New Issue
Block a user