Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
Pull EDAC fixes from Borislav Petkov:

 - EDAC core error path fix, from Denis Kirjanov.
 - Generalization of AMD MCE bank names and some minor error reporting improvements.
 - EDAC core cleanups and simplifications, from Wei Yongjun.
 - amd64_edac fixes for sysfs-reported values, from Josh Hunt.
 - some heavy amd64_edac error reporting path shaving, leading to removing a bunch of code.
 - amd64_edac error injection method improvements.
 - EDAC core cleanups and fixes

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (24 commits)
  EDAC, pci_sysfs: Use for_each_pci_dev to simplify the code
  EDAC: Handle error path in edac_mc_sysfs_init() properly
  MCE, AMD: Dump error status
  MCE, AMD: Report decoded error type first
  MCE, AMD: Dump CPU f/m/s triple with the error
  MCE, AMD: Remove functional unit references
  EDAC: Convert to use simple_open()
  EDAC, Calxeda highbank: Convert to use simple_open()
  EDAC: Fix mc size reported in sysfs
  EDAC: Fix csrow size reported in sysfs
  EDAC: Pass mci parent
  EDAC: Add memory controller flags
  amd64_edac: Fix csrows size and pages computation
  amd64_edac: Use DBAM_DIMM macro
  amd64_edac: Fix K8 chip select reporting
  amd64_edac: Reorganize error reporting path
  amd64_edac: Do not check whether error address is valid
  amd64_edac: Improve error injection
  amd64_edac: Cleanup error injection code
  amd64_edac: Small fixlets and cleanups
  ...
This commit is contained in:
commit
9ada9fd5df
@ -42,10 +42,10 @@ config EDAC_LEGACY_SYSFS
|
|||||||
config EDAC_DEBUG
|
config EDAC_DEBUG
|
||||||
bool "Debugging"
|
bool "Debugging"
|
||||||
help
|
help
|
||||||
This turns on debugging information for the entire EDAC
|
This turns on debugging information for the entire EDAC subsystem.
|
||||||
sub-system. You can insert module with "debug_level=x", current
|
You do so by inserting edac_module with "edac_debug_level=x." Valid
|
||||||
there're four debug levels (x=0,1,2,3 from low to high).
|
levels are 0-4 (from low to high) and by default it is set to 2.
|
||||||
Usually you should select 'N'.
|
Usually you should select 'N' here.
|
||||||
|
|
||||||
config EDAC_DECODE_MCE
|
config EDAC_DECODE_MCE
|
||||||
tristate "Decode MCEs in human-readable form (only on AMD for now)"
|
tristate "Decode MCEs in human-readable form (only on AMD for now)"
|
||||||
|
@ -60,8 +60,8 @@ struct scrubrate {
|
|||||||
{ 0x00, 0UL}, /* scrubbing off */
|
{ 0x00, 0UL}, /* scrubbing off */
|
||||||
};
|
};
|
||||||
|
|
||||||
static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
|
int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
|
||||||
u32 *val, const char *func)
|
u32 *val, const char *func)
|
||||||
{
|
{
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
@ -423,7 +423,6 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
|
|||||||
u64 *hole_offset, u64 *hole_size)
|
u64 *hole_offset, u64 *hole_size)
|
||||||
{
|
{
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
u64 base;
|
|
||||||
|
|
||||||
/* only revE and later have the DRAM Hole Address Register */
|
/* only revE and later have the DRAM Hole Address Register */
|
||||||
if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
|
if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) {
|
||||||
@ -462,10 +461,8 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
|
|||||||
* addresses in the hole so that they start at 0x100000000.
|
* addresses in the hole so that they start at 0x100000000.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
base = dhar_base(pvt);
|
*hole_base = dhar_base(pvt);
|
||||||
|
*hole_size = (1ULL << 32) - *hole_base;
|
||||||
*hole_base = base;
|
|
||||||
*hole_size = (0x1ull << 32) - base;
|
|
||||||
|
|
||||||
if (boot_cpu_data.x86 > 0xf)
|
if (boot_cpu_data.x86 > 0xf)
|
||||||
*hole_offset = f10_dhar_offset(pvt);
|
*hole_offset = f10_dhar_offset(pvt);
|
||||||
@ -513,15 +510,15 @@ static u64 sys_addr_to_dram_addr(struct mem_ctl_info *mci, u64 sys_addr)
|
|||||||
{
|
{
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
|
u64 dram_base, hole_base, hole_offset, hole_size, dram_addr;
|
||||||
int ret = 0;
|
int ret;
|
||||||
|
|
||||||
dram_base = get_dram_base(pvt, pvt->mc_node_id);
|
dram_base = get_dram_base(pvt, pvt->mc_node_id);
|
||||||
|
|
||||||
ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
|
ret = amd64_get_dram_hole_info(mci, &hole_base, &hole_offset,
|
||||||
&hole_size);
|
&hole_size);
|
||||||
if (!ret) {
|
if (!ret) {
|
||||||
if ((sys_addr >= (1ull << 32)) &&
|
if ((sys_addr >= (1ULL << 32)) &&
|
||||||
(sys_addr < ((1ull << 32) + hole_size))) {
|
(sys_addr < ((1ULL << 32) + hole_size))) {
|
||||||
/* use DHAR to translate SysAddr to DramAddr */
|
/* use DHAR to translate SysAddr to DramAddr */
|
||||||
dram_addr = sys_addr - hole_offset;
|
dram_addr = sys_addr - hole_offset;
|
||||||
|
|
||||||
@ -712,10 +709,10 @@ static inline u64 input_addr_to_sys_addr(struct mem_ctl_info *mci,
|
|||||||
|
|
||||||
/* Map the Error address to a PAGE and PAGE OFFSET. */
|
/* Map the Error address to a PAGE and PAGE OFFSET. */
|
||||||
static inline void error_address_to_page_and_offset(u64 error_address,
|
static inline void error_address_to_page_and_offset(u64 error_address,
|
||||||
u32 *page, u32 *offset)
|
struct err_info *err)
|
||||||
{
|
{
|
||||||
*page = (u32) (error_address >> PAGE_SHIFT);
|
err->page = (u32) (error_address >> PAGE_SHIFT);
|
||||||
*offset = ((u32) error_address) & ~PAGE_MASK;
|
err->offset = ((u32) error_address) & ~PAGE_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1026,59 +1023,44 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
|
static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
|
||||||
u16 syndrome)
|
struct err_info *err)
|
||||||
{
|
{
|
||||||
struct mem_ctl_info *src_mci;
|
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
int channel, csrow;
|
|
||||||
u32 page, offset;
|
|
||||||
|
|
||||||
error_address_to_page_and_offset(sys_addr, &page, &offset);
|
error_address_to_page_and_offset(sys_addr, err);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find out which node the error address belongs to. This may be
|
* Find out which node the error address belongs to. This may be
|
||||||
* different from the node that detected the error.
|
* different from the node that detected the error.
|
||||||
*/
|
*/
|
||||||
src_mci = find_mc_by_sys_addr(mci, sys_addr);
|
err->src_mci = find_mc_by_sys_addr(mci, sys_addr);
|
||||||
if (!src_mci) {
|
if (!err->src_mci) {
|
||||||
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
|
amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
|
||||||
(unsigned long)sys_addr);
|
(unsigned long)sys_addr);
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
|
err->err_code = ERR_NODE;
|
||||||
page, offset, syndrome,
|
|
||||||
-1, -1, -1,
|
|
||||||
"failed to map error addr to a node",
|
|
||||||
"");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now map the sys_addr to a CSROW */
|
/* Now map the sys_addr to a CSROW */
|
||||||
csrow = sys_addr_to_csrow(src_mci, sys_addr);
|
err->csrow = sys_addr_to_csrow(err->src_mci, sys_addr);
|
||||||
if (csrow < 0) {
|
if (err->csrow < 0) {
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
|
err->err_code = ERR_CSROW;
|
||||||
page, offset, syndrome,
|
|
||||||
-1, -1, -1,
|
|
||||||
"failed to map error addr to a csrow",
|
|
||||||
"");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* CHIPKILL enabled */
|
/* CHIPKILL enabled */
|
||||||
if (pvt->nbcfg & NBCFG_CHIPKILL) {
|
if (pvt->nbcfg & NBCFG_CHIPKILL) {
|
||||||
channel = get_channel_from_ecc_syndrome(mci, syndrome);
|
err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
|
||||||
if (channel < 0) {
|
if (err->channel < 0) {
|
||||||
/*
|
/*
|
||||||
* Syndrome didn't map, so we don't know which of the
|
* Syndrome didn't map, so we don't know which of the
|
||||||
* 2 DIMMs is in error. So we need to ID 'both' of them
|
* 2 DIMMs is in error. So we need to ID 'both' of them
|
||||||
* as suspect.
|
* as suspect.
|
||||||
*/
|
*/
|
||||||
amd64_mc_warn(src_mci, "unknown syndrome 0x%04x - "
|
amd64_mc_warn(err->src_mci, "unknown syndrome 0x%04x - "
|
||||||
"possible error reporting race\n",
|
"possible error reporting race\n",
|
||||||
syndrome);
|
err->syndrome);
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
|
err->err_code = ERR_CHANNEL;
|
||||||
page, offset, syndrome,
|
|
||||||
csrow, -1, -1,
|
|
||||||
"unknown syndrome - possible error reporting race",
|
|
||||||
"");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -1090,13 +1072,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
|
|||||||
* was obtained from email communication with someone at AMD.
|
* was obtained from email communication with someone at AMD.
|
||||||
* (Wish the email was placed in this comment - norsk)
|
* (Wish the email was placed in this comment - norsk)
|
||||||
*/
|
*/
|
||||||
channel = ((sys_addr & BIT(3)) != 0);
|
err->channel = ((sys_addr & BIT(3)) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, src_mci, 1,
|
|
||||||
page, offset, syndrome,
|
|
||||||
csrow, channel, -1,
|
|
||||||
"", "");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ddr2_cs_size(unsigned i, bool dct_width)
|
static int ddr2_cs_size(unsigned i, bool dct_width)
|
||||||
@ -1482,7 +1459,7 @@ static u64 f1x_swap_interleaved_region(struct amd64_pvt *pvt, u64 sys_addr)
|
|||||||
|
|
||||||
/* For a given @dram_range, check if @sys_addr falls within it. */
|
/* For a given @dram_range, check if @sys_addr falls within it. */
|
||||||
static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
|
static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
|
||||||
u64 sys_addr, int *nid, int *chan_sel)
|
u64 sys_addr, int *chan_sel)
|
||||||
{
|
{
|
||||||
int cs_found = -EINVAL;
|
int cs_found = -EINVAL;
|
||||||
u64 chan_addr;
|
u64 chan_addr;
|
||||||
@ -1555,15 +1532,14 @@ static int f1x_match_to_this_node(struct amd64_pvt *pvt, unsigned range,
|
|||||||
|
|
||||||
cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
|
cs_found = f1x_lookup_addr_in_dct(chan_addr, node_id, channel);
|
||||||
|
|
||||||
if (cs_found >= 0) {
|
if (cs_found >= 0)
|
||||||
*nid = node_id;
|
|
||||||
*chan_sel = channel;
|
*chan_sel = channel;
|
||||||
}
|
|
||||||
return cs_found;
|
return cs_found;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
|
static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
|
||||||
int *node, int *chan_sel)
|
int *chan_sel)
|
||||||
{
|
{
|
||||||
int cs_found = -EINVAL;
|
int cs_found = -EINVAL;
|
||||||
unsigned range;
|
unsigned range;
|
||||||
@ -1577,8 +1553,7 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
|
|||||||
(get_dram_limit(pvt, range) >= sys_addr)) {
|
(get_dram_limit(pvt, range) >= sys_addr)) {
|
||||||
|
|
||||||
cs_found = f1x_match_to_this_node(pvt, range,
|
cs_found = f1x_match_to_this_node(pvt, range,
|
||||||
sys_addr, node,
|
sys_addr, chan_sel);
|
||||||
chan_sel);
|
|
||||||
if (cs_found >= 0)
|
if (cs_found >= 0)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1594,22 +1569,15 @@ static int f1x_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr,
|
|||||||
* (MCX_ADDR).
|
* (MCX_ADDR).
|
||||||
*/
|
*/
|
||||||
static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
|
static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
|
||||||
u16 syndrome)
|
struct err_info *err)
|
||||||
{
|
{
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
u32 page, offset;
|
|
||||||
int nid, csrow, chan = 0;
|
|
||||||
|
|
||||||
error_address_to_page_and_offset(sys_addr, &page, &offset);
|
error_address_to_page_and_offset(sys_addr, err);
|
||||||
|
|
||||||
csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan);
|
err->csrow = f1x_translate_sysaddr_to_cs(pvt, sys_addr, &err->channel);
|
||||||
|
if (err->csrow < 0) {
|
||||||
if (csrow < 0) {
|
err->err_code = ERR_CSROW;
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
|
|
||||||
page, offset, syndrome,
|
|
||||||
-1, -1, -1,
|
|
||||||
"failed to map error addr to a csrow",
|
|
||||||
"");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1619,12 +1587,7 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
|
|||||||
* this point.
|
* this point.
|
||||||
*/
|
*/
|
||||||
if (dct_ganging_enabled(pvt))
|
if (dct_ganging_enabled(pvt))
|
||||||
chan = get_channel_from_ecc_syndrome(mci, syndrome);
|
err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome);
|
||||||
|
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
|
|
||||||
page, offset, syndrome,
|
|
||||||
csrow, chan, -1,
|
|
||||||
"", "");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1633,14 +1596,11 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr,
|
|||||||
*/
|
*/
|
||||||
static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
|
static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
|
||||||
{
|
{
|
||||||
int dimm, size0, size1, factor = 0;
|
int dimm, size0, size1;
|
||||||
u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
|
u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases;
|
||||||
u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
|
u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0;
|
||||||
|
|
||||||
if (boot_cpu_data.x86 == 0xf) {
|
if (boot_cpu_data.x86 == 0xf) {
|
||||||
if (pvt->dclr0 & WIDTH_128)
|
|
||||||
factor = 1;
|
|
||||||
|
|
||||||
/* K8 families < revF not supported yet */
|
/* K8 families < revF not supported yet */
|
||||||
if (pvt->ext_model < K8_REV_F)
|
if (pvt->ext_model < K8_REV_F)
|
||||||
return;
|
return;
|
||||||
@ -1671,8 +1631,8 @@ static void amd64_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
|
|||||||
DBAM_DIMM(dimm, dbam));
|
DBAM_DIMM(dimm, dbam));
|
||||||
|
|
||||||
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
|
amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
|
||||||
dimm * 2, size0 << factor,
|
dimm * 2, size0,
|
||||||
dimm * 2 + 1, size1 << factor);
|
dimm * 2 + 1, size1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1893,101 +1853,56 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
|
|||||||
return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
|
return map_err_sym_to_channel(err_sym, pvt->ecc_sym_sz);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static void __log_bus_error(struct mem_ctl_info *mci, struct err_info *err,
|
||||||
* Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR
|
u8 ecc_type)
|
||||||
* ADDRESS and process.
|
|
||||||
*/
|
|
||||||
static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
|
|
||||||
{
|
{
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
enum hw_event_mc_err_type err_type;
|
||||||
u64 sys_addr;
|
const char *string;
|
||||||
u16 syndrome;
|
|
||||||
|
|
||||||
/* Ensure that the Error Address is VALID */
|
if (ecc_type == 2)
|
||||||
if (!(m->status & MCI_STATUS_ADDRV)) {
|
err_type = HW_EVENT_ERR_CORRECTED;
|
||||||
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
|
else if (ecc_type == 1)
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
|
err_type = HW_EVENT_ERR_UNCORRECTED;
|
||||||
0, 0, 0,
|
else {
|
||||||
-1, -1, -1,
|
WARN(1, "Something is rotten in the state of Denmark.\n");
|
||||||
"HW has no ERROR_ADDRESS available",
|
|
||||||
"");
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
sys_addr = get_error_address(m);
|
switch (err->err_code) {
|
||||||
syndrome = extract_syndrome(m->status);
|
case DECODE_OK:
|
||||||
|
string = "";
|
||||||
amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
|
break;
|
||||||
|
case ERR_NODE:
|
||||||
pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome);
|
string = "Failed to map error addr to a node";
|
||||||
}
|
break;
|
||||||
|
case ERR_CSROW:
|
||||||
/* Handle any Un-correctable Errors (UEs) */
|
string = "Failed to map error addr to a csrow";
|
||||||
static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m)
|
break;
|
||||||
{
|
case ERR_CHANNEL:
|
||||||
struct mem_ctl_info *log_mci, *src_mci = NULL;
|
string = "unknown syndrome - possible error reporting race";
|
||||||
int csrow;
|
break;
|
||||||
u64 sys_addr;
|
default:
|
||||||
u32 page, offset;
|
string = "WTF error";
|
||||||
|
break;
|
||||||
log_mci = mci;
|
|
||||||
|
|
||||||
if (!(m->status & MCI_STATUS_ADDRV)) {
|
|
||||||
amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
|
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
|
|
||||||
0, 0, 0,
|
|
||||||
-1, -1, -1,
|
|
||||||
"HW has no ERROR_ADDRESS available",
|
|
||||||
"");
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
sys_addr = get_error_address(m);
|
edac_mc_handle_error(err_type, mci, 1,
|
||||||
error_address_to_page_and_offset(sys_addr, &page, &offset);
|
err->page, err->offset, err->syndrome,
|
||||||
|
err->csrow, err->channel, -1,
|
||||||
/*
|
string, "");
|
||||||
* Find out which node the error address belongs to. This may be
|
|
||||||
* different from the node that detected the error.
|
|
||||||
*/
|
|
||||||
src_mci = find_mc_by_sys_addr(mci, sys_addr);
|
|
||||||
if (!src_mci) {
|
|
||||||
amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
|
|
||||||
(unsigned long)sys_addr);
|
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
|
|
||||||
page, offset, 0,
|
|
||||||
-1, -1, -1,
|
|
||||||
"ERROR ADDRESS NOT mapped to a MC",
|
|
||||||
"");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
log_mci = src_mci;
|
|
||||||
|
|
||||||
csrow = sys_addr_to_csrow(log_mci, sys_addr);
|
|
||||||
if (csrow < 0) {
|
|
||||||
amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
|
|
||||||
(unsigned long)sys_addr);
|
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
|
|
||||||
page, offset, 0,
|
|
||||||
-1, -1, -1,
|
|
||||||
"ERROR ADDRESS NOT mapped to CS",
|
|
||||||
"");
|
|
||||||
} else {
|
|
||||||
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
|
|
||||||
page, offset, 0,
|
|
||||||
csrow, -1, -1,
|
|
||||||
"", "");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
|
static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
|
||||||
struct mce *m)
|
struct mce *m)
|
||||||
{
|
{
|
||||||
u16 ec = EC(m->status);
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
u8 xec = XEC(m->status, 0x1f);
|
|
||||||
u8 ecc_type = (m->status >> 45) & 0x3;
|
u8 ecc_type = (m->status >> 45) & 0x3;
|
||||||
|
u8 xec = XEC(m->status, 0x1f);
|
||||||
|
u16 ec = EC(m->status);
|
||||||
|
u64 sys_addr;
|
||||||
|
struct err_info err;
|
||||||
|
|
||||||
/* Bail early out if this was an 'observed' error */
|
/* Bail out early if this was an 'observed' error */
|
||||||
if (PP(ec) == NBSL_PP_OBS)
|
if (PP(ec) == NBSL_PP_OBS)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -1995,10 +1910,16 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
|
|||||||
if (xec && xec != F10_NBSL_EXT_ERR_ECC)
|
if (xec && xec != F10_NBSL_EXT_ERR_ECC)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
memset(&err, 0, sizeof(err));
|
||||||
|
|
||||||
|
sys_addr = get_error_address(m);
|
||||||
|
|
||||||
if (ecc_type == 2)
|
if (ecc_type == 2)
|
||||||
amd64_handle_ce(mci, m);
|
err.syndrome = extract_syndrome(m->status);
|
||||||
else if (ecc_type == 1)
|
|
||||||
amd64_handle_ue(mci, m);
|
pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, &err);
|
||||||
|
|
||||||
|
__log_bus_error(mci, &err, ecc_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
void amd64_decode_bus_error(int node_id, struct mce *m)
|
void amd64_decode_bus_error(int node_id, struct mce *m)
|
||||||
@ -2166,6 +2087,7 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
|
|||||||
u32 cs_mode, nr_pages;
|
u32 cs_mode, nr_pages;
|
||||||
u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
|
u32 dbam = dct ? pvt->dbam1 : pvt->dbam0;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The math on this doesn't look right on the surface because x/2*4 can
|
* The math on this doesn't look right on the surface because x/2*4 can
|
||||||
* be simplified to x*2 but this expression makes use of the fact that
|
* be simplified to x*2 but this expression makes use of the fact that
|
||||||
@ -2173,13 +2095,13 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
|
|||||||
* number of bits to shift the DBAM register to extract the proper CSROW
|
* number of bits to shift the DBAM register to extract the proper CSROW
|
||||||
* field.
|
* field.
|
||||||
*/
|
*/
|
||||||
cs_mode = (dbam >> ((csrow_nr / 2) * 4)) & 0xF;
|
cs_mode = DBAM_DIMM(csrow_nr / 2, dbam);
|
||||||
|
|
||||||
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
|
nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT);
|
||||||
|
|
||||||
edac_dbg(0, " (csrow=%d) DBAM map index= %d\n", csrow_nr, cs_mode);
|
edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n",
|
||||||
edac_dbg(0, " nr_pages/channel= %u channel-count = %d\n",
|
csrow_nr, dct, cs_mode);
|
||||||
nr_pages, pvt->channel_count);
|
edac_dbg(0, "nr_pages/channel: %u\n", nr_pages);
|
||||||
|
|
||||||
return nr_pages;
|
return nr_pages;
|
||||||
}
|
}
|
||||||
@ -2190,15 +2112,14 @@ static u32 amd64_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr)
|
|||||||
*/
|
*/
|
||||||
static int init_csrows(struct mem_ctl_info *mci)
|
static int init_csrows(struct mem_ctl_info *mci)
|
||||||
{
|
{
|
||||||
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
struct csrow_info *csrow;
|
struct csrow_info *csrow;
|
||||||
struct dimm_info *dimm;
|
struct dimm_info *dimm;
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
|
||||||
u64 base, mask;
|
|
||||||
u32 val;
|
|
||||||
int i, j, empty = 1;
|
|
||||||
enum mem_type mtype;
|
|
||||||
enum edac_type edac_mode;
|
enum edac_type edac_mode;
|
||||||
|
enum mem_type mtype;
|
||||||
|
int i, j, empty = 1;
|
||||||
int nr_pages = 0;
|
int nr_pages = 0;
|
||||||
|
u32 val;
|
||||||
|
|
||||||
amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
|
amd64_read_pci_cfg(pvt->F3, NBCFG, &val);
|
||||||
|
|
||||||
@ -2208,29 +2129,35 @@ static int init_csrows(struct mem_ctl_info *mci)
|
|||||||
pvt->mc_node_id, val,
|
pvt->mc_node_id, val,
|
||||||
!!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
|
!!(val & NBCFG_CHIPKILL), !!(val & NBCFG_ECC_ENABLE));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We iterate over DCT0 here but we look at DCT1 in parallel, if needed.
|
||||||
|
*/
|
||||||
for_each_chip_select(i, 0, pvt) {
|
for_each_chip_select(i, 0, pvt) {
|
||||||
csrow = mci->csrows[i];
|
bool row_dct0 = !!csrow_enabled(i, 0, pvt);
|
||||||
|
bool row_dct1 = false;
|
||||||
|
|
||||||
if (!csrow_enabled(i, 0, pvt) && !csrow_enabled(i, 1, pvt)) {
|
if (boot_cpu_data.x86 != 0xf)
|
||||||
edac_dbg(1, "----CSROW %d VALID for MC node %d\n",
|
row_dct1 = !!csrow_enabled(i, 1, pvt);
|
||||||
i, pvt->mc_node_id);
|
|
||||||
|
if (!row_dct0 && !row_dct1)
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
|
csrow = mci->csrows[i];
|
||||||
empty = 0;
|
empty = 0;
|
||||||
if (csrow_enabled(i, 0, pvt))
|
|
||||||
nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
|
|
||||||
if (csrow_enabled(i, 1, pvt))
|
|
||||||
nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
|
|
||||||
|
|
||||||
get_cs_base_and_mask(pvt, i, 0, &base, &mask);
|
edac_dbg(1, "MC node: %d, csrow: %d\n",
|
||||||
/* 8 bytes of resolution */
|
pvt->mc_node_id, i);
|
||||||
|
|
||||||
|
if (row_dct0)
|
||||||
|
nr_pages = amd64_csrow_nr_pages(pvt, 0, i);
|
||||||
|
|
||||||
|
/* K8 has only one DCT */
|
||||||
|
if (boot_cpu_data.x86 != 0xf && row_dct1)
|
||||||
|
nr_pages += amd64_csrow_nr_pages(pvt, 1, i);
|
||||||
|
|
||||||
mtype = amd64_determine_memory_type(pvt, i);
|
mtype = amd64_determine_memory_type(pvt, i);
|
||||||
|
|
||||||
edac_dbg(1, " for MC node %d csrow %d:\n", pvt->mc_node_id, i);
|
edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages);
|
||||||
edac_dbg(1, " nr_pages: %u\n",
|
|
||||||
nr_pages * pvt->channel_count);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* determine whether CHIPKILL or JUST ECC or NO ECC is operating
|
* determine whether CHIPKILL or JUST ECC or NO ECC is operating
|
||||||
@ -2247,6 +2174,7 @@ static int init_csrows(struct mem_ctl_info *mci)
|
|||||||
dimm->edac_mode = edac_mode;
|
dimm->edac_mode = edac_mode;
|
||||||
dimm->nr_pages = nr_pages;
|
dimm->nr_pages = nr_pages;
|
||||||
}
|
}
|
||||||
|
csrow->nr_pages = nr_pages;
|
||||||
}
|
}
|
||||||
|
|
||||||
return empty;
|
return empty;
|
||||||
@ -2591,6 +2519,7 @@ static int amd64_init_one_instance(struct pci_dev *F2)
|
|||||||
|
|
||||||
mci->pvt_info = pvt;
|
mci->pvt_info = pvt;
|
||||||
mci->pdev = &pvt->F2->dev;
|
mci->pdev = &pvt->F2->dev;
|
||||||
|
mci->csbased = 1;
|
||||||
|
|
||||||
setup_mci_misc_attrs(mci, fam_type);
|
setup_mci_misc_attrs(mci, fam_type);
|
||||||
|
|
||||||
|
@ -219,7 +219,7 @@
|
|||||||
#define DBAM1 0x180
|
#define DBAM1 0x180
|
||||||
|
|
||||||
/* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */
|
/* Extract the DIMM 'type' on the i'th DIMM from the DBAM reg value passed */
|
||||||
#define DBAM_DIMM(i, reg) ((((reg) >> (4*i))) & 0xF)
|
#define DBAM_DIMM(i, reg) ((((reg) >> (4*(i)))) & 0xF)
|
||||||
|
|
||||||
#define DBAM_MAX_VALUE 11
|
#define DBAM_MAX_VALUE 11
|
||||||
|
|
||||||
@ -267,18 +267,20 @@
|
|||||||
#define online_spare_bad_dramcs(pvt, c) (((pvt)->online_spare >> (4 + 4 * (c))) & 0x7)
|
#define online_spare_bad_dramcs(pvt, c) (((pvt)->online_spare >> (4 + 4 * (c))) & 0x7)
|
||||||
|
|
||||||
#define F10_NB_ARRAY_ADDR 0xB8
|
#define F10_NB_ARRAY_ADDR 0xB8
|
||||||
#define F10_NB_ARRAY_DRAM_ECC BIT(31)
|
#define F10_NB_ARRAY_DRAM BIT(31)
|
||||||
|
|
||||||
/* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */
|
/* Bits [2:1] are used to select 16-byte section within a 64-byte cacheline */
|
||||||
#define SET_NB_ARRAY_ADDRESS(section) (((section) & 0x3) << 1)
|
#define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1)
|
||||||
|
|
||||||
#define F10_NB_ARRAY_DATA 0xBC
|
#define F10_NB_ARRAY_DATA 0xBC
|
||||||
#define SET_NB_DRAM_INJECTION_WRITE(word, bits) \
|
#define F10_NB_ARR_ECC_WR_REQ BIT(17)
|
||||||
(BIT(((word) & 0xF) + 20) | \
|
#define SET_NB_DRAM_INJECTION_WRITE(inj) \
|
||||||
BIT(17) | bits)
|
(BIT(((inj.word) & 0xF) + 20) | \
|
||||||
#define SET_NB_DRAM_INJECTION_READ(word, bits) \
|
F10_NB_ARR_ECC_WR_REQ | inj.bit_map)
|
||||||
(BIT(((word) & 0xF) + 20) | \
|
#define SET_NB_DRAM_INJECTION_READ(inj) \
|
||||||
BIT(16) | bits)
|
(BIT(((inj.word) & 0xF) + 20) | \
|
||||||
|
BIT(16) | inj.bit_map)
|
||||||
|
|
||||||
|
|
||||||
#define NBCAP 0xE8
|
#define NBCAP 0xE8
|
||||||
#define NBCAP_CHIPKILL BIT(4)
|
#define NBCAP_CHIPKILL BIT(4)
|
||||||
@ -305,9 +307,9 @@ enum amd_families {
|
|||||||
|
|
||||||
/* Error injection control structure */
|
/* Error injection control structure */
|
||||||
struct error_injection {
|
struct error_injection {
|
||||||
u32 section;
|
u32 section;
|
||||||
u32 word;
|
u32 word;
|
||||||
u32 bit_map;
|
u32 bit_map;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* low and high part of PCI config space regs */
|
/* low and high part of PCI config space regs */
|
||||||
@ -374,6 +376,23 @@ struct amd64_pvt {
|
|||||||
struct error_injection injection;
|
struct error_injection injection;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum err_codes {
|
||||||
|
DECODE_OK = 0,
|
||||||
|
ERR_NODE = -1,
|
||||||
|
ERR_CSROW = -2,
|
||||||
|
ERR_CHANNEL = -3,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct err_info {
|
||||||
|
int err_code;
|
||||||
|
struct mem_ctl_info *src_mci;
|
||||||
|
int csrow;
|
||||||
|
int channel;
|
||||||
|
u16 syndrome;
|
||||||
|
u32 page;
|
||||||
|
u32 offset;
|
||||||
|
};
|
||||||
|
|
||||||
static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i)
|
static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i)
|
||||||
{
|
{
|
||||||
u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
|
u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8;
|
||||||
@ -447,7 +466,7 @@ static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
|
|||||||
struct low_ops {
|
struct low_ops {
|
||||||
int (*early_channel_count) (struct amd64_pvt *pvt);
|
int (*early_channel_count) (struct amd64_pvt *pvt);
|
||||||
void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
|
void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr,
|
||||||
u16 syndrome);
|
struct err_info *);
|
||||||
int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode);
|
int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode);
|
||||||
int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset,
|
int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset,
|
||||||
u32 *val, const char *func);
|
u32 *val, const char *func);
|
||||||
@ -459,6 +478,8 @@ struct amd64_family_type {
|
|||||||
struct low_ops ops;
|
struct low_ops ops;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
|
||||||
|
u32 *val, const char *func);
|
||||||
int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
|
int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
|
||||||
u32 val, const char *func);
|
u32 val, const char *func);
|
||||||
|
|
||||||
@ -475,3 +496,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
|
|||||||
u64 *hole_offset, u64 *hole_size);
|
u64 *hole_offset, u64 *hole_size);
|
||||||
|
|
||||||
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
|
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
|
||||||
|
|
||||||
|
/* Injection helpers */
|
||||||
|
static inline void disable_caches(void *dummy)
|
||||||
|
{
|
||||||
|
write_cr0(read_cr0() | X86_CR0_CD);
|
||||||
|
wbinvd();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void enable_caches(void *dummy)
|
||||||
|
{
|
||||||
|
write_cr0(read_cr0() & ~X86_CR0_CD);
|
||||||
|
}
|
||||||
|
@ -22,20 +22,19 @@ static ssize_t amd64_inject_section_store(struct device *dev,
|
|||||||
struct mem_ctl_info *mci = to_mci(dev);
|
struct mem_ctl_info *mci = to_mci(dev);
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
unsigned long value;
|
unsigned long value;
|
||||||
int ret = 0;
|
int ret;
|
||||||
|
|
||||||
ret = strict_strtoul(data, 10, &value);
|
ret = strict_strtoul(data, 10, &value);
|
||||||
if (ret != -EINVAL) {
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
if (value > 3) {
|
if (value > 3) {
|
||||||
amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
|
amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
|
||||||
|
|
||||||
pvt->injection.section = (u32) value;
|
|
||||||
return count;
|
|
||||||
}
|
}
|
||||||
return ret;
|
|
||||||
|
pvt->injection.section = (u32) value;
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t amd64_inject_word_show(struct device *dev,
|
static ssize_t amd64_inject_word_show(struct device *dev,
|
||||||
@ -60,20 +59,19 @@ static ssize_t amd64_inject_word_store(struct device *dev,
|
|||||||
struct mem_ctl_info *mci = to_mci(dev);
|
struct mem_ctl_info *mci = to_mci(dev);
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
unsigned long value;
|
unsigned long value;
|
||||||
int ret = 0;
|
int ret;
|
||||||
|
|
||||||
ret = strict_strtoul(data, 10, &value);
|
ret = strict_strtoul(data, 10, &value);
|
||||||
if (ret != -EINVAL) {
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
if (value > 8) {
|
if (value > 8) {
|
||||||
amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
|
amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
|
||||||
|
|
||||||
pvt->injection.word = (u32) value;
|
|
||||||
return count;
|
|
||||||
}
|
}
|
||||||
return ret;
|
|
||||||
|
pvt->injection.word = (u32) value;
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t amd64_inject_ecc_vector_show(struct device *dev,
|
static ssize_t amd64_inject_ecc_vector_show(struct device *dev,
|
||||||
@ -97,21 +95,19 @@ static ssize_t amd64_inject_ecc_vector_store(struct device *dev,
|
|||||||
struct mem_ctl_info *mci = to_mci(dev);
|
struct mem_ctl_info *mci = to_mci(dev);
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
unsigned long value;
|
unsigned long value;
|
||||||
int ret = 0;
|
int ret;
|
||||||
|
|
||||||
ret = strict_strtoul(data, 16, &value);
|
ret = strict_strtoul(data, 16, &value);
|
||||||
if (ret != -EINVAL) {
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
if (value & 0xFFFF0000) {
|
if (value & 0xFFFF0000) {
|
||||||
amd64_warn("%s: invalid EccVector: 0x%lx\n",
|
amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value);
|
||||||
__func__, value);
|
return -EINVAL;
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
pvt->injection.bit_map = (u32) value;
|
|
||||||
return count;
|
|
||||||
}
|
}
|
||||||
return ret;
|
|
||||||
|
pvt->injection.bit_map = (u32) value;
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -126,28 +122,25 @@ static ssize_t amd64_inject_read_store(struct device *dev,
|
|||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
unsigned long value;
|
unsigned long value;
|
||||||
u32 section, word_bits;
|
u32 section, word_bits;
|
||||||
int ret = 0;
|
int ret;
|
||||||
|
|
||||||
ret = strict_strtoul(data, 10, &value);
|
ret = strict_strtoul(data, 10, &value);
|
||||||
if (ret != -EINVAL) {
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
/* Form value to choose 16-byte section of cacheline */
|
/* Form value to choose 16-byte section of cacheline */
|
||||||
section = F10_NB_ARRAY_DRAM_ECC |
|
section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
|
||||||
SET_NB_ARRAY_ADDRESS(pvt->injection.section);
|
|
||||||
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
|
|
||||||
|
|
||||||
word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word,
|
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
|
||||||
pvt->injection.bit_map);
|
|
||||||
|
|
||||||
/* Issue 'word' and 'bit' along with the READ request */
|
word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection);
|
||||||
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
|
|
||||||
|
|
||||||
edac_dbg(0, "section=0x%x word_bits=0x%x\n",
|
/* Issue 'word' and 'bit' along with the READ request */
|
||||||
section, word_bits);
|
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
|
||||||
|
|
||||||
return count;
|
edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
|
||||||
}
|
|
||||||
return ret;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -160,30 +153,43 @@ static ssize_t amd64_inject_write_store(struct device *dev,
|
|||||||
{
|
{
|
||||||
struct mem_ctl_info *mci = to_mci(dev);
|
struct mem_ctl_info *mci = to_mci(dev);
|
||||||
struct amd64_pvt *pvt = mci->pvt_info;
|
struct amd64_pvt *pvt = mci->pvt_info;
|
||||||
|
u32 section, word_bits, tmp;
|
||||||
unsigned long value;
|
unsigned long value;
|
||||||
u32 section, word_bits;
|
int ret;
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
ret = strict_strtoul(data, 10, &value);
|
ret = strict_strtoul(data, 10, &value);
|
||||||
if (ret != -EINVAL) {
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
/* Form value to choose 16-byte section of cacheline */
|
/* Form value to choose 16-byte section of cacheline */
|
||||||
section = F10_NB_ARRAY_DRAM_ECC |
|
section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
|
||||||
SET_NB_ARRAY_ADDRESS(pvt->injection.section);
|
|
||||||
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
|
|
||||||
|
|
||||||
word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word,
|
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
|
||||||
pvt->injection.bit_map);
|
|
||||||
|
|
||||||
/* Issue 'word' and 'bit' along with the READ request */
|
word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
|
||||||
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
|
|
||||||
|
|
||||||
edac_dbg(0, "section=0x%x word_bits=0x%x\n",
|
pr_notice_once("Don't forget to decrease MCE polling interval in\n"
|
||||||
section, word_bits);
|
"/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
|
||||||
|
"so that you can get the error report faster.\n");
|
||||||
|
|
||||||
return count;
|
on_each_cpu(disable_caches, NULL, 1);
|
||||||
|
|
||||||
|
/* Issue 'word' and 'bit' along with the READ request */
|
||||||
|
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
|
||||||
|
|
||||||
|
retry:
|
||||||
|
/* wait until injection happens */
|
||||||
|
amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
|
||||||
|
if (tmp & F10_NB_ARR_ECC_WR_REQ) {
|
||||||
|
cpu_relax();
|
||||||
|
goto retry;
|
||||||
}
|
}
|
||||||
return ret;
|
|
||||||
|
on_each_cpu(enable_caches, NULL, 1);
|
||||||
|
|
||||||
|
edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
|
||||||
|
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -974,20 +974,22 @@ static void edac_ce_error(struct mem_ctl_info *mci,
|
|||||||
long grain)
|
long grain)
|
||||||
{
|
{
|
||||||
unsigned long remapped_page;
|
unsigned long remapped_page;
|
||||||
|
char *msg_aux = "";
|
||||||
|
|
||||||
|
if (*msg)
|
||||||
|
msg_aux = " ";
|
||||||
|
|
||||||
if (edac_mc_get_log_ce()) {
|
if (edac_mc_get_log_ce()) {
|
||||||
if (other_detail && *other_detail)
|
if (other_detail && *other_detail)
|
||||||
edac_mc_printk(mci, KERN_WARNING,
|
edac_mc_printk(mci, KERN_WARNING,
|
||||||
"%d CE %s on %s (%s %s - %s)\n",
|
"%d CE %s%son %s (%s %s - %s)\n",
|
||||||
error_count,
|
error_count, msg, msg_aux, label,
|
||||||
msg, label, location,
|
location, detail, other_detail);
|
||||||
detail, other_detail);
|
|
||||||
else
|
else
|
||||||
edac_mc_printk(mci, KERN_WARNING,
|
edac_mc_printk(mci, KERN_WARNING,
|
||||||
"%d CE %s on %s (%s %s)\n",
|
"%d CE %s%son %s (%s %s)\n",
|
||||||
error_count,
|
error_count, msg, msg_aux, label,
|
||||||
msg, label, location,
|
location, detail);
|
||||||
detail);
|
|
||||||
}
|
}
|
||||||
edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
|
edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
|
||||||
|
|
||||||
@ -1022,27 +1024,31 @@ static void edac_ue_error(struct mem_ctl_info *mci,
|
|||||||
const char *other_detail,
|
const char *other_detail,
|
||||||
const bool enable_per_layer_report)
|
const bool enable_per_layer_report)
|
||||||
{
|
{
|
||||||
|
char *msg_aux = "";
|
||||||
|
|
||||||
|
if (*msg)
|
||||||
|
msg_aux = " ";
|
||||||
|
|
||||||
if (edac_mc_get_log_ue()) {
|
if (edac_mc_get_log_ue()) {
|
||||||
if (other_detail && *other_detail)
|
if (other_detail && *other_detail)
|
||||||
edac_mc_printk(mci, KERN_WARNING,
|
edac_mc_printk(mci, KERN_WARNING,
|
||||||
"%d UE %s on %s (%s %s - %s)\n",
|
"%d UE %s%son %s (%s %s - %s)\n",
|
||||||
error_count,
|
error_count, msg, msg_aux, label,
|
||||||
msg, label, location, detail,
|
location, detail, other_detail);
|
||||||
other_detail);
|
|
||||||
else
|
else
|
||||||
edac_mc_printk(mci, KERN_WARNING,
|
edac_mc_printk(mci, KERN_WARNING,
|
||||||
"%d UE %s on %s (%s %s)\n",
|
"%d UE %s%son %s (%s %s)\n",
|
||||||
error_count,
|
error_count, msg, msg_aux, label,
|
||||||
msg, label, location, detail);
|
location, detail);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (edac_mc_get_panic_on_ue()) {
|
if (edac_mc_get_panic_on_ue()) {
|
||||||
if (other_detail && *other_detail)
|
if (other_detail && *other_detail)
|
||||||
panic("UE %s on %s (%s%s - %s)\n",
|
panic("UE %s%son %s (%s%s - %s)\n",
|
||||||
msg, label, location, detail, other_detail);
|
msg, msg_aux, label, location, detail, other_detail);
|
||||||
else
|
else
|
||||||
panic("UE %s on %s (%s%s)\n",
|
panic("UE %s%son %s (%s%s)\n",
|
||||||
msg, label, location, detail);
|
msg, msg_aux, label, location, detail);
|
||||||
}
|
}
|
||||||
|
|
||||||
edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
|
edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
|
||||||
@ -1101,10 +1107,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
*/
|
*/
|
||||||
for (i = 0; i < mci->n_layers; i++) {
|
for (i = 0; i < mci->n_layers; i++) {
|
||||||
if (pos[i] >= (int)mci->layers[i].size) {
|
if (pos[i] >= (int)mci->layers[i].size) {
|
||||||
if (type == HW_EVENT_ERR_CORRECTED)
|
|
||||||
p = "CE";
|
|
||||||
else
|
|
||||||
p = "UE";
|
|
||||||
|
|
||||||
edac_mc_printk(mci, KERN_ERR,
|
edac_mc_printk(mci, KERN_ERR,
|
||||||
"INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
|
"INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
|
||||||
@ -1136,6 +1138,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
grain = 0;
|
grain = 0;
|
||||||
p = label;
|
p = label;
|
||||||
*p = '\0';
|
*p = '\0';
|
||||||
|
|
||||||
for (i = 0; i < mci->tot_dimms; i++) {
|
for (i = 0; i < mci->tot_dimms; i++) {
|
||||||
struct dimm_info *dimm = mci->dimms[i];
|
struct dimm_info *dimm = mci->dimms[i];
|
||||||
|
|
||||||
@ -1203,6 +1206,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
|
|
||||||
/* Fill the RAM location data */
|
/* Fill the RAM location data */
|
||||||
p = location;
|
p = location;
|
||||||
|
|
||||||
for (i = 0; i < mci->n_layers; i++) {
|
for (i = 0; i < mci->n_layers; i++) {
|
||||||
if (pos[i] < 0)
|
if (pos[i] < 0)
|
||||||
continue;
|
continue;
|
||||||
@ -1215,7 +1219,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
|
|||||||
*(p - 1) = '\0';
|
*(p - 1) = '\0';
|
||||||
|
|
||||||
/* Report the error via the trace interface */
|
/* Report the error via the trace interface */
|
||||||
|
|
||||||
grain_bits = fls_long(grain) + 1;
|
grain_bits = fls_long(grain) + 1;
|
||||||
trace_mc_event(type, msg, label, error_count,
|
trace_mc_event(type, msg, label, error_count,
|
||||||
mci->mc_idx, top_layer, mid_layer, low_layer,
|
mci->mc_idx, top_layer, mid_layer, low_layer,
|
||||||
|
@ -180,6 +180,9 @@ static ssize_t csrow_size_show(struct device *dev,
|
|||||||
int i;
|
int i;
|
||||||
u32 nr_pages = 0;
|
u32 nr_pages = 0;
|
||||||
|
|
||||||
|
if (csrow->mci->csbased)
|
||||||
|
return sprintf(data, "%u\n", PAGES_TO_MiB(csrow->nr_pages));
|
||||||
|
|
||||||
for (i = 0; i < csrow->nr_channels; i++)
|
for (i = 0; i < csrow->nr_channels; i++)
|
||||||
nr_pages += csrow->channels[i]->dimm->nr_pages;
|
nr_pages += csrow->channels[i]->dimm->nr_pages;
|
||||||
return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages));
|
return sprintf(data, "%u\n", PAGES_TO_MiB(nr_pages));
|
||||||
@ -373,6 +376,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
|
|||||||
csrow->dev.bus = &mci->bus;
|
csrow->dev.bus = &mci->bus;
|
||||||
device_initialize(&csrow->dev);
|
device_initialize(&csrow->dev);
|
||||||
csrow->dev.parent = &mci->dev;
|
csrow->dev.parent = &mci->dev;
|
||||||
|
csrow->mci = mci;
|
||||||
dev_set_name(&csrow->dev, "csrow%d", index);
|
dev_set_name(&csrow->dev, "csrow%d", index);
|
||||||
dev_set_drvdata(&csrow->dev, csrow);
|
dev_set_drvdata(&csrow->dev, csrow);
|
||||||
|
|
||||||
@ -777,10 +781,14 @@ static ssize_t mci_size_mb_show(struct device *dev,
|
|||||||
for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) {
|
for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) {
|
||||||
struct csrow_info *csrow = mci->csrows[csrow_idx];
|
struct csrow_info *csrow = mci->csrows[csrow_idx];
|
||||||
|
|
||||||
for (j = 0; j < csrow->nr_channels; j++) {
|
if (csrow->mci->csbased) {
|
||||||
struct dimm_info *dimm = csrow->channels[j]->dimm;
|
total_pages += csrow->nr_pages;
|
||||||
|
} else {
|
||||||
|
for (j = 0; j < csrow->nr_channels; j++) {
|
||||||
|
struct dimm_info *dimm = csrow->channels[j]->dimm;
|
||||||
|
|
||||||
total_pages += dimm->nr_pages;
|
total_pages += dimm->nr_pages;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -838,14 +846,8 @@ static ssize_t edac_fake_inject_write(struct file *file,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int debugfs_open(struct inode *inode, struct file *file)
|
|
||||||
{
|
|
||||||
file->private_data = inode->i_private;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const struct file_operations debug_fake_inject_fops = {
|
static const struct file_operations debug_fake_inject_fops = {
|
||||||
.open = debugfs_open,
|
.open = simple_open,
|
||||||
.write = edac_fake_inject_write,
|
.write = edac_fake_inject_write,
|
||||||
.llseek = generic_file_llseek,
|
.llseek = generic_file_llseek,
|
||||||
};
|
};
|
||||||
@ -1124,10 +1126,15 @@ int __init edac_mc_sysfs_init(void)
|
|||||||
edac_subsys = edac_get_sysfs_subsys();
|
edac_subsys = edac_get_sysfs_subsys();
|
||||||
if (edac_subsys == NULL) {
|
if (edac_subsys == NULL) {
|
||||||
edac_dbg(1, "no edac_subsys\n");
|
edac_dbg(1, "no edac_subsys\n");
|
||||||
return -EINVAL;
|
err = -EINVAL;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL);
|
mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL);
|
||||||
|
if (!mci_pdev) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
goto out_put_sysfs;
|
||||||
|
}
|
||||||
|
|
||||||
mci_pdev->bus = edac_subsys;
|
mci_pdev->bus = edac_subsys;
|
||||||
mci_pdev->type = &mc_attr_type;
|
mci_pdev->type = &mc_attr_type;
|
||||||
@ -1136,11 +1143,18 @@ int __init edac_mc_sysfs_init(void)
|
|||||||
|
|
||||||
err = device_add(mci_pdev);
|
err = device_add(mci_pdev);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
return err;
|
goto out_dev_free;
|
||||||
|
|
||||||
edac_dbg(0, "device %s created\n", dev_name(mci_pdev));
|
edac_dbg(0, "device %s created\n", dev_name(mci_pdev));
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
out_dev_free:
|
||||||
|
kfree(mci_pdev);
|
||||||
|
out_put_sysfs:
|
||||||
|
edac_put_sysfs_subsys();
|
||||||
|
out:
|
||||||
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __exit edac_mc_sysfs_exit(void)
|
void __exit edac_mc_sysfs_exit(void)
|
||||||
@ -1148,4 +1162,5 @@ void __exit edac_mc_sysfs_exit(void)
|
|||||||
put_device(mci_pdev);
|
put_device(mci_pdev);
|
||||||
device_del(mci_pdev);
|
device_del(mci_pdev);
|
||||||
edac_put_sysfs_subsys();
|
edac_put_sysfs_subsys();
|
||||||
|
kfree(mci_pdev);
|
||||||
}
|
}
|
||||||
|
@ -18,9 +18,29 @@
|
|||||||
#define EDAC_VERSION "Ver: 3.0.0"
|
#define EDAC_VERSION "Ver: 3.0.0"
|
||||||
|
|
||||||
#ifdef CONFIG_EDAC_DEBUG
|
#ifdef CONFIG_EDAC_DEBUG
|
||||||
|
|
||||||
|
static int edac_set_debug_level(const char *buf, struct kernel_param *kp)
|
||||||
|
{
|
||||||
|
unsigned long val;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = kstrtoul(buf, 0, &val);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
if (val < 0 || val > 4)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
return param_set_int(buf, kp);
|
||||||
|
}
|
||||||
|
|
||||||
/* Values of 0 to 4 will generate output */
|
/* Values of 0 to 4 will generate output */
|
||||||
int edac_debug_level = 2;
|
int edac_debug_level = 2;
|
||||||
EXPORT_SYMBOL_GPL(edac_debug_level);
|
EXPORT_SYMBOL_GPL(edac_debug_level);
|
||||||
|
|
||||||
|
module_param_call(edac_debug_level, edac_set_debug_level, param_get_int,
|
||||||
|
&edac_debug_level, 0644);
|
||||||
|
MODULE_PARM_DESC(edac_debug_level, "EDAC debug level: [0-4], default: 2");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* scope is to module level only */
|
/* scope is to module level only */
|
||||||
@ -132,10 +152,3 @@ module_exit(edac_exit);
|
|||||||
MODULE_LICENSE("GPL");
|
MODULE_LICENSE("GPL");
|
||||||
MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al");
|
MODULE_AUTHOR("Doug Thompson www.softwarebitmaker.com, et al");
|
||||||
MODULE_DESCRIPTION("Core library routines for EDAC reporting");
|
MODULE_DESCRIPTION("Core library routines for EDAC reporting");
|
||||||
|
|
||||||
/* refer to *_sysfs.c files for parameters that are exported via sysfs */
|
|
||||||
|
|
||||||
#ifdef CONFIG_EDAC_DEBUG
|
|
||||||
module_param(edac_debug_level, int, 0644);
|
|
||||||
MODULE_PARM_DESC(edac_debug_level, "Debug level");
|
|
||||||
#endif
|
|
||||||
|
@ -470,7 +470,8 @@ struct edac_pci_ctl_info *edac_pci_create_generic_ctl(struct device *dev,
|
|||||||
|
|
||||||
pci->mod_name = mod_name;
|
pci->mod_name = mod_name;
|
||||||
pci->ctl_name = EDAC_PCI_GENCTL_NAME;
|
pci->ctl_name = EDAC_PCI_GENCTL_NAME;
|
||||||
pci->edac_check = edac_pci_generic_check;
|
if (edac_op_state == EDAC_OPSTATE_POLL)
|
||||||
|
pci->edac_check = edac_pci_generic_check;
|
||||||
|
|
||||||
pdata->edac_idx = edac_pci_idx++;
|
pdata->edac_idx = edac_pci_idx++;
|
||||||
|
|
||||||
|
@ -645,20 +645,16 @@ typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* pci_dev parity list iterator
|
* pci_dev parity list iterator
|
||||||
* Scan the PCI device list for one pass, looking for SERRORs
|
*
|
||||||
* Master Parity ERRORS or Parity ERRORs on primary or secondary devices
|
* Scan the PCI device list looking for SERRORs, Master Parity ERRORS or
|
||||||
|
* Parity ERRORs on primary or secondary devices.
|
||||||
*/
|
*/
|
||||||
static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
|
static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
|
||||||
{
|
{
|
||||||
struct pci_dev *dev = NULL;
|
struct pci_dev *dev = NULL;
|
||||||
|
|
||||||
/* request for kernel access to the next PCI device, if any,
|
for_each_pci_dev(dev)
|
||||||
* and while we are looking at it have its reference count
|
|
||||||
* bumped until we are done with it
|
|
||||||
*/
|
|
||||||
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
|
|
||||||
fn(dev);
|
fn(dev);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -113,14 +113,8 @@ static ssize_t highbank_mc_err_inject_write(struct file *file,
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int debugfs_open(struct inode *inode, struct file *file)
|
|
||||||
{
|
|
||||||
file->private_data = inode->i_private;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const struct file_operations highbank_mc_debug_inject_fops = {
|
static const struct file_operations highbank_mc_debug_inject_fops = {
|
||||||
.open = debugfs_open,
|
.open = simple_open,
|
||||||
.write = highbank_mc_err_inject_write,
|
.write = highbank_mc_err_inject_write,
|
||||||
.llseek = generic_file_llseek,
|
.llseek = generic_file_llseek,
|
||||||
};
|
};
|
||||||
|
@ -64,7 +64,7 @@ EXPORT_SYMBOL_GPL(to_msgs);
|
|||||||
const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
|
const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
|
||||||
EXPORT_SYMBOL_GPL(ii_msgs);
|
EXPORT_SYMBOL_GPL(ii_msgs);
|
||||||
|
|
||||||
static const char * const f15h_ic_mce_desc[] = {
|
static const char * const f15h_mc1_mce_desc[] = {
|
||||||
"UC during a demand linefill from L2",
|
"UC during a demand linefill from L2",
|
||||||
"Parity error during data load from IC",
|
"Parity error during data load from IC",
|
||||||
"Parity error for IC valid bit",
|
"Parity error for IC valid bit",
|
||||||
@ -84,7 +84,7 @@ static const char * const f15h_ic_mce_desc[] = {
|
|||||||
"fetch address FIFO"
|
"fetch address FIFO"
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * const f15h_cu_mce_desc[] = {
|
static const char * const f15h_mc2_mce_desc[] = {
|
||||||
"Fill ECC error on data fills", /* xec = 0x4 */
|
"Fill ECC error on data fills", /* xec = 0x4 */
|
||||||
"Fill parity error on insn fills",
|
"Fill parity error on insn fills",
|
||||||
"Prefetcher request FIFO parity error",
|
"Prefetcher request FIFO parity error",
|
||||||
@ -101,7 +101,7 @@ static const char * const f15h_cu_mce_desc[] = {
|
|||||||
"PRB address parity error"
|
"PRB address parity error"
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * const nb_mce_desc[] = {
|
static const char * const mc4_mce_desc[] = {
|
||||||
"DRAM ECC error detected on the NB",
|
"DRAM ECC error detected on the NB",
|
||||||
"CRC error detected on HT link",
|
"CRC error detected on HT link",
|
||||||
"Link-defined sync error packets detected on HT link",
|
"Link-defined sync error packets detected on HT link",
|
||||||
@ -123,7 +123,7 @@ static const char * const nb_mce_desc[] = {
|
|||||||
"ECC Error in the Probe Filter directory"
|
"ECC Error in the Probe Filter directory"
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char * const fr_ex_mce_desc[] = {
|
static const char * const mc5_mce_desc[] = {
|
||||||
"CPU Watchdog timer expire",
|
"CPU Watchdog timer expire",
|
||||||
"Wakeup array dest tag",
|
"Wakeup array dest tag",
|
||||||
"AG payload array",
|
"AG payload array",
|
||||||
@ -139,7 +139,7 @@ static const char * const fr_ex_mce_desc[] = {
|
|||||||
"DE error occurred"
|
"DE error occurred"
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool f12h_dc_mce(u16 ec, u8 xec)
|
static bool f12h_mc0_mce(u16 ec, u8 xec)
|
||||||
{
|
{
|
||||||
bool ret = false;
|
bool ret = false;
|
||||||
|
|
||||||
@ -157,26 +157,26 @@ static bool f12h_dc_mce(u16 ec, u8 xec)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool f10h_dc_mce(u16 ec, u8 xec)
|
static bool f10h_mc0_mce(u16 ec, u8 xec)
|
||||||
{
|
{
|
||||||
if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
|
if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
|
||||||
pr_cont("during data scrub.\n");
|
pr_cont("during data scrub.\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return f12h_dc_mce(ec, xec);
|
return f12h_mc0_mce(ec, xec);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool k8_dc_mce(u16 ec, u8 xec)
|
static bool k8_mc0_mce(u16 ec, u8 xec)
|
||||||
{
|
{
|
||||||
if (BUS_ERROR(ec)) {
|
if (BUS_ERROR(ec)) {
|
||||||
pr_cont("during system linefill.\n");
|
pr_cont("during system linefill.\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return f10h_dc_mce(ec, xec);
|
return f10h_mc0_mce(ec, xec);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool f14h_dc_mce(u16 ec, u8 xec)
|
static bool f14h_mc0_mce(u16 ec, u8 xec)
|
||||||
{
|
{
|
||||||
u8 r4 = R4(ec);
|
u8 r4 = R4(ec);
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
@ -228,7 +228,7 @@ static bool f14h_dc_mce(u16 ec, u8 xec)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool f15h_dc_mce(u16 ec, u8 xec)
|
static bool f15h_mc0_mce(u16 ec, u8 xec)
|
||||||
{
|
{
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
|
|
||||||
@ -275,12 +275,12 @@ static bool f15h_dc_mce(u16 ec, u8 xec)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void amd_decode_dc_mce(struct mce *m)
|
static void decode_mc0_mce(struct mce *m)
|
||||||
{
|
{
|
||||||
u16 ec = EC(m->status);
|
u16 ec = EC(m->status);
|
||||||
u8 xec = XEC(m->status, xec_mask);
|
u8 xec = XEC(m->status, xec_mask);
|
||||||
|
|
||||||
pr_emerg(HW_ERR "Data Cache Error: ");
|
pr_emerg(HW_ERR "MC0 Error: ");
|
||||||
|
|
||||||
/* TLB error signatures are the same across families */
|
/* TLB error signatures are the same across families */
|
||||||
if (TLB_ERROR(ec)) {
|
if (TLB_ERROR(ec)) {
|
||||||
@ -290,13 +290,13 @@ static void amd_decode_dc_mce(struct mce *m)
|
|||||||
: (xec ? "multimatch" : "parity")));
|
: (xec ? "multimatch" : "parity")));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else if (fam_ops->dc_mce(ec, xec))
|
} else if (fam_ops->mc0_mce(ec, xec))
|
||||||
;
|
;
|
||||||
else
|
else
|
||||||
pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
|
pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool k8_ic_mce(u16 ec, u8 xec)
|
static bool k8_mc1_mce(u16 ec, u8 xec)
|
||||||
{
|
{
|
||||||
u8 ll = LL(ec);
|
u8 ll = LL(ec);
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
@ -330,7 +330,7 @@ static bool k8_ic_mce(u16 ec, u8 xec)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool f14h_ic_mce(u16 ec, u8 xec)
|
static bool f14h_mc1_mce(u16 ec, u8 xec)
|
||||||
{
|
{
|
||||||
u8 r4 = R4(ec);
|
u8 r4 = R4(ec);
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
@ -349,7 +349,7 @@ static bool f14h_ic_mce(u16 ec, u8 xec)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool f15h_ic_mce(u16 ec, u8 xec)
|
static bool f15h_mc1_mce(u16 ec, u8 xec)
|
||||||
{
|
{
|
||||||
bool ret = true;
|
bool ret = true;
|
||||||
|
|
||||||
@ -358,19 +358,19 @@ static bool f15h_ic_mce(u16 ec, u8 xec)
|
|||||||
|
|
||||||
switch (xec) {
|
switch (xec) {
|
||||||
case 0x0 ... 0xa:
|
case 0x0 ... 0xa:
|
||||||
pr_cont("%s.\n", f15h_ic_mce_desc[xec]);
|
pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0xd:
|
case 0xd:
|
||||||
pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]);
|
pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x10:
|
case 0x10:
|
||||||
pr_cont("%s.\n", f15h_ic_mce_desc[xec-4]);
|
pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x11 ... 0x14:
|
case 0x11 ... 0x14:
|
||||||
pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]);
|
pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -379,12 +379,12 @@ static bool f15h_ic_mce(u16 ec, u8 xec)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void amd_decode_ic_mce(struct mce *m)
|
static void decode_mc1_mce(struct mce *m)
|
||||||
{
|
{
|
||||||
u16 ec = EC(m->status);
|
u16 ec = EC(m->status);
|
||||||
u8 xec = XEC(m->status, xec_mask);
|
u8 xec = XEC(m->status, xec_mask);
|
||||||
|
|
||||||
pr_emerg(HW_ERR "Instruction Cache Error: ");
|
pr_emerg(HW_ERR "MC1 Error: ");
|
||||||
|
|
||||||
if (TLB_ERROR(ec))
|
if (TLB_ERROR(ec))
|
||||||
pr_cont("%s TLB %s.\n", LL_MSG(ec),
|
pr_cont("%s TLB %s.\n", LL_MSG(ec),
|
||||||
@ -393,18 +393,18 @@ static void amd_decode_ic_mce(struct mce *m)
|
|||||||
bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
|
bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
|
||||||
|
|
||||||
pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
|
pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
|
||||||
} else if (fam_ops->ic_mce(ec, xec))
|
} else if (fam_ops->mc1_mce(ec, xec))
|
||||||
;
|
;
|
||||||
else
|
else
|
||||||
pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
|
pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void amd_decode_bu_mce(struct mce *m)
|
static void decode_mc2_mce(struct mce *m)
|
||||||
{
|
{
|
||||||
u16 ec = EC(m->status);
|
u16 ec = EC(m->status);
|
||||||
u8 xec = XEC(m->status, xec_mask);
|
u8 xec = XEC(m->status, xec_mask);
|
||||||
|
|
||||||
pr_emerg(HW_ERR "Bus Unit Error");
|
pr_emerg(HW_ERR "MC2 Error");
|
||||||
|
|
||||||
if (xec == 0x1)
|
if (xec == 0x1)
|
||||||
pr_cont(" in the write data buffers.\n");
|
pr_cont(" in the write data buffers.\n");
|
||||||
@ -429,24 +429,24 @@ static void amd_decode_bu_mce(struct mce *m)
|
|||||||
pr_cont(": %s parity/ECC error during data "
|
pr_cont(": %s parity/ECC error during data "
|
||||||
"access from L2.\n", R4_MSG(ec));
|
"access from L2.\n", R4_MSG(ec));
|
||||||
else
|
else
|
||||||
goto wrong_bu_mce;
|
goto wrong_mc2_mce;
|
||||||
} else
|
} else
|
||||||
goto wrong_bu_mce;
|
goto wrong_mc2_mce;
|
||||||
} else
|
} else
|
||||||
goto wrong_bu_mce;
|
goto wrong_mc2_mce;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
wrong_bu_mce:
|
wrong_mc2_mce:
|
||||||
pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
|
pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void amd_decode_cu_mce(struct mce *m)
|
static void decode_f15_mc2_mce(struct mce *m)
|
||||||
{
|
{
|
||||||
u16 ec = EC(m->status);
|
u16 ec = EC(m->status);
|
||||||
u8 xec = XEC(m->status, xec_mask);
|
u8 xec = XEC(m->status, xec_mask);
|
||||||
|
|
||||||
pr_emerg(HW_ERR "Combined Unit Error: ");
|
pr_emerg(HW_ERR "MC2 Error: ");
|
||||||
|
|
||||||
if (TLB_ERROR(ec)) {
|
if (TLB_ERROR(ec)) {
|
||||||
if (xec == 0x0)
|
if (xec == 0x0)
|
||||||
@ -454,63 +454,63 @@ static void amd_decode_cu_mce(struct mce *m)
|
|||||||
else if (xec == 0x1)
|
else if (xec == 0x1)
|
||||||
pr_cont("Poison data provided for TLB fill.\n");
|
pr_cont("Poison data provided for TLB fill.\n");
|
||||||
else
|
else
|
||||||
goto wrong_cu_mce;
|
goto wrong_f15_mc2_mce;
|
||||||
} else if (BUS_ERROR(ec)) {
|
} else if (BUS_ERROR(ec)) {
|
||||||
if (xec > 2)
|
if (xec > 2)
|
||||||
goto wrong_cu_mce;
|
goto wrong_f15_mc2_mce;
|
||||||
|
|
||||||
pr_cont("Error during attempted NB data read.\n");
|
pr_cont("Error during attempted NB data read.\n");
|
||||||
} else if (MEM_ERROR(ec)) {
|
} else if (MEM_ERROR(ec)) {
|
||||||
switch (xec) {
|
switch (xec) {
|
||||||
case 0x4 ... 0xc:
|
case 0x4 ... 0xc:
|
||||||
pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]);
|
pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x10 ... 0x14:
|
case 0x10 ... 0x14:
|
||||||
pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]);
|
pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
goto wrong_cu_mce;
|
goto wrong_f15_mc2_mce;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
wrong_cu_mce:
|
wrong_f15_mc2_mce:
|
||||||
pr_emerg(HW_ERR "Corrupted CU MCE info?\n");
|
pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void amd_decode_ls_mce(struct mce *m)
|
static void decode_mc3_mce(struct mce *m)
|
||||||
{
|
{
|
||||||
u16 ec = EC(m->status);
|
u16 ec = EC(m->status);
|
||||||
u8 xec = XEC(m->status, xec_mask);
|
u8 xec = XEC(m->status, xec_mask);
|
||||||
|
|
||||||
if (boot_cpu_data.x86 >= 0x14) {
|
if (boot_cpu_data.x86 >= 0x14) {
|
||||||
pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
|
pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
|
||||||
" please report on LKML.\n");
|
" please report on LKML.\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
pr_emerg(HW_ERR "Load Store Error");
|
pr_emerg(HW_ERR "MC3 Error");
|
||||||
|
|
||||||
if (xec == 0x0) {
|
if (xec == 0x0) {
|
||||||
u8 r4 = R4(ec);
|
u8 r4 = R4(ec);
|
||||||
|
|
||||||
if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
|
if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
|
||||||
goto wrong_ls_mce;
|
goto wrong_mc3_mce;
|
||||||
|
|
||||||
pr_cont(" during %s.\n", R4_MSG(ec));
|
pr_cont(" during %s.\n", R4_MSG(ec));
|
||||||
} else
|
} else
|
||||||
goto wrong_ls_mce;
|
goto wrong_mc3_mce;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
wrong_ls_mce:
|
wrong_mc3_mce:
|
||||||
pr_emerg(HW_ERR "Corrupted LS MCE info?\n");
|
pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void amd_decode_nb_mce(struct mce *m)
|
static void decode_mc4_mce(struct mce *m)
|
||||||
{
|
{
|
||||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||||
int node_id = amd_get_nb_id(m->extcpu);
|
int node_id = amd_get_nb_id(m->extcpu);
|
||||||
@ -518,7 +518,7 @@ void amd_decode_nb_mce(struct mce *m)
|
|||||||
u8 xec = XEC(m->status, 0x1f);
|
u8 xec = XEC(m->status, 0x1f);
|
||||||
u8 offset = 0;
|
u8 offset = 0;
|
||||||
|
|
||||||
pr_emerg(HW_ERR "Northbridge Error (node %d): ", node_id);
|
pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
|
||||||
|
|
||||||
switch (xec) {
|
switch (xec) {
|
||||||
case 0x0 ... 0xe:
|
case 0x0 ... 0xe:
|
||||||
@ -527,9 +527,9 @@ void amd_decode_nb_mce(struct mce *m)
|
|||||||
if (xec == 0x0 || xec == 0x8) {
|
if (xec == 0x0 || xec == 0x8) {
|
||||||
/* no ECCs on F11h */
|
/* no ECCs on F11h */
|
||||||
if (c->x86 == 0x11)
|
if (c->x86 == 0x11)
|
||||||
goto wrong_nb_mce;
|
goto wrong_mc4_mce;
|
||||||
|
|
||||||
pr_cont("%s.\n", nb_mce_desc[xec]);
|
pr_cont("%s.\n", mc4_mce_desc[xec]);
|
||||||
|
|
||||||
if (nb_bus_decoder)
|
if (nb_bus_decoder)
|
||||||
nb_bus_decoder(node_id, m);
|
nb_bus_decoder(node_id, m);
|
||||||
@ -543,14 +543,14 @@ void amd_decode_nb_mce(struct mce *m)
|
|||||||
else if (BUS_ERROR(ec))
|
else if (BUS_ERROR(ec))
|
||||||
pr_cont("DMA Exclusion Vector Table Walk error.\n");
|
pr_cont("DMA Exclusion Vector Table Walk error.\n");
|
||||||
else
|
else
|
||||||
goto wrong_nb_mce;
|
goto wrong_mc4_mce;
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case 0x19:
|
case 0x19:
|
||||||
if (boot_cpu_data.x86 == 0x15)
|
if (boot_cpu_data.x86 == 0x15)
|
||||||
pr_cont("Compute Unit Data Error.\n");
|
pr_cont("Compute Unit Data Error.\n");
|
||||||
else
|
else
|
||||||
goto wrong_nb_mce;
|
goto wrong_mc4_mce;
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case 0x1c ... 0x1f:
|
case 0x1c ... 0x1f:
|
||||||
@ -558,46 +558,44 @@ void amd_decode_nb_mce(struct mce *m)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
goto wrong_nb_mce;
|
goto wrong_mc4_mce;
|
||||||
}
|
}
|
||||||
|
|
||||||
pr_cont("%s.\n", nb_mce_desc[xec - offset]);
|
pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
wrong_nb_mce:
|
wrong_mc4_mce:
|
||||||
pr_emerg(HW_ERR "Corrupted NB MCE info?\n");
|
pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
|
|
||||||
|
|
||||||
static void amd_decode_fr_mce(struct mce *m)
|
static void decode_mc5_mce(struct mce *m)
|
||||||
{
|
{
|
||||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||||
u8 xec = XEC(m->status, xec_mask);
|
u8 xec = XEC(m->status, xec_mask);
|
||||||
|
|
||||||
if (c->x86 == 0xf || c->x86 == 0x11)
|
if (c->x86 == 0xf || c->x86 == 0x11)
|
||||||
goto wrong_fr_mce;
|
goto wrong_mc5_mce;
|
||||||
|
|
||||||
pr_emerg(HW_ERR "%s Error: ",
|
pr_emerg(HW_ERR "MC5 Error: ");
|
||||||
(c->x86 == 0x15 ? "Execution Unit" : "FIROB"));
|
|
||||||
|
|
||||||
if (xec == 0x0 || xec == 0xc)
|
if (xec == 0x0 || xec == 0xc)
|
||||||
pr_cont("%s.\n", fr_ex_mce_desc[xec]);
|
pr_cont("%s.\n", mc5_mce_desc[xec]);
|
||||||
else if (xec < 0xd)
|
else if (xec < 0xd)
|
||||||
pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]);
|
pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
|
||||||
else
|
else
|
||||||
goto wrong_fr_mce;
|
goto wrong_mc5_mce;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
wrong_fr_mce:
|
wrong_mc5_mce:
|
||||||
pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
|
pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void amd_decode_fp_mce(struct mce *m)
|
static void decode_mc6_mce(struct mce *m)
|
||||||
{
|
{
|
||||||
u8 xec = XEC(m->status, xec_mask);
|
u8 xec = XEC(m->status, xec_mask);
|
||||||
|
|
||||||
pr_emerg(HW_ERR "Floating Point Unit Error: ");
|
pr_emerg(HW_ERR "MC6 Error: ");
|
||||||
|
|
||||||
switch (xec) {
|
switch (xec) {
|
||||||
case 0x1:
|
case 0x1:
|
||||||
@ -621,7 +619,7 @@ static void amd_decode_fp_mce(struct mce *m)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
goto wrong_fp_mce;
|
goto wrong_mc6_mce;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -629,8 +627,8 @@ static void amd_decode_fp_mce(struct mce *m)
|
|||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
wrong_fp_mce:
|
wrong_mc6_mce:
|
||||||
pr_emerg(HW_ERR "Corrupted FP MCE info?\n");
|
pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void amd_decode_err_code(u16 ec)
|
static inline void amd_decode_err_code(u16 ec)
|
||||||
@ -669,17 +667,73 @@ static bool amd_filter_mce(struct mce *m)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *decode_error_status(struct mce *m)
|
||||||
|
{
|
||||||
|
if (m->status & MCI_STATUS_UC) {
|
||||||
|
if (m->status & MCI_STATUS_PCC)
|
||||||
|
return "System Fatal error.";
|
||||||
|
if (m->mcgstatus & MCG_STATUS_RIPV)
|
||||||
|
return "Uncorrected, software restartable error.";
|
||||||
|
return "Uncorrected, software containable error.";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m->status & MCI_STATUS_DEFERRED)
|
||||||
|
return "Deferred error.";
|
||||||
|
|
||||||
|
return "Corrected error, no action required.";
|
||||||
|
}
|
||||||
|
|
||||||
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
||||||
{
|
{
|
||||||
struct mce *m = (struct mce *)data;
|
struct mce *m = (struct mce *)data;
|
||||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
|
||||||
int ecc;
|
int ecc;
|
||||||
|
|
||||||
if (amd_filter_mce(m))
|
if (amd_filter_mce(m))
|
||||||
return NOTIFY_STOP;
|
return NOTIFY_STOP;
|
||||||
|
|
||||||
pr_emerg(HW_ERR "CPU:%d\tMC%d_STATUS[%s|%s|%s|%s|%s",
|
switch (m->bank) {
|
||||||
m->extcpu, m->bank,
|
case 0:
|
||||||
|
decode_mc0_mce(m);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
decode_mc1_mce(m);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
if (c->x86 == 0x15)
|
||||||
|
decode_f15_mc2_mce(m);
|
||||||
|
else
|
||||||
|
decode_mc2_mce(m);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
decode_mc3_mce(m);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 4:
|
||||||
|
decode_mc4_mce(m);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 5:
|
||||||
|
decode_mc5_mce(m);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 6:
|
||||||
|
decode_mc6_mce(m);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
pr_emerg(HW_ERR "Error Status: %s\n", decode_error_status(m));
|
||||||
|
|
||||||
|
pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
|
||||||
|
m->extcpu,
|
||||||
|
c->x86, c->x86_model, c->x86_mask,
|
||||||
|
m->bank,
|
||||||
((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
|
((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
|
||||||
((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
|
((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
|
||||||
((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
|
((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
|
||||||
@ -688,8 +742,8 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
|||||||
|
|
||||||
if (c->x86 == 0x15)
|
if (c->x86 == 0x15)
|
||||||
pr_cont("|%s|%s",
|
pr_cont("|%s|%s",
|
||||||
((m->status & BIT_64(44)) ? "Deferred" : "-"),
|
((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
|
||||||
((m->status & BIT_64(43)) ? "Poison" : "-"));
|
((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
|
||||||
|
|
||||||
/* do the two bits[14:13] together */
|
/* do the two bits[14:13] together */
|
||||||
ecc = (m->status >> 45) & 0x3;
|
ecc = (m->status >> 45) & 0x3;
|
||||||
@ -699,43 +753,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
|||||||
pr_cont("]: 0x%016llx\n", m->status);
|
pr_cont("]: 0x%016llx\n", m->status);
|
||||||
|
|
||||||
if (m->status & MCI_STATUS_ADDRV)
|
if (m->status & MCI_STATUS_ADDRV)
|
||||||
pr_emerg(HW_ERR "\tMC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
|
pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr);
|
||||||
|
|
||||||
switch (m->bank) {
|
|
||||||
case 0:
|
|
||||||
amd_decode_dc_mce(m);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 1:
|
|
||||||
amd_decode_ic_mce(m);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 2:
|
|
||||||
if (c->x86 == 0x15)
|
|
||||||
amd_decode_cu_mce(m);
|
|
||||||
else
|
|
||||||
amd_decode_bu_mce(m);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 3:
|
|
||||||
amd_decode_ls_mce(m);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 4:
|
|
||||||
amd_decode_nb_mce(m);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 5:
|
|
||||||
amd_decode_fr_mce(m);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 6:
|
|
||||||
amd_decode_fp_mce(m);
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
amd_decode_err_code(m->status & 0xffff);
|
amd_decode_err_code(m->status & 0xffff);
|
||||||
|
|
||||||
@ -763,35 +781,35 @@ static int __init mce_amd_init(void)
|
|||||||
|
|
||||||
switch (c->x86) {
|
switch (c->x86) {
|
||||||
case 0xf:
|
case 0xf:
|
||||||
fam_ops->dc_mce = k8_dc_mce;
|
fam_ops->mc0_mce = k8_mc0_mce;
|
||||||
fam_ops->ic_mce = k8_ic_mce;
|
fam_ops->mc1_mce = k8_mc1_mce;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x10:
|
case 0x10:
|
||||||
fam_ops->dc_mce = f10h_dc_mce;
|
fam_ops->mc0_mce = f10h_mc0_mce;
|
||||||
fam_ops->ic_mce = k8_ic_mce;
|
fam_ops->mc1_mce = k8_mc1_mce;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x11:
|
case 0x11:
|
||||||
fam_ops->dc_mce = k8_dc_mce;
|
fam_ops->mc0_mce = k8_mc0_mce;
|
||||||
fam_ops->ic_mce = k8_ic_mce;
|
fam_ops->mc1_mce = k8_mc1_mce;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x12:
|
case 0x12:
|
||||||
fam_ops->dc_mce = f12h_dc_mce;
|
fam_ops->mc0_mce = f12h_mc0_mce;
|
||||||
fam_ops->ic_mce = k8_ic_mce;
|
fam_ops->mc1_mce = k8_mc1_mce;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x14:
|
case 0x14:
|
||||||
nb_err_cpumask = 0x3;
|
nb_err_cpumask = 0x3;
|
||||||
fam_ops->dc_mce = f14h_dc_mce;
|
fam_ops->mc0_mce = f14h_mc0_mce;
|
||||||
fam_ops->ic_mce = f14h_ic_mce;
|
fam_ops->mc1_mce = f14h_mc1_mce;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x15:
|
case 0x15:
|
||||||
xec_mask = 0x1f;
|
xec_mask = 0x1f;
|
||||||
fam_ops->dc_mce = f15h_dc_mce;
|
fam_ops->mc0_mce = f15h_mc0_mce;
|
||||||
fam_ops->ic_mce = f15h_ic_mce;
|
fam_ops->mc1_mce = f15h_mc1_mce;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -29,10 +29,8 @@
|
|||||||
#define R4(x) (((x) >> 4) & 0xf)
|
#define R4(x) (((x) >> 4) & 0xf)
|
||||||
#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!")
|
#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!")
|
||||||
|
|
||||||
/*
|
#define MCI_STATUS_DEFERRED BIT_64(44)
|
||||||
* F3x4C bits (MCi_STATUS' high half)
|
#define MCI_STATUS_POISON BIT_64(43)
|
||||||
*/
|
|
||||||
#define NBSH_ERR_CPU_VAL BIT(24)
|
|
||||||
|
|
||||||
enum tt_ids {
|
enum tt_ids {
|
||||||
TT_INSTR = 0,
|
TT_INSTR = 0,
|
||||||
@ -78,14 +76,13 @@ extern const char * const ii_msgs[];
|
|||||||
* per-family decoder ops
|
* per-family decoder ops
|
||||||
*/
|
*/
|
||||||
struct amd_decoder_ops {
|
struct amd_decoder_ops {
|
||||||
bool (*dc_mce)(u16, u8);
|
bool (*mc0_mce)(u16, u8);
|
||||||
bool (*ic_mce)(u16, u8);
|
bool (*mc1_mce)(u16, u8);
|
||||||
};
|
};
|
||||||
|
|
||||||
void amd_report_gart_errors(bool);
|
void amd_report_gart_errors(bool);
|
||||||
void amd_register_ecc_decoder(void (*f)(int, struct mce *));
|
void amd_register_ecc_decoder(void (*f)(int, struct mce *));
|
||||||
void amd_unregister_ecc_decoder(void (*f)(int, struct mce *));
|
void amd_unregister_ecc_decoder(void (*f)(int, struct mce *));
|
||||||
void amd_decode_nb_mce(struct mce *);
|
|
||||||
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
|
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data);
|
||||||
|
|
||||||
#endif /* _EDAC_MCE_AMD_H */
|
#endif /* _EDAC_MCE_AMD_H */
|
||||||
|
@ -533,6 +533,7 @@ struct csrow_info {
|
|||||||
|
|
||||||
u32 ue_count; /* Uncorrectable Errors for this csrow */
|
u32 ue_count; /* Uncorrectable Errors for this csrow */
|
||||||
u32 ce_count; /* Correctable Errors for this csrow */
|
u32 ce_count; /* Correctable Errors for this csrow */
|
||||||
|
u32 nr_pages; /* combined pages count of all channels */
|
||||||
|
|
||||||
struct mem_ctl_info *mci; /* the parent */
|
struct mem_ctl_info *mci; /* the parent */
|
||||||
|
|
||||||
@ -667,6 +668,8 @@ struct mem_ctl_info {
|
|||||||
u32 fake_inject_ue;
|
u32 fake_inject_ue;
|
||||||
u16 fake_inject_count;
|
u16 fake_inject_count;
|
||||||
#endif
|
#endif
|
||||||
|
__u8 csbased : 1, /* csrow-based memory controller */
|
||||||
|
__resv : 7;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user