- Add support for Skylake-S CPUs to ie31200_edac
- Improve error decoding speed of the Intel drivers by avoiding the ACPI facilities but doing decoding in the driver itself - Other misc improvements to the Intel drivers - The usual cleanups and fixlets all over EDAC land -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmM76jAACgkQEsHwGGHe VUqSjw/+LCvRqygvaPxoEOZpnid6hXsruVGDQDeDp8PHJTapvezOzJY0KnBydAhw QzdnrFwkJjto/caRFpgJRN6cxZHP4IuNQ65UZCHj8Lw6wEkHMV4ptkYbLSihZcMn prkmASZkkTe2gcMchNidjTmYLACSHp1EoZJRWkXZbF6dy4NQHTU78fNp62WSeUoE DdMwEtzrMkqNRDJ7vVEqNLdJudOEW2Uhr5RqU8WuZcvj1pyE4ckGCiWEs6vmEtKi YxLJ7Qg+P7TVQLB+5l2kXHVgHoVj5WXiKMFi0ws2vXojN7i71LCY5zVmoWFSimcX liJ6iK3OwZCE1r0AY9DVbRuIRhW4cwGxI07Am/MNhbQsE0pdY4LryNooosEAMppk Knsbxycc/6Nzr0dnbsO9uMRautPR9jdbiQ/KZQZvcGgvCdeNQddE3mR0NSPMOzD8 CDuufe+/CudZ5ylWpLP1MW3bGrgH8HUQO85N8nDprDGypGjO08bTIUpFoZdJN2nA Qz31HDs98TcgDWQrpDjR1g/fi/drobJg6OFCQZUGEldEJnVI1Ju2uxvTkyZqH64T 52OwnqcxeNKHH6AQzN0/wLkB2vNpBSngMB0VPEbKAlDRKFZzY4vJs3XMEK2+O8QS BSRvClFRQUl06dEeb+7zukXW9z9cBXGULKW/CGtS2cDd+XVz/PE= =Gh9S -----END PGP SIGNATURE----- Merge tag 'edac_updates_for_v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras Pull EDAC updates from Borislav Petkov: - Add support for Skylake-S CPUs to ie31200_edac - Improve error decoding speed of the Intel drivers by avoiding the ACPI facilities but doing decoding in the driver itself - Other misc improvements to the Intel drivers - The usual cleanups and fixlets all over EDAC land * tag 'edac_updates_for_v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: EDAC/i7300: Correct the i7300_exit() function name in comment x86/sb_edac: Add row column translation for Broadwell EDAC/i10nm: Print an extra register set of retry_rd_err_log EDAC/i10nm: Retrieve and print retry_rd_err_log registers for HBM EDAC/skx_common: Add ChipSelect ADXL component EDAC/ppc_4xx: Reorder symbols to get rid of a few forward declarations EDAC: Remove obsolete declarations in edac_module.h EDAC/i10nm: Add driver decoder for Ice 
Lake and Tremont CPUs EDAC/skx_common: Make output format similar EDAC/skx_common: Use driver decoder first EDAC/mc: Drop duplicated dimm->nr_pages debug printout EDAC/mc: Replace spaces with tabs in memtype flags definition EDAC/wq: Remove unneeded flush_workqueue() EDAC/ie31200: Add Skylake-S support
This commit is contained in:
commit
bf7676251b
@ -42,6 +42,7 @@
|
||||
#define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */
|
||||
#define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38)
|
||||
#define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT)
|
||||
#define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff)
|
||||
|
||||
/* AMD-specific bits */
|
||||
#define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */
|
||||
|
@ -103,7 +103,6 @@ static void edac_mc_dump_dimm(struct dimm_info *dimm)
|
||||
edac_dbg(4, " dimm->label = '%s'\n", dimm->label);
|
||||
edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages);
|
||||
edac_dbg(4, " dimm->grain = %d\n", dimm->grain);
|
||||
edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages);
|
||||
}
|
||||
|
||||
static void edac_mc_dump_csrow(struct csrow_info *csrow)
|
||||
|
@ -28,13 +28,9 @@ void edac_mc_sysfs_exit(void);
|
||||
extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
|
||||
const struct attribute_group **groups);
|
||||
extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
|
||||
extern int edac_get_log_ue(void);
|
||||
extern int edac_get_log_ce(void);
|
||||
extern int edac_get_panic_on_ue(void);
|
||||
extern int edac_mc_get_log_ue(void);
|
||||
extern int edac_mc_get_log_ce(void);
|
||||
extern int edac_mc_get_panic_on_ue(void);
|
||||
extern int edac_get_poll_msec(void);
|
||||
extern unsigned int edac_mc_get_poll_msec(void);
|
||||
|
||||
unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
|
||||
|
@ -74,31 +74,47 @@ static struct list_head *i10nm_edac_list;
|
||||
|
||||
static struct res_config *res_cfg;
|
||||
static int retry_rd_err_log;
|
||||
static int decoding_via_mca;
|
||||
static bool mem_cfg_2lm;
|
||||
|
||||
static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
|
||||
static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
|
||||
static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
|
||||
static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
|
||||
static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
|
||||
static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
|
||||
static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
|
||||
static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
|
||||
static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
|
||||
|
||||
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable)
|
||||
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
|
||||
u32 *offsets_scrub, u32 *offsets_demand,
|
||||
u32 *offsets_demand2)
|
||||
{
|
||||
u32 s, d;
|
||||
u32 s, d, d2;
|
||||
|
||||
if (!imc->mbase)
|
||||
return;
|
||||
|
||||
s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]);
|
||||
d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]);
|
||||
s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
|
||||
d = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
|
||||
if (offsets_demand2)
|
||||
d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]);
|
||||
|
||||
if (enable) {
|
||||
/* Save default configurations */
|
||||
imc->chan[chan].retry_rd_err_log_s = s;
|
||||
imc->chan[chan].retry_rd_err_log_d = d;
|
||||
if (offsets_demand2)
|
||||
imc->chan[chan].retry_rd_err_log_d2 = d2;
|
||||
|
||||
s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
|
||||
s |= RETRY_RD_ERR_LOG_EN;
|
||||
d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
|
||||
d |= RETRY_RD_ERR_LOG_EN;
|
||||
|
||||
if (offsets_demand2) {
|
||||
d2 &= ~RETRY_RD_ERR_LOG_UC;
|
||||
d2 |= RETRY_RD_ERR_LOG_NOOVER;
|
||||
d2 |= RETRY_RD_ERR_LOG_EN;
|
||||
}
|
||||
} else {
|
||||
/* Restore default configurations */
|
||||
if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
|
||||
@ -113,23 +129,55 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
|
||||
d |= RETRY_RD_ERR_LOG_NOOVER;
|
||||
if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
|
||||
d &= ~RETRY_RD_ERR_LOG_EN;
|
||||
|
||||
if (offsets_demand2) {
|
||||
if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC)
|
||||
d2 |= RETRY_RD_ERR_LOG_UC;
|
||||
if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER))
|
||||
d2 &= ~RETRY_RD_ERR_LOG_NOOVER;
|
||||
if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN))
|
||||
d2 &= ~RETRY_RD_ERR_LOG_EN;
|
||||
}
|
||||
}
|
||||
|
||||
I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s);
|
||||
I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d);
|
||||
I10NM_SET_REG32(imc, chan, offsets_scrub[0], s);
|
||||
I10NM_SET_REG32(imc, chan, offsets_demand[0], d);
|
||||
if (offsets_demand2)
|
||||
I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2);
|
||||
}
|
||||
|
||||
static void enable_retry_rd_err_log(bool enable)
|
||||
{
|
||||
struct skx_imc *imc;
|
||||
struct skx_dev *d;
|
||||
int i, j;
|
||||
|
||||
edac_dbg(2, "\n");
|
||||
|
||||
list_for_each_entry(d, i10nm_edac_list, list)
|
||||
for (i = 0; i < I10NM_NUM_IMC; i++)
|
||||
for (j = 0; j < I10NM_NUM_CHANNELS; j++)
|
||||
__enable_retry_rd_err_log(&d->imc[i], j, enable);
|
||||
for (i = 0; i < I10NM_NUM_IMC; i++) {
|
||||
imc = &d->imc[i];
|
||||
if (!imc->mbase)
|
||||
continue;
|
||||
|
||||
for (j = 0; j < I10NM_NUM_CHANNELS; j++) {
|
||||
if (imc->hbm_mc) {
|
||||
__enable_retry_rd_err_log(imc, j, enable,
|
||||
res_cfg->offsets_scrub_hbm0,
|
||||
res_cfg->offsets_demand_hbm0,
|
||||
NULL);
|
||||
__enable_retry_rd_err_log(imc, j, enable,
|
||||
res_cfg->offsets_scrub_hbm1,
|
||||
res_cfg->offsets_demand_hbm1,
|
||||
NULL);
|
||||
} else {
|
||||
__enable_retry_rd_err_log(imc, j, enable,
|
||||
res_cfg->offsets_scrub,
|
||||
res_cfg->offsets_demand,
|
||||
res_cfg->offsets_demand2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
|
||||
@ -138,14 +186,33 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
|
||||
struct skx_imc *imc = &res->dev->imc[res->imc];
|
||||
u32 log0, log1, log2, log3, log4;
|
||||
u32 corr0, corr1, corr2, corr3;
|
||||
u32 lxg0, lxg1, lxg3, lxg4;
|
||||
u32 *xffsets = NULL;
|
||||
u64 log2a, log5;
|
||||
u64 lxg2a, lxg5;
|
||||
u32 *offsets;
|
||||
int n;
|
||||
int n, pch;
|
||||
|
||||
if (!imc->mbase)
|
||||
return;
|
||||
|
||||
offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand;
|
||||
if (imc->hbm_mc) {
|
||||
pch = res->cs & 1;
|
||||
|
||||
if (pch)
|
||||
offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 :
|
||||
res_cfg->offsets_demand_hbm1;
|
||||
else
|
||||
offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 :
|
||||
res_cfg->offsets_demand_hbm0;
|
||||
} else {
|
||||
if (scrub_err) {
|
||||
offsets = res_cfg->offsets_scrub;
|
||||
} else {
|
||||
offsets = res_cfg->offsets_demand;
|
||||
xffsets = res_cfg->offsets_demand2;
|
||||
}
|
||||
}
|
||||
|
||||
log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
|
||||
log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
|
||||
@ -153,20 +220,52 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
|
||||
log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
|
||||
log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);
|
||||
|
||||
if (xffsets) {
|
||||
lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]);
|
||||
lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]);
|
||||
lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]);
|
||||
lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]);
|
||||
lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]);
|
||||
}
|
||||
|
||||
if (res_cfg->type == SPR) {
|
||||
log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
|
||||
n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx]",
|
||||
n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
|
||||
log0, log1, log2a, log3, log4, log5);
|
||||
|
||||
if (len - n > 0) {
|
||||
if (xffsets) {
|
||||
lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]);
|
||||
n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
|
||||
lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5);
|
||||
} else {
|
||||
n += snprintf(msg + n, len - n, "]");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
|
||||
n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
|
||||
log0, log1, log2, log3, log4, log5);
|
||||
}
|
||||
|
||||
corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
|
||||
corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
|
||||
corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
|
||||
corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
|
||||
if (imc->hbm_mc) {
|
||||
if (pch) {
|
||||
corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18);
|
||||
corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c);
|
||||
corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20);
|
||||
corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24);
|
||||
} else {
|
||||
corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818);
|
||||
corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c);
|
||||
corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820);
|
||||
corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824);
|
||||
}
|
||||
} else {
|
||||
corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
|
||||
corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
|
||||
corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
|
||||
corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
|
||||
}
|
||||
|
||||
if (len - n > 0)
|
||||
snprintf(msg + n, len - n,
|
||||
@ -177,9 +276,16 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
|
||||
corr3 & 0xffff, corr3 >> 16);
|
||||
|
||||
/* Clear status bits */
|
||||
if (retry_rd_err_log == 2 && (log0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
|
||||
log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
|
||||
I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
|
||||
if (retry_rd_err_log == 2) {
|
||||
if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) {
|
||||
log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
|
||||
I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
|
||||
}
|
||||
|
||||
if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
|
||||
lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
|
||||
I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -231,6 +337,103 @@ static bool i10nm_check_2lm(struct res_config *cfg)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether the error comes from DDRT by ICX/Tremont model specific error code.
|
||||
* Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS.
|
||||
*/
|
||||
static bool i10nm_mscod_is_ddrt(u32 mscod)
|
||||
{
|
||||
switch (mscod) {
|
||||
case 0x0106: case 0x0107:
|
||||
case 0x0800: case 0x0804:
|
||||
case 0x0806 ... 0x0808:
|
||||
case 0x080a ... 0x080e:
|
||||
case 0x0810: case 0x0811:
|
||||
case 0x0816: case 0x081e:
|
||||
case 0x081f:
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool i10nm_mc_decode_available(struct mce *mce)
|
||||
{
|
||||
u8 bank;
|
||||
|
||||
if (!decoding_via_mca || mem_cfg_2lm)
|
||||
return false;
|
||||
|
||||
if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
|
||||
!= (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
|
||||
return false;
|
||||
|
||||
bank = mce->bank;
|
||||
|
||||
switch (res_cfg->type) {
|
||||
case I10NM:
|
||||
if (bank < 13 || bank > 26)
|
||||
return false;
|
||||
|
||||
/* DDRT errors can't be decoded from MCA bank registers */
|
||||
if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT)
|
||||
return false;
|
||||
|
||||
if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status)))
|
||||
return false;
|
||||
|
||||
/* Check whether one of {13,14,17,18,21,22,25,26} */
|
||||
return ((bank - 13) & BIT(1)) == 0;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool i10nm_mc_decode(struct decoded_addr *res)
|
||||
{
|
||||
struct mce *m = res->mce;
|
||||
struct skx_dev *d;
|
||||
u8 bank;
|
||||
|
||||
if (!i10nm_mc_decode_available(m))
|
||||
return false;
|
||||
|
||||
list_for_each_entry(d, i10nm_edac_list, list) {
|
||||
if (d->imc[0].src_id == m->socketid) {
|
||||
res->socket = m->socketid;
|
||||
res->dev = d;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (res_cfg->type) {
|
||||
case I10NM:
|
||||
bank = m->bank - 13;
|
||||
res->imc = bank / 4;
|
||||
res->channel = bank % 2;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!res->dev) {
|
||||
skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
|
||||
m->socketid, res->imc);
|
||||
return false;
|
||||
}
|
||||
|
||||
res->column = GET_BITFIELD(m->misc, 9, 18) << 2;
|
||||
res->row = GET_BITFIELD(m->misc, 19, 39);
|
||||
res->bank_group = GET_BITFIELD(m->misc, 40, 41);
|
||||
res->bank_address = GET_BITFIELD(m->misc, 42, 43);
|
||||
res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2;
|
||||
res->rank = GET_BITFIELD(m->misc, 56, 58);
|
||||
res->dimm = res->rank >> 2;
|
||||
res->rank = res->rank % 4;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int i10nm_get_ddr_munits(void)
|
||||
{
|
||||
struct pci_dev *mdev;
|
||||
@ -420,7 +623,12 @@ static struct res_config spr_cfg = {
|
||||
.sad_all_devfn = PCI_DEVFN(10, 0),
|
||||
.sad_all_offset = 0x300,
|
||||
.offsets_scrub = offsets_scrub_spr,
|
||||
.offsets_scrub_hbm0 = offsets_scrub_spr_hbm0,
|
||||
.offsets_scrub_hbm1 = offsets_scrub_spr_hbm1,
|
||||
.offsets_demand = offsets_demand_spr,
|
||||
.offsets_demand2 = offsets_demand2_spr,
|
||||
.offsets_demand_hbm0 = offsets_demand_spr_hbm0,
|
||||
.offsets_demand_hbm1 = offsets_demand_spr_hbm1,
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id i10nm_cpuids[] = {
|
||||
@ -574,7 +782,8 @@ static int __init i10nm_init(void)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
skx_set_mem_cfg(i10nm_check_2lm(cfg));
|
||||
mem_cfg_2lm = i10nm_check_2lm(cfg);
|
||||
skx_set_mem_cfg(mem_cfg_2lm);
|
||||
|
||||
rc = i10nm_get_ddr_munits();
|
||||
|
||||
@ -626,9 +835,11 @@ static int __init i10nm_init(void)
|
||||
setup_i10nm_debug();
|
||||
|
||||
if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
|
||||
skx_set_decode(NULL, show_retry_rd_err_log);
|
||||
skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
|
||||
if (retry_rd_err_log == 2)
|
||||
enable_retry_rd_err_log(true);
|
||||
} else {
|
||||
skx_set_decode(i10nm_mc_decode, NULL);
|
||||
}
|
||||
|
||||
i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
|
||||
@ -658,6 +869,34 @@ static void __exit i10nm_exit(void)
|
||||
module_init(i10nm_init);
|
||||
module_exit(i10nm_exit);
|
||||
|
||||
static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp)
|
||||
{
|
||||
unsigned long val;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoul(buf, 0, &val);
|
||||
|
||||
if (ret || val > 1)
|
||||
return -EINVAL;
|
||||
|
||||
if (val && mem_cfg_2lm) {
|
||||
i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
ret = param_set_int(buf, kp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct kernel_param_ops decoding_via_mca_param_ops = {
|
||||
.set = set_decoding_via_mca,
|
||||
.get = param_get_int,
|
||||
};
|
||||
|
||||
module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644);
|
||||
MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable");
|
||||
|
||||
module_param(retry_rd_err_log, int, 0444);
|
||||
MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");
|
||||
|
||||
|
@ -1193,7 +1193,7 @@ static int __init i7300_init(void)
|
||||
}
|
||||
|
||||
/**
|
||||
* i7300_init() - Unregisters the driver
|
||||
* i7300_exit() - Unregisters the driver
|
||||
*/
|
||||
static void __exit i7300_exit(void)
|
||||
{
|
||||
|
@ -20,11 +20,15 @@
|
||||
* 0c08: Xeon E3-1200 v3 Processor DRAM Controller
|
||||
* 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
|
||||
* 5918: Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
|
||||
* 190f: 6th Gen Core Dual-Core Processor Host Bridge/DRAM Registers
|
||||
* 191f: 6th Gen Core Quad-Core Processor Host Bridge/DRAM Registers
|
||||
* 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers
|
||||
*
|
||||
* Based on Intel specification:
|
||||
* https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
|
||||
* http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html
|
||||
* https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/desktop-6th-gen-core-family-datasheet-vol-2.pdf
|
||||
* https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v6-vol-2-datasheet.pdf
|
||||
* https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html
|
||||
* https://www.intel.com/content/www/us/en/products/docs/processors/core/8th-gen-core-family-datasheet-vol-2.html
|
||||
*
|
||||
@ -53,15 +57,17 @@
|
||||
#define ie31200_printk(level, fmt, arg...) \
|
||||
edac_printk(level, "ie31200", fmt, ##arg)
|
||||
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_1 0x0108
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_2 0x010c
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_3 0x0150
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_4 0x0158
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x5918
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_1 0x0108
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_2 0x010c
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_3 0x0150
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_4 0x0158
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x190F
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x1918
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_10 0x191F
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x5918
|
||||
|
||||
/* Coffee Lake-S */
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK 0x3e00
|
||||
@ -80,6 +86,8 @@
|
||||
#define DEVICE_ID_SKYLAKE_OR_LATER(did) \
|
||||
(((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \
|
||||
((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \
|
||||
((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \
|
||||
((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \
|
||||
(((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \
|
||||
PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK))
|
||||
|
||||
@ -577,6 +585,8 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
|
||||
{ PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
|
||||
{ PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
|
||||
{ PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
|
||||
{ PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
|
||||
{ PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
|
||||
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
|
||||
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
|
||||
{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
|
||||
|
@ -178,11 +178,6 @@ struct ppc4xx_ecc_status {
|
||||
u32 wmirq;
|
||||
};
|
||||
|
||||
/* Function Prototypes */
|
||||
|
||||
static int ppc4xx_edac_probe(struct platform_device *device);
|
||||
static int ppc4xx_edac_remove(struct platform_device *device);
|
||||
|
||||
/* Global Variables */
|
||||
|
||||
/*
|
||||
@ -197,15 +192,6 @@ static const struct of_device_id ppc4xx_edac_match[] = {
|
||||
};
|
||||
MODULE_DEVICE_TABLE(of, ppc4xx_edac_match);
|
||||
|
||||
static struct platform_driver ppc4xx_edac_driver = {
|
||||
.probe = ppc4xx_edac_probe,
|
||||
.remove = ppc4xx_edac_remove,
|
||||
.driver = {
|
||||
.name = PPC4XX_EDAC_MODULE_NAME,
|
||||
.of_match_table = ppc4xx_edac_match,
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* TODO: The row and channel parameters likely need to be dynamically
|
||||
* set based on the aforementioned variant controller realizations.
|
||||
@ -1391,6 +1377,15 @@ ppc4xx_edac_opstate_init(void)
|
||||
EDAC_OPSTATE_UNKNOWN_STR)));
|
||||
}
|
||||
|
||||
static struct platform_driver ppc4xx_edac_driver = {
|
||||
.probe = ppc4xx_edac_probe,
|
||||
.remove = ppc4xx_edac_remove,
|
||||
.driver = {
|
||||
.name = PPC4XX_EDAC_MODULE_NAME,
|
||||
.of_match_table = ppc4xx_edac_match,
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* ppc4xx_edac_init - driver/module insertion entry point
|
||||
*
|
||||
|
@ -335,6 +335,12 @@ struct sbridge_info {
|
||||
struct sbridge_channel {
|
||||
u32 ranks;
|
||||
u32 dimms;
|
||||
struct dimm {
|
||||
u32 rowbits;
|
||||
u32 colbits;
|
||||
u32 bank_xor_enable;
|
||||
u32 amap_fine;
|
||||
} dimm[MAX_DIMMS];
|
||||
};
|
||||
|
||||
struct pci_id_descr {
|
||||
@ -1603,7 +1609,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
|
||||
banks = 8;
|
||||
|
||||
for (i = 0; i < channels; i++) {
|
||||
u32 mtr;
|
||||
u32 mtr, amap = 0;
|
||||
|
||||
int max_dimms_per_channel;
|
||||
|
||||
@ -1615,6 +1621,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
|
||||
max_dimms_per_channel = ARRAY_SIZE(mtr_regs);
|
||||
if (!pvt->pci_tad[i])
|
||||
continue;
|
||||
pci_read_config_dword(pvt->pci_tad[i], 0x8c, &amap);
|
||||
}
|
||||
|
||||
for (j = 0; j < max_dimms_per_channel; j++) {
|
||||
@ -1627,6 +1634,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
|
||||
mtr_regs[j], &mtr);
|
||||
}
|
||||
edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
|
||||
|
||||
if (IS_DIMM_PRESENT(mtr)) {
|
||||
if (!IS_ECC_ENABLED(pvt->info.mcmtr)) {
|
||||
sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n",
|
||||
@ -1661,6 +1669,11 @@ static int __populate_dimms(struct mem_ctl_info *mci,
|
||||
dimm->dtype = pvt->info.get_width(pvt, mtr);
|
||||
dimm->mtype = mtype;
|
||||
dimm->edac_mode = mode;
|
||||
pvt->channel[i].dimm[j].rowbits = order_base_2(rows);
|
||||
pvt->channel[i].dimm[j].colbits = order_base_2(cols);
|
||||
pvt->channel[i].dimm[j].bank_xor_enable =
|
||||
GET_BITFIELD(pvt->info.mcmtr, 9, 9);
|
||||
pvt->channel[i].dimm[j].amap_fine = GET_BITFIELD(amap, 0, 0);
|
||||
snprintf(dimm->label, sizeof(dimm->label),
|
||||
"CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
|
||||
pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j);
|
||||
@ -1922,6 +1935,99 @@ static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Rank-address bit maps used by sb_decode_ddr4() through sb_bits():
 * entry i of a table is the rank-address bit that becomes bit i of the
 * decoded row or column.
 *
 * The "close" tables are used when pvt->is_close_pg is set, the "open"
 * tables otherwise. sb_open_fine_column replaces sb_open_column when the
 * DIMM's amap_fine flag is set.
 */

/* Row bits, close page: 17 entries. */
static u8 sb_close_row[] = {
	15, 16, 17, 18, 20, 21, 22, 28,
	10, 11, 12, 13, 29, 30, 31, 32,
	33
};

/* Column bits, close page: 10 entries. */
static u8 sb_close_column[] = {
	3, 4, 5, 14, 19,
	23, 24, 25, 26, 27
};

/* Row bits, open page: 17 entries. */
static u8 sb_open_row[] = {
	14, 15, 16, 20, 28, 21, 22, 23,
	24, 25, 26, 27, 29, 30, 31, 32,
	33
};

/* Column bits, open page: 10 entries. */
static u8 sb_open_column[] = {
	3, 4, 5, 6, 7,
	8, 9, 10, 11, 12
};

/* Column bits, open page with amap_fine set: 10 entries. */
static u8 sb_open_fine_column[] = {
	3, 4, 5, 7, 8,
	9, 10, 11, 12, 13
};
|
||||
|
||||
/*
 * Gather scattered bits of @addr into a compact value.
 *
 * @addr:  address to pick bits from
 * @nbits: number of entries of @bits to use
 * @bits:  table of source bit positions; bits[i] is the position in
 *         @addr of the bit that lands at position i of the result
 *
 * The caller must make sure @bits has at least @nbits entries.
 */
static int sb_bits(u64 addr, int nbits, u8 *bits)
{
	int out = 0;
	int idx = 0;

	while (idx < nbits) {
		u64 bit = (addr >> bits[idx]) & 1;

		out |= bit << idx;
		idx++;
	}

	return out;
}
|
||||
|
||||
/*
 * Build a two-bit bank value from address bits.
 *
 * Bit 0 of the result is address bit @b0 and bit 1 is address bit @b1.
 * When @do_xor is non-zero the value is XORed with the two-bit value
 * formed the same way from address bits @x0 (bit 0) and @x1 (bit 1).
 */
static int sb_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
{
	int lo = GET_BITFIELD(addr, b0, b0);
	int hi = GET_BITFIELD(addr, b1, b1);
	int bank = lo | (hi << 1);

	if (!do_xor)
		return bank;

	lo = GET_BITFIELD(addr, x0, x0);
	hi = GET_BITFIELD(addr, x1, x1);

	return bank ^ (lo | (hi << 1));
}
|
||||
|
||||
static bool sb_decode_ddr4(struct mem_ctl_info *mci, int ch, u8 rank,
|
||||
u64 rank_addr, char *msg)
|
||||
{
|
||||
int dimmno = 0;
|
||||
int row, col, bank_address, bank_group;
|
||||
struct sbridge_pvt *pvt;
|
||||
u32 bg0 = 0, rowbits = 0, colbits = 0;
|
||||
u32 amap_fine = 0, bank_xor_enable = 0;
|
||||
|
||||
dimmno = (rank < 12) ? rank / 4 : 2;
|
||||
pvt = mci->pvt_info;
|
||||
amap_fine = pvt->channel[ch].dimm[dimmno].amap_fine;
|
||||
bg0 = amap_fine ? 6 : 13;
|
||||
rowbits = pvt->channel[ch].dimm[dimmno].rowbits;
|
||||
colbits = pvt->channel[ch].dimm[dimmno].colbits;
|
||||
bank_xor_enable = pvt->channel[ch].dimm[dimmno].bank_xor_enable;
|
||||
|
||||
if (pvt->is_lockstep) {
|
||||
pr_warn_once("LockStep row/column decode is not supported yet!\n");
|
||||
msg[0] = '\0';
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pvt->is_close_pg) {
|
||||
row = sb_bits(rank_addr, rowbits, sb_close_row);
|
||||
col = sb_bits(rank_addr, colbits, sb_close_column);
|
||||
col |= 0x400; /* C10 is autoprecharge, always set */
|
||||
bank_address = sb_bank_bits(rank_addr, 8, 9, bank_xor_enable, 22, 28);
|
||||
bank_group = sb_bank_bits(rank_addr, 6, 7, bank_xor_enable, 20, 21);
|
||||
} else {
|
||||
row = sb_bits(rank_addr, rowbits, sb_open_row);
|
||||
if (amap_fine)
|
||||
col = sb_bits(rank_addr, colbits, sb_open_fine_column);
|
||||
else
|
||||
col = sb_bits(rank_addr, colbits, sb_open_column);
|
||||
bank_address = sb_bank_bits(rank_addr, 18, 19, bank_xor_enable, 22, 23);
|
||||
bank_group = sb_bank_bits(rank_addr, bg0, 17, bank_xor_enable, 20, 21);
|
||||
}
|
||||
|
||||
row &= (1u << rowbits) - 1;
|
||||
|
||||
sprintf(msg, "row:0x%x col:0x%x bank_addr:%d bank_group:%d",
|
||||
row, col, bank_address, bank_group);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool sb_decode_ddr3(struct mem_ctl_info *mci, int ch, u8 rank,
|
||||
u64 rank_addr, char *msg)
|
||||
{
|
||||
pr_warn_once("DDR3 row/column decode not support yet!\n");
|
||||
msg[0] = '\0';
|
||||
return false;
|
||||
}
|
||||
|
||||
static int get_memory_error_data(struct mem_ctl_info *mci,
|
||||
u64 addr,
|
||||
u8 *socket, u8 *ha,
|
||||
@ -1937,12 +2043,13 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
|
||||
int interleave_mode, shiftup = 0;
|
||||
unsigned int sad_interleave[MAX_INTERLEAVE];
|
||||
u32 reg, dram_rule;
|
||||
u8 ch_way, sck_way, pkg, sad_ha = 0;
|
||||
u8 ch_way, sck_way, pkg, sad_ha = 0, rankid = 0;
|
||||
u32 tad_offset;
|
||||
u32 rir_way;
|
||||
u32 mb, gb;
|
||||
u64 ch_addr, offset, limit = 0, prv = 0;
|
||||
|
||||
u64 rank_addr;
|
||||
enum mem_type mtype;
|
||||
|
||||
/*
|
||||
* Step 0) Check if the address is at special memory ranges
|
||||
@ -2226,6 +2333,28 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
|
||||
pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], ®);
|
||||
*rank = RIR_RNK_TGT(pvt->info.type, reg);
|
||||
|
||||
if (pvt->info.type == BROADWELL) {
|
||||
if (pvt->is_close_pg)
|
||||
shiftup = 6;
|
||||
else
|
||||
shiftup = 13;
|
||||
|
||||
rank_addr = ch_addr >> shiftup;
|
||||
rank_addr /= (1 << rir_way);
|
||||
rank_addr <<= shiftup;
|
||||
rank_addr |= ch_addr & GENMASK_ULL(shiftup - 1, 0);
|
||||
rank_addr -= RIR_OFFSET(pvt->info.type, reg);
|
||||
|
||||
mtype = pvt->info.get_memory_type(pvt);
|
||||
rankid = *rank;
|
||||
if (mtype == MEM_DDR4 || mtype == MEM_RDDR4)
|
||||
sb_decode_ddr4(mci, base_ch, rankid, rank_addr, msg);
|
||||
else
|
||||
sb_decode_ddr3(mci, base_ch, rankid, rank_addr, msg);
|
||||
} else {
|
||||
msg[0] = '\0';
|
||||
}
|
||||
|
||||
edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
|
||||
n_rir,
|
||||
ch_addr,
|
||||
@ -2950,7 +3079,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
|
||||
struct mem_ctl_info *new_mci;
|
||||
struct sbridge_pvt *pvt = mci->pvt_info;
|
||||
enum hw_event_mc_err_type tp_event;
|
||||
char *optype, msg[256];
|
||||
char *optype, msg[256], msg_full[512];
|
||||
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
|
||||
bool overflow = GET_BITFIELD(m->status, 62, 62);
|
||||
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
|
||||
@ -3089,18 +3218,17 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
|
||||
*/
|
||||
if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg)
|
||||
channel = first_channel;
|
||||
|
||||
snprintf(msg, sizeof(msg),
|
||||
"%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d",
|
||||
snprintf(msg_full, sizeof(msg_full),
|
||||
"%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d %s",
|
||||
overflow ? " OVERFLOW" : "",
|
||||
(uncorrected_error && recoverable) ? " recoverable" : "",
|
||||
area_type,
|
||||
mscod, errcode,
|
||||
socket, ha,
|
||||
channel_mask,
|
||||
rank);
|
||||
rank, msg);
|
||||
|
||||
edac_dbg(0, "%s\n", msg);
|
||||
edac_dbg(0, "%s\n", msg_full);
|
||||
|
||||
/* FIXME: need support for channel mask */
|
||||
|
||||
@ -3111,7 +3239,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
|
||||
edac_mc_handle_error(tp_event, mci, core_err_cnt,
|
||||
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
|
||||
channel, dimm, -1,
|
||||
optype, msg);
|
||||
optype, msg_full);
|
||||
return;
|
||||
err_parsing:
|
||||
edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
|
||||
|
@ -714,8 +714,13 @@ static int __init skx_init(void)
|
||||
|
||||
skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
|
||||
|
||||
if (nvdimm_count && skx_adxl_get() == -ENODEV)
|
||||
skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
|
||||
if (nvdimm_count && skx_adxl_get() != -ENODEV) {
|
||||
skx_set_decode(NULL, skx_show_retry_rd_err_log);
|
||||
} else {
|
||||
if (nvdimm_count)
|
||||
skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
|
||||
skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
|
||||
}
|
||||
|
||||
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
|
||||
opstate_init();
|
||||
|
@ -27,9 +27,11 @@ static const char * const component_names[] = {
|
||||
[INDEX_MEMCTRL] = "MemoryControllerId",
|
||||
[INDEX_CHANNEL] = "ChannelId",
|
||||
[INDEX_DIMM] = "DimmSlotId",
|
||||
[INDEX_CS] = "ChipSelect",
|
||||
[INDEX_NM_MEMCTRL] = "NmMemoryControllerId",
|
||||
[INDEX_NM_CHANNEL] = "NmChannelId",
|
||||
[INDEX_NM_DIMM] = "NmDimmSlotId",
|
||||
[INDEX_NM_CS] = "NmChipSelect",
|
||||
};
|
||||
|
||||
static int component_indices[ARRAY_SIZE(component_names)];
|
||||
@ -40,7 +42,7 @@ static char *adxl_msg;
|
||||
static unsigned long adxl_nm_bitmap;
|
||||
|
||||
static char skx_msg[MSG_SIZE];
|
||||
static skx_decode_f skx_decode;
|
||||
static skx_decode_f driver_decode;
|
||||
static skx_show_retry_log_f skx_show_retry_rd_err_log;
|
||||
static u64 skx_tolm, skx_tohm;
|
||||
static LIST_HEAD(dev_edac_list);
|
||||
@ -139,10 +141,13 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
|
||||
(int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
|
||||
res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ?
|
||||
(int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
|
||||
res->cs = (adxl_nm_bitmap & BIT_NM_CS) ?
|
||||
(int)adxl_values[component_indices[INDEX_NM_CS]] : -1;
|
||||
} else {
|
||||
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
|
||||
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
|
||||
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
|
||||
res->cs = (int)adxl_values[component_indices[INDEX_CS]];
|
||||
}
|
||||
|
||||
if (res->imc > NUM_IMC - 1 || res->imc < 0) {
|
||||
@ -173,6 +178,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
|
||||
break;
|
||||
}
|
||||
|
||||
res->decoded_by_adxl = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -183,7 +190,7 @@ void skx_set_mem_cfg(bool mem_cfg_2lm)
|
||||
|
||||
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
|
||||
{
|
||||
skx_decode = decode;
|
||||
driver_decode = decode;
|
||||
skx_show_retry_rd_err_log = show_retry_log;
|
||||
}
|
||||
|
||||
@ -591,19 +598,19 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (adxl_component_count) {
|
||||
if (res->decoded_by_adxl) {
|
||||
len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
|
||||
overflow ? " OVERFLOW" : "",
|
||||
(uncorrected_error && recoverable) ? " recoverable" : "",
|
||||
mscod, errcode, adxl_msg);
|
||||
} else {
|
||||
len = snprintf(skx_msg, MSG_SIZE,
|
||||
"%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
|
||||
"%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x",
|
||||
overflow ? " OVERFLOW" : "",
|
||||
(uncorrected_error && recoverable) ? " recoverable" : "",
|
||||
mscod, errcode,
|
||||
res->socket, res->imc, res->rank,
|
||||
res->bank_group, res->bank_address, res->row, res->column);
|
||||
res->row, res->column, res->bank_address, res->bank_group);
|
||||
}
|
||||
|
||||
if (skx_show_retry_rd_err_log)
|
||||
@ -649,13 +656,14 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
return NOTIFY_DONE;
|
||||
|
||||
memset(&res, 0, sizeof(res));
|
||||
res.mce = mce;
|
||||
res.addr = mce->addr;
|
||||
|
||||
if (adxl_component_count) {
|
||||
if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
|
||||
/* Try driver decoder first */
|
||||
if (!(driver_decode && driver_decode(&res))) {
|
||||
/* Then try firmware decoder (ACPI DSM methods) */
|
||||
if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))))
|
||||
return NOTIFY_DONE;
|
||||
} else if (!skx_decode || !skx_decode(&res)) {
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
mci = res.dev->imc[res.imc].mci;
|
||||
|
@ -10,6 +10,7 @@
|
||||
#define _SKX_COMM_EDAC_H
|
||||
|
||||
#include <linux/bits.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
#define MSG_SIZE 1024
|
||||
|
||||
@ -52,6 +53,9 @@
|
||||
#define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15)
|
||||
#define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i)
|
||||
|
||||
#define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15)
|
||||
#define MCI_MISC_ECC_DDRT 8 /* read from DDRT */
|
||||
|
||||
/*
|
||||
* Each cpu socket contains some pci devices that provide global
|
||||
* information, and also some that are local to each of the two
|
||||
@ -82,6 +86,7 @@ struct skx_dev {
|
||||
struct pci_dev *edev;
|
||||
u32 retry_rd_err_log_s;
|
||||
u32 retry_rd_err_log_d;
|
||||
u32 retry_rd_err_log_d2;
|
||||
struct skx_dimm {
|
||||
u8 close_pg;
|
||||
u8 bank_xor_enable;
|
||||
@ -108,18 +113,22 @@ enum {
|
||||
INDEX_MEMCTRL,
|
||||
INDEX_CHANNEL,
|
||||
INDEX_DIMM,
|
||||
INDEX_CS,
|
||||
INDEX_NM_FIRST,
|
||||
INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
|
||||
INDEX_NM_CHANNEL,
|
||||
INDEX_NM_DIMM,
|
||||
INDEX_NM_CS,
|
||||
INDEX_MAX
|
||||
};
|
||||
|
||||
#define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL)
|
||||
#define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL)
|
||||
#define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM)
|
||||
#define BIT_NM_CS BIT_ULL(INDEX_NM_CS)
|
||||
|
||||
struct decoded_addr {
|
||||
struct mce *mce;
|
||||
struct skx_dev *dev;
|
||||
u64 addr;
|
||||
int socket;
|
||||
@ -129,6 +138,7 @@ struct decoded_addr {
|
||||
int sktways;
|
||||
int chanways;
|
||||
int dimm;
|
||||
int cs;
|
||||
int rank;
|
||||
int channel_rank;
|
||||
u64 rank_address;
|
||||
@ -136,6 +146,7 @@ struct decoded_addr {
|
||||
int column;
|
||||
int bank_address;
|
||||
int bank_group;
|
||||
bool decoded_by_adxl;
|
||||
};
|
||||
|
||||
struct res_config {
|
||||
@ -154,7 +165,12 @@ struct res_config {
|
||||
int sad_all_offset;
|
||||
/* Offsets of retry_rd_err_log registers */
|
||||
u32 *offsets_scrub;
|
||||
u32 *offsets_scrub_hbm0;
|
||||
u32 *offsets_scrub_hbm1;
|
||||
u32 *offsets_demand;
|
||||
u32 *offsets_demand2;
|
||||
u32 *offsets_demand_hbm0;
|
||||
u32 *offsets_demand_hbm1;
|
||||
};
|
||||
|
||||
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
|
||||
|
@ -37,7 +37,6 @@ int edac_workqueue_setup(void)
|
||||
|
||||
void edac_workqueue_teardown(void)
|
||||
{
|
||||
flush_workqueue(wq);
|
||||
destroy_workqueue(wq);
|
||||
wq = NULL;
|
||||
}
|
||||
|
@ -231,21 +231,21 @@ enum mem_type {
|
||||
#define MEM_FLAG_DDR BIT(MEM_DDR)
|
||||
#define MEM_FLAG_RDDR BIT(MEM_RDDR)
|
||||
#define MEM_FLAG_RMBS BIT(MEM_RMBS)
|
||||
#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
|
||||
#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
|
||||
#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
|
||||
#define MEM_FLAG_XDR BIT(MEM_XDR)
|
||||
#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
|
||||
#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
|
||||
#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3)
|
||||
#define MEM_FLAG_DDR4 BIT(MEM_DDR4)
|
||||
#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4)
|
||||
#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4)
|
||||
#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4)
|
||||
#define MEM_FLAG_DDR5 BIT(MEM_DDR5)
|
||||
#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5)
|
||||
#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5)
|
||||
#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM)
|
||||
#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
|
||||
#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
|
||||
#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
|
||||
#define MEM_FLAG_XDR BIT(MEM_XDR)
|
||||
#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
|
||||
#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
|
||||
#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3)
|
||||
#define MEM_FLAG_DDR4 BIT(MEM_DDR4)
|
||||
#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4)
|
||||
#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4)
|
||||
#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4)
|
||||
#define MEM_FLAG_DDR5 BIT(MEM_DDR5)
|
||||
#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5)
|
||||
#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5)
|
||||
#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM)
|
||||
#define MEM_FLAG_WIO2 BIT(MEM_WIO2)
|
||||
#define MEM_FLAG_HBM2 BIT(MEM_HBM2)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user