Intel EDAC fixes:
- Old igen6 driver could lose pending events during initialization - Sapphire Rapids workstations have fewer memory controllers than their bigger siblings. This confused the driver. -----BEGIN PGP SIGNATURE----- iQJIBAABCAAyFiEENIoOqscayAmBOQ5Iq6sjH5ffWIEFAmTudg4UHHRvbnkubHVj a0BpbnRlbC5jb20ACgkQq6sjH5ffWIH4wA//Z+pbRElvnWyK8rTx6SbWFu82D8a/ dAXx5V+8I6v64MPb9VZXP6KEiBQgk2jD2AsC0+2QrZL9FUnKwnBSDC3rgVWPTxBo dTxu8j1PDTlnffU+wuaB+3cCRikwa1h+Fr/SQaphwTLA3nm13CHj+dUOp3ZUR8fT vz+M4t3SRgcU/0W40jcLnn1h5hsTNjQWr//zVVdctGr++sl7xtVh7wxZPakTC9RL FBMx3elqdroeQ5ILMxC5e1V02tAZVrXxZbSNpLWhH25MBwe8P7rc+SHYfNaddnpx 3qrOOzRZl3fGifoM+GU/JsMeIYh6FYUhOfBNTjUFWQZP+6mDvgj9WaLxVgw9V99R W384K7KnjLSnE01/REZ0x9R1sehXyQIv2zGosJitRuKyLuw5UODx/khzpCG6a0P3 RPi4tNemscCIr5djX8VBqmyxS5tqUzlBBDskDnsHHS7NXLuYv1O6SqR/7kvCqhFQ 7/qGWNFbzZOMJZiLGUmmxEv3Pk+tfTlZdYOipfaHpSlNNr9zO07VXBRNK18aqQVp 3GCpRp3IhTL3EmOE2RaV2uhyRIcpSnjvqi8shoN6p1wy8jQwNKoe3/nt7QobKhCl 4kYC9q0jNDWgh/QWxgtoB6UzWHIieeVZQQcW0Da4fvlsIBwbzcpu5+j3qaCxUNBD jUt/DwSD+D91yPI= =4Uqu -----END PGP SIGNATURE----- Merge tag 'edac_updates_for_v6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras Pull intel EDAC fixes from Tony Luck: - Old igen6 driver could lose pending events during initialization - Sapphire Rapids workstations have fewer memory controllers than their bigger siblings. This confused the driver. * tag 'edac_updates_for_v6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: EDAC/igen6: Fix the issue of no error events EDAC/i10nm: Skip the absent memory controllers
This commit is contained in:
commit
bb511d4b25
@ -658,13 +658,49 @@ static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsi
|
||||
return mdev;
|
||||
}
|
||||
|
||||
/**
|
||||
* i10nm_imc_absent() - Check whether the memory controller @imc is absent
|
||||
*
|
||||
* @imc : The pointer to the structure of memory controller EDAC device.
|
||||
*
|
||||
* RETURNS : true if the memory controller EDAC device is absent, false otherwise.
|
||||
*/
|
||||
static bool i10nm_imc_absent(struct skx_imc *imc)
|
||||
{
|
||||
u32 mcmtr;
|
||||
int i;
|
||||
|
||||
switch (res_cfg->type) {
|
||||
case SPR:
|
||||
for (i = 0; i < res_cfg->ddr_chan_num; i++) {
|
||||
mcmtr = I10NM_GET_MCMTR(imc, i);
|
||||
edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr);
|
||||
if (mcmtr != ~0)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some workstations' absent memory controllers still
|
||||
* appear as PCIe devices, misleading the EDAC driver.
|
||||
* By observing that the MMIO registers of these absent
|
||||
* memory controllers consistently hold the value of ~0.
|
||||
*
|
||||
* We identify a memory controller as absent by checking
|
||||
* if its MMIO register "mcmtr" == ~0 in all its channels.
|
||||
*/
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static int i10nm_get_ddr_munits(void)
|
||||
{
|
||||
struct pci_dev *mdev;
|
||||
void __iomem *mbase;
|
||||
unsigned long size;
|
||||
struct skx_dev *d;
|
||||
int i, j = 0;
|
||||
int i, lmc, j = 0;
|
||||
u32 reg, off;
|
||||
u64 base;
|
||||
|
||||
@ -690,7 +726,7 @@ static int i10nm_get_ddr_munits(void)
|
||||
edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
|
||||
j++, base, reg);
|
||||
|
||||
for (i = 0; i < res_cfg->ddr_imc_num; i++) {
|
||||
for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) {
|
||||
mdev = get_ddr_munit(d, i, &off, &size);
|
||||
|
||||
if (i == 0 && !mdev) {
|
||||
@ -700,8 +736,6 @@ static int i10nm_get_ddr_munits(void)
|
||||
if (!mdev)
|
||||
continue;
|
||||
|
||||
d->imc[i].mdev = mdev;
|
||||
|
||||
edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
|
||||
i, base + off, size, reg);
|
||||
|
||||
@ -712,7 +746,17 @@ static int i10nm_get_ddr_munits(void)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
d->imc[i].mbase = mbase;
|
||||
d->imc[lmc].mbase = mbase;
|
||||
if (i10nm_imc_absent(&d->imc[lmc])) {
|
||||
pci_dev_put(mdev);
|
||||
iounmap(mbase);
|
||||
d->imc[lmc].mbase = NULL;
|
||||
edac_dbg(2, "Skip absent mc%d\n", i);
|
||||
continue;
|
||||
} else {
|
||||
d->imc[lmc].mdev = mdev;
|
||||
lmc++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -27,7 +27,7 @@
|
||||
#include "edac_mc.h"
|
||||
#include "edac_module.h"
|
||||
|
||||
#define IGEN6_REVISION "v2.5"
|
||||
#define IGEN6_REVISION "v2.5.1"
|
||||
|
||||
#define EDAC_MOD_STR "igen6_edac"
|
||||
#define IGEN6_NMI_NAME "igen6_ibecc"
|
||||
@ -1216,9 +1216,6 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
INIT_WORK(&ecclog_work, ecclog_work_cb);
|
||||
init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);
|
||||
|
||||
/* Check if any pending errors before registering the NMI handler */
|
||||
ecclog_handler();
|
||||
|
||||
rc = register_err_handler();
|
||||
if (rc)
|
||||
goto fail3;
|
||||
@ -1230,6 +1227,9 @@ static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
goto fail4;
|
||||
}
|
||||
|
||||
/* Check if any pending errors before/during the registration of the error handler */
|
||||
ecclog_handler();
|
||||
|
||||
igen6_debug_setup();
|
||||
return 0;
|
||||
fail4:
|
||||
|
Loading…
x
Reference in New Issue
Block a user