x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
If the handler took any action to log or deal with the error, set a bit
in mce->kflags so that the default handler on the end of the machine
check chain can see what has been done.

Get rid of NOTIFY_STOP returns. Make the EDAC and dev-mcelog handlers
skip over errors already processed by CEC.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200214222720.13168-5-tony.luck@intel.com
This commit is contained in:
parent
1de08dccd3
commit
23ba710a08
@ -581,8 +581,10 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
|
||||
return NOTIFY_DONE;
|
||||
|
||||
pfn = mce->addr >> PAGE_SHIFT;
|
||||
if (!memory_failure(pfn, 0))
|
||||
if (!memory_failure(pfn, 0)) {
|
||||
set_mce_nospec(pfn);
|
||||
mce->kflags |= MCE_HANDLED_UC;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
@ -39,6 +39,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
|
||||
struct mce *mce = (struct mce *)data;
|
||||
unsigned int entry;
|
||||
|
||||
if (mce->kflags & MCE_HANDLED_CEC)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
mutex_lock(&mce_chrdev_read_mutex);
|
||||
|
||||
entry = mcelog->next;
|
||||
@ -56,6 +59,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
|
||||
|
||||
memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
|
||||
mcelog->entry[entry].finished = 1;
|
||||
mcelog->entry[entry].kflags = 0;
|
||||
|
||||
/* wake processes polling /dev/mcelog */
|
||||
wake_up_interruptible(&mce_chrdev_wait);
|
||||
@ -63,6 +67,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
|
||||
unlock:
|
||||
mutex_unlock(&mce_chrdev_read_mutex);
|
||||
|
||||
mce->kflags |= MCE_HANDLED_MCELOG;
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
|
@ -146,7 +146,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
|
||||
static u32 err_seq;
|
||||
|
||||
estatus = extlog_elog_entry_check(cpu, bank);
|
||||
if (estatus == NULL)
|
||||
if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
|
||||
@ -176,7 +176,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
|
||||
}
|
||||
|
||||
out:
|
||||
return NOTIFY_STOP;
|
||||
mce->kflags |= MCE_HANDLED_EXTLOG;
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static bool __init extlog_get_l1addr(void)
|
||||
|
@ -76,6 +76,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
|
||||
*/
|
||||
acpi_nfit_ars_rescan(acpi_desc, 0);
|
||||
}
|
||||
mce->kflags |= MCE_HANDLED_NFIT;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1815,7 +1815,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
struct mem_ctl_info *mci;
|
||||
|
||||
i7_dev = get_i7core_dev(mce->socketid);
|
||||
if (!i7_dev)
|
||||
if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
mci = i7_dev->mci;
|
||||
@ -1834,7 +1834,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
i7core_check_error(mci, mce);
|
||||
|
||||
/* Advise mcelog that the errors were handled */
|
||||
return NOTIFY_STOP;
|
||||
mce->kflags |= MCE_HANDLED_EDAC;
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block i7_mce_dec = {
|
||||
|
@ -1046,6 +1046,9 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
||||
unsigned int fam = x86_family(m->cpuid);
|
||||
int ecc;
|
||||
|
||||
if (m->kflags & MCE_HANDLED_CEC)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
pr_emerg(HW_ERR "%s\n", decode_error_status(m));
|
||||
|
||||
pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
|
||||
@ -1146,7 +1149,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
|
||||
err_code:
|
||||
amd_decode_err_code(m->status & 0xffff);
|
||||
|
||||
return NOTIFY_STOP;
|
||||
m->kflags |= MCE_HANDLED_EDAC;
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block amd_mce_dec_nb = {
|
||||
|
@ -1400,7 +1400,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
|
||||
return NOTIFY_DONE;
|
||||
|
||||
mci = pnd2_mci;
|
||||
if (!mci)
|
||||
if (!mci || (mce->kflags & MCE_HANDLED_CEC))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
/*
|
||||
@ -1429,7 +1429,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
|
||||
pnd2_mce_output_error(mci, mce, &daddr);
|
||||
|
||||
/* Advise mcelog that the error was handled */
|
||||
return NOTIFY_STOP;
|
||||
mce->kflags |= MCE_HANDLED_EDAC;
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block pnd2_mce_dec = {
|
||||
|
@ -3136,6 +3136,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
|
||||
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
|
||||
return NOTIFY_DONE;
|
||||
if (mce->kflags & MCE_HANDLED_CEC)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
/*
|
||||
* Just let mcelog handle it if the error is
|
||||
@ -3183,7 +3185,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
sbridge_mce_output_error(mci, mce);
|
||||
|
||||
/* Advise mcelog that the error was handled */
|
||||
return NOTIFY_STOP;
|
||||
mce->kflags |= MCE_HANDLED_EDAC;
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block sbridge_mce_dec = {
|
||||
|
@ -577,6 +577,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
if (mce->kflags & MCE_HANDLED_CEC)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
/* ignore unless this is memory related with an address */
|
||||
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
|
||||
return NOTIFY_DONE;
|
||||
@ -616,6 +619,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
|
||||
|
||||
skx_mce_output_error(mci, mce, &res);
|
||||
|
||||
mce->kflags |= MCE_HANDLED_EDAC;
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
|
@ -538,9 +538,12 @@ static int cec_notifier(struct notifier_block *nb, unsigned long val,
|
||||
/* We eat only correctable DRAM errors with usable addresses. */
|
||||
if (mce_is_memory_error(m) &&
|
||||
mce_is_correctable(m) &&
|
||||
mce_usable_address(m))
|
||||
if (!cec_add_elem(m->addr >> PAGE_SHIFT))
|
||||
return NOTIFY_STOP;
|
||||
mce_usable_address(m)) {
|
||||
if (!cec_add_elem(m->addr >> PAGE_SHIFT)) {
|
||||
m->kflags |= MCE_HANDLED_CEC;
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
}
|
||||
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user