EDAC/skx_common: Use driver decoder first

The driver decoder[1] performs better than the firmware decoder[2],
especially when correctable errors occur frequently.

So use the driver decoder first, and fall back to the firmware decoder
if the driver decoder is unavailable. Also rename the function pointer
skx_decode to driver_decode (a better name to contrast with adxl_decode).

[1] Decode errors by extracting error information from registers of
    memory controllers and/or MCA bank registers.

[2] Decode errors by calling ACPI DSM methods.

Co-developed-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/all/20220901194310.115427-1-tony.luck@intel.com/
This commit is contained in:
Qiuxu Zhuo 2022-09-01 12:43:08 -07:00 committed by Tony Luck
parent 93df194765
commit fe32f36693
3 changed files with 17 additions and 9 deletions

View File

@ -714,8 +714,13 @@ static int __init skx_init(void)
skx_set_decode(skx_decode, skx_show_retry_rd_err_log); skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
if (nvdimm_count && skx_adxl_get() == -ENODEV) if (nvdimm_count && skx_adxl_get() != -ENODEV) {
skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n"); skx_set_decode(NULL, skx_show_retry_rd_err_log);
} else {
if (nvdimm_count)
skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
}
/* Ensure that the OPSTATE is set correctly for POLL or NMI */ /* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init(); opstate_init();

View File

@ -40,7 +40,7 @@ static char *adxl_msg;
static unsigned long adxl_nm_bitmap; static unsigned long adxl_nm_bitmap;
static char skx_msg[MSG_SIZE]; static char skx_msg[MSG_SIZE];
static skx_decode_f skx_decode; static skx_decode_f driver_decode;
static skx_show_retry_log_f skx_show_retry_rd_err_log; static skx_show_retry_log_f skx_show_retry_rd_err_log;
static u64 skx_tolm, skx_tohm; static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list); static LIST_HEAD(dev_edac_list);
@ -173,6 +173,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
break; break;
} }
res->decoded_by_adxl = true;
return true; return true;
} }
@ -183,7 +185,7 @@ void skx_set_mem_cfg(bool mem_cfg_2lm)
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
{ {
skx_decode = decode; driver_decode = decode;
skx_show_retry_rd_err_log = show_retry_log; skx_show_retry_rd_err_log = show_retry_log;
} }
@ -591,7 +593,7 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
break; break;
} }
} }
if (adxl_component_count) { if (res->decoded_by_adxl) {
len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
overflow ? " OVERFLOW" : "", overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "", (uncorrected_error && recoverable) ? " recoverable" : "",
@ -651,11 +653,11 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
memset(&res, 0, sizeof(res)); memset(&res, 0, sizeof(res));
res.addr = mce->addr; res.addr = mce->addr;
if (adxl_component_count) { /* Try driver decoder first */
if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))) if (!(driver_decode && driver_decode(&res))) {
/* Then try firmware decoder (ACPI DSM methods) */
if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))))
return NOTIFY_DONE; return NOTIFY_DONE;
} else if (!skx_decode || !skx_decode(&res)) {
return NOTIFY_DONE;
} }
mci = res.dev->imc[res.imc].mci; mci = res.dev->imc[res.imc].mci;

View File

@ -136,6 +136,7 @@ struct decoded_addr {
int column; int column;
int bank_address; int bank_address;
int bank_group; int bank_group;
bool decoded_by_adxl;
}; };
struct res_config { struct res_config {