From fca6116564181a76c32bf89a0452585f5cb10004 Mon Sep 17 00:00:00 2001 From: Len Baker Date: Fri, 3 Sep 2021 17:05:39 +0200 Subject: [PATCH 1/6] EDAC/mc: Replace strcpy(), sprintf() and snprintf() with strscpy() or scnprintf() strcpy() performs no bounds checking on the destination buffer. This could result in linear overflows beyond the end of the buffer, leading to all kinds of misbehavior. The safe replacement is strscpy(). [1][2] However, to simplify and clarify the code, to concatenate labels use the scnprintf() function. This way it is not necessary to check the return value of strscpy() (-E2BIG if the parameter count is 0 or the src was truncated) since scnprintf() always returns the number of chars written into the buffer. This function always returns a nul-terminated string even if it needs to be truncated. While at it, fix all other broken string generation code that wrongly interprets snprintf()'s return code or just uses sprintf(), implement that using scnprintf() here too. Drop breaks in loops around scnprintf() as it is safe now to loop. Moreover, the check is not needed: for the case when the buffer is exhausted, len never gets zero because scnprintf() takes the full buffer length as input parameter, but excludes the trailing '\0' in its return code and thus, 1 is the minimum len. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy [2] https://github.com/KSPP/linux/issues/88 [ rric: Replace snprintf() with scnprintf(), rework sprintf() user, drop breaks in loops around scnprintf(), introduce 'end' pointer to reduce pointer arithmetic, use prefix pattern for e->location, adjust subject and description ] Co-developed-by: Joe Perches Signed-off-by: Joe Perches Signed-off-by: Len Baker Signed-off-by: Robert Richter Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210903150539.7282-1-len.baker@gmx.com --- drivers/edac/edac_mc.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 2c5975674723..9f82ca295353 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -66,14 +66,12 @@ unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf, char *p = buf; for (i = 0; i < mci->n_layers; i++) { - n = snprintf(p, len, "%s %d ", + n = scnprintf(p, len, "%s %d ", edac_layer_name[mci->layers[i].type], dimm->location[i]); p += n; len -= n; count += n; - if (!len) - break; } return count; @@ -341,19 +339,16 @@ static int edac_mc_alloc_dimms(struct mem_ctl_info *mci) */ len = sizeof(dimm->label); p = dimm->label; - n = snprintf(p, len, "mc#%u", mci->mc_idx); + n = scnprintf(p, len, "mc#%u", mci->mc_idx); p += n; len -= n; for (layer = 0; layer < mci->n_layers; layer++) { - n = snprintf(p, len, "%s#%u", - edac_layer_name[mci->layers[layer].type], - pos[layer]); + n = scnprintf(p, len, "%s#%u", + edac_layer_name[mci->layers[layer].type], + pos[layer]); p += n; len -= n; dimm->location[layer] = pos[layer]; - - if (len <= 0) - break; } /* Link it to the csrows old API data */ @@ -1027,12 +1022,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *other_detail) { struct dimm_info *dimm; - char *p; + char *p, *end; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; int i, n_labels = 0; struct edac_raw_error_desc *e = &mci->error_desc; bool any_memory = true; + const char *prefix; edac_dbg(3, "MC%d\n", mci->mc_idx); @@ -1087,6 +1083,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, */ p = e->label; *p = '\0'; + end = p + sizeof(e->label); + prefix = ""; mci_for_each_dimm(mci, dimm) { if (top_layer >= 0 && top_layer != dimm->location[0]) @@ -1114,12 +1112,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, p = e->label; *p = '\0'; } else { - if (p != e->label) { - strcpy(p, OTHER_LABEL); - p += strlen(OTHER_LABEL); - } - strcpy(p, dimm->label); - p += strlen(p); + p += scnprintf(p, end - p, "%s%s", prefix, dimm->label); + prefix = OTHER_LABEL; } /* @@ -1141,25 +1135,25 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } if (any_memory) - strcpy(e->label, "any memory"); + strscpy(e->label, "any memory", sizeof(e->label)); else if (!*e->label) - strcpy(e->label, "unknown memory"); + strscpy(e->label, "unknown memory", sizeof(e->label)); edac_inc_csrow(e, row, chan); /* Fill the RAM location data */ p = e->location; + end = p + sizeof(e->location); + prefix = ""; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) continue; - p += sprintf(p, "%s:%d ", - edac_layer_name[mci->layers[i].type], - pos[i]); + p += scnprintf(p, end - p, "%s%s:%d", prefix, + edac_layer_name[mci->layers[i].type], pos[i]); + prefix = " "; } - if (p > e->location) - *(p - 1) = '\0'; edac_raw_mc_handle_error(e); } From 470b52564cceef62e982283cafbada41ff47903b Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Wed, 22 Sep 2021 20:59:23 +0800 Subject: [PATCH 2/6] EDAC/al_mc: Make use of the helper function devm_add_action_or_reset() The helper function devm_add_action_or_reset() will internally call devm_add_action(), and if devm_add_action() fails then it will execute the action mentioned and return the error code. So use devm_add_action_or_reset() instead of devm_add_action() to simplify the error handling, reduce the code. Signed-off-by: Cai Huoqing Signed-off-by: Borislav Petkov Acked-by: Talel Shenhar Link: https://lkml.kernel.org/r/20210922125924.321-1-caihuoqing@baidu.com --- drivers/edac/al_mc_edac.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/edac/al_mc_edac.c b/drivers/edac/al_mc_edac.c index 7d4f396c27b5..178b9e581a72 100644 --- a/drivers/edac/al_mc_edac.c +++ b/drivers/edac/al_mc_edac.c @@ -238,11 +238,9 @@ static int al_mc_edac_probe(struct platform_device *pdev) if (!mci) return -ENOMEM; - ret = devm_add_action(&pdev->dev, devm_al_mc_edac_free, mci); - if (ret) { - edac_mc_free(mci); + ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_free, mci); + if (ret) return ret; - } platform_set_drvdata(pdev, mci); al_mc = mci->pvt_info; @@ -293,11 +291,9 @@ static int al_mc_edac_probe(struct platform_device *pdev) return ret; } - ret = devm_add_action(&pdev->dev, devm_al_mc_edac_del, &pdev->dev); - if (ret) { - edac_mc_del_mc(&pdev->dev); + ret = devm_add_action_or_reset(&pdev->dev, devm_al_mc_edac_del, &pdev->dev); + if (ret) return ret; - } if (al_mc->irq_ue > 0) { ret = devm_request_irq(&pdev->dev, From 34417f27b9fbdf043207c8518374ac84dc7cdfc9 Mon Sep 17 00:00:00 2001 From: Eric Badger Date: Sun, 3 Oct 2021 11:16:53 -0700 Subject: [PATCH 3/6] EDAC/mc_sysfs: Print MC-scope sysfs counters unsigned This is cosmetically nicer for counts > INT32_MAX, and aligns the MC-scope format with that of the lower layer sysfs counter files. Signed-off-by: Eric Badger Signed-off-by: Borislav Petkov Acked-by: Tony Luck Link: https://lkml.kernel.org/r/20211003181653.GA685515@ebadger-ThinkPad-T590 --- drivers/edac/edac_mc_sysfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 2f9f1e74bb35..0a638c97702a 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -744,7 +744,7 @@ static ssize_t mci_ue_count_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%d\n", mci->ue_mc); + return sprintf(data, "%u\n", mci->ue_mc); } static ssize_t mci_ce_count_show(struct device *dev, @@ -753,7 +753,7 @@ static ssize_t mci_ce_count_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%d\n", mci->ce_mc); + return sprintf(data, "%u\n", mci->ce_mc); } static ssize_t mci_ce_noinfo_show(struct device *dev, @@ -762,7 +762,7 @@ static ssize_t mci_ce_noinfo_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%d\n", mci->ce_noinfo_count); + return sprintf(data, "%u\n", mci->ce_noinfo_count); } static ssize_t mci_ue_noinfo_show(struct device *dev, @@ -771,7 +771,7 @@ static ssize_t mci_ue_noinfo_show(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); - return sprintf(data, "%d\n", mci->ue_noinfo_count); + return sprintf(data, "%u\n", mci->ue_noinfo_count); } static ssize_t mci_seconds_show(struct device *dev, From 9f4873fb6af7966de8fcbd95c36b61351c1c4b1f Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 5 Oct 2021 15:44:19 +0000 Subject: [PATCH 4/6] EDAC/amd64: Handle three rank interleaving mode AMD Rome systems and later support interleaving between three identical ranks within a channel. Check for this mode by counting the number of enabled chip selects and comparing their masks. If there are exactly three enabled chip selects and their masks are identical, then three rank interleaving is enabled. The size of a rank is determined from its mask value. However, three rank interleaving doesn't follow the method of swapping an interleave bit with the most significant bit. Rather, the interleave bit is flipped and the most significant bit remains the same. There is only a single interleave bit in this case. Account for this when determining the chip select size by keeping the most significant bit at its original value and ignoring any zero bits. This will return a full bitmask in [MSB:1]. Fixes: e53a3b267fb0 ("EDAC/amd64: Find Chip Select memory size using Address Mask") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211005154419.2060504-1-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 99b06a3e8fb1..4fce75013674 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1065,12 +1065,14 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) #define CS_ODD_PRIMARY BIT(1) #define CS_EVEN_SECONDARY BIT(2) #define CS_ODD_SECONDARY BIT(3) +#define CS_3R_INTERLEAVE BIT(4) #define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) #define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { + u8 base, count = 0; int cs_mode = 0; if (csrow_enabled(2 * dimm, ctrl, pvt)) @@ -1083,6 +1085,20 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_SECONDARY; + /* + * 3 Rank inteleaving support. + * There should be only three bases enabled and their two masks should + * be equal. + */ + for_each_chip_select(base, ctrl, pvt) + count += csrow_enabled(base, ctrl, pvt); + + if (count == 3 && + pvt->csels[ctrl].csmasks[0] == pvt->csels[ctrl].csmasks[1]) { + edac_dbg(1, "3R interleaving in use.\n"); + cs_mode |= CS_3R_INTERLEAVE; + } + return cs_mode; } @@ -1891,10 +1907,14 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, * * The MSB is the number of bits in the full mask because BIT[0] is * always 0. + * + * In the special 3 Rank interleaving case, a single bit is flipped + * without swapping with the most significant bit. This can be handled + * by keeping the MSB where it is and ignoring the single zero bit. */ msb = fls(addr_mask_orig) - 1; weight = hweight_long(addr_mask_orig); - num_zero_bits = msb - weight; + num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); /* Take the number of zero bits off from the top of the mask. */ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); From 0b6d4ab2165c46c7f236744a2d05264f40172ae9 Mon Sep 17 00:00:00 2001 From: Tang Bin Date: Wed, 11 Aug 2021 19:26:26 +0800 Subject: [PATCH 5/6] EDAC/ti: Remove redundant error messages In the function ti_edac_probe(), devm_ioremap_resource() and platform_get_irq() already issue error messages when they fail so remove the redundant error messages in the EDAC driver. Signed-off-by: Tang Bin Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210811112626.27848-1-tangbin@cmss.chinamobile.com --- drivers/edac/ti_edac.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c index 169f96e51c29..6971ded598de 100644 --- a/drivers/edac/ti_edac.c +++ b/drivers/edac/ti_edac.c @@ -245,11 +245,8 @@ static int ti_edac_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); reg = devm_ioremap_resource(dev, res); - if (IS_ERR(reg)) { - edac_printk(KERN_ERR, EDAC_MOD_NAME, - "EMIF controller regs not defined\n"); + if (IS_ERR(reg)) return PTR_ERR(reg); - } layers[0].type = EDAC_MC_LAYER_ALL_MEM; layers[0].size = 1; @@ -281,8 +278,6 @@ static int ti_edac_probe(struct platform_device *pdev) error_irq = platform_get_irq(pdev, 0); if (error_irq < 0) { ret = error_irq; - edac_printk(KERN_ERR, EDAC_MOD_NAME, - "EMIF irq number not defined.\n"); goto err; } From 537bddd069c743759addf422d0b8f028ff0f8dbc Mon Sep 17 00:00:00 2001 From: Eric Badger Date: Sun, 10 Oct 2021 10:06:56 -0700 Subject: [PATCH 6/6] EDAC/sb_edac: Fix top-of-high-memory value for Broadwell/Haswell The computation of TOHM is off by one bit. This missed bit results in too low a value for TOHM, which can cause errors in regular memory to incorrectly report: EDAC MC0: 1 CE Error at MMIOH area, on addr 0x000000207fffa680 on any memory Fixes: 50d1bb93672f ("sb_edac: add support for Haswell based systems") Cc: stable@vger.kernel.org Reported-by: Meeta Saggi Signed-off-by: Eric Badger Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20211010170127.848113-1-ebadger@purestorage.com --- drivers/edac/sb_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 4c626fcd4dcb..1522d4aa2ca6 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1052,7 +1052,7 @@ static u64 haswell_get_tohm(struct sbridge_pvt *pvt) pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_1, ®); rc = ((reg << 6) | rc) << 26; - return rc | 0x1ffffff; + return rc | 0x3ffffff; } static u64 knl_get_tolm(struct sbridge_pvt *pvt)