From 6ca53a75c425cb1cb0929ce07f58d9e41094500c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 9 Sep 2022 11:20:21 +0200 Subject: [PATCH 001/328] dt-bindings: hwlock: qcom-hwspinlock: add support for MMIO on older SoCs Older Qualcomm SoCs have TCSR mutex registers with 0x80 stride, instead of 0x1000. Add dedicated compatibles for such case. Unfortunately the binding started using a generic "qcom,tcsr-mutex" compatible without specifying the SoC part, thus it looks now quite inconsistent. Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220909092035.223915-2-krzysztof.kozlowski@linaro.org --- .../bindings/hwlock/qcom-hwspinlock.yaml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml index 1c7149f7d171..de98b961fb38 100644 --- a/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml +++ b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml @@ -15,9 +15,18 @@ description: properties: compatible: - enum: - - qcom,sfpb-mutex - - qcom,tcsr-mutex + oneOf: + - enum: + - qcom,sfpb-mutex + - qcom,tcsr-mutex + - items: + - enum: + - qcom,apq8084-tcsr-mutex + - qcom,ipq6018-tcsr-mutex + - qcom,msm8226-tcsr-mutex + - qcom,msm8974-tcsr-mutex + - qcom,msm8994-tcsr-mutex + - const: qcom,tcsr-mutex reg: maxItems: 1 From 276a4f1a5fb118bfd6980d42732d530e43f2916a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 9 Sep 2022 11:20:22 +0200 Subject: [PATCH 002/328] dt-bindings: hwlock: qcom-hwspinlock: correct example indentation Use some consistent indentation (4-space) for DTS example. Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220909092035.223915-3-krzysztof.kozlowski@linaro.org --- .../devicetree/bindings/hwlock/qcom-hwspinlock.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml index de98b961fb38..1a3adf75934b 100644 --- a/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml +++ b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml @@ -43,9 +43,9 @@ additionalProperties: false examples: - | - tcsr_mutex: hwlock@1f40000 { - compatible = "qcom,tcsr-mutex"; - reg = <0x01f40000 0x40000>; - #hwlock-cells = <1>; - }; + hwlock@1f40000 { + compatible = "qcom,tcsr-mutex"; + reg = <0x01f40000 0x40000>; + #hwlock-cells = <1>; + }; ... From 90cb380f9ceb811059340d06ff5fd0c0e93ecbe1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 9 Sep 2022 11:20:23 +0200 Subject: [PATCH 003/328] hwspinlock: qcom: correct MMIO max register for newer SoCs Newer ARMv8 Qualcomm SoCs using 0x1000 register stride have maximum register 0x20000 (32 mutexes * 0x1000). Fixes: 7a1e6fb1c606 ("hwspinlock: qcom: Allow mmio usage in addition to syscon") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220909092035.223915-4-krzysztof.kozlowski@linaro.org --- drivers/hwspinlock/qcom_hwspinlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwspinlock/qcom_hwspinlock.c b/drivers/hwspinlock/qcom_hwspinlock.c index 80ea45b3a815..9734e149d981 100644 --- a/drivers/hwspinlock/qcom_hwspinlock.c +++ b/drivers/hwspinlock/qcom_hwspinlock.c @@ -121,7 +121,7 @@ static const struct regmap_config tcsr_mutex_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0x40000, + .max_register = 0x20000, .fast_io = true, }; From 5d4753f741d824e04e7ba46f46ec016be120f383 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 9 Sep 2022 11:20:24 +0200 Subject: [PATCH 004/328] hwspinlock: qcom: add support for MMIO on older SoCs Older Qualcomm SoCs have TCSR mutex registers with 0x80 stride, instead of 0x1000. Add dedicated compatibles and regmap for such case. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220909092035.223915-5-krzysztof.kozlowski@linaro.org --- drivers/hwspinlock/qcom_hwspinlock.c | 42 +++++++++++++++++++++------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/hwspinlock/qcom_hwspinlock.c b/drivers/hwspinlock/qcom_hwspinlock.c index 9734e149d981..9cf186362ae2 100644 --- a/drivers/hwspinlock/qcom_hwspinlock.c +++ b/drivers/hwspinlock/qcom_hwspinlock.c @@ -22,6 +22,7 @@ struct qcom_hwspinlock_of_data { u32 offset; u32 stride; + const struct regmap_config *regmap_config; }; static int qcom_hwspinlock_trylock(struct hwspinlock *lock) @@ -73,15 +74,42 @@ static const struct qcom_hwspinlock_of_data of_sfpb_mutex = { .stride = 0x4, }; -/* All modern platform has offset 0 and stride of 4k */ +static const struct regmap_config tcsr_msm8226_mutex_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = 0x1000, + .fast_io = true, +}; + +static const struct qcom_hwspinlock_of_data of_msm8226_tcsr_mutex = { + .offset = 0, + .stride = 0x80, + .regmap_config = &tcsr_msm8226_mutex_config, +}; + +static const struct regmap_config tcsr_mutex_config = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .max_register = 0x20000, + .fast_io = true, +}; + static const struct qcom_hwspinlock_of_data of_tcsr_mutex = { .offset = 0, .stride = 0x1000, + .regmap_config = &tcsr_mutex_config, }; static const struct of_device_id qcom_hwspinlock_of_match[] = { { .compatible = "qcom,sfpb-mutex", .data = &of_sfpb_mutex }, { .compatible = "qcom,tcsr-mutex", .data = &of_tcsr_mutex }, + { .compatible = "qcom,apq8084-tcsr-mutex", .data = &of_msm8226_tcsr_mutex }, + { .compatible = "qcom,ipq6018-tcsr-mutex", .data = &of_msm8226_tcsr_mutex }, + { .compatible = "qcom,msm8226-tcsr-mutex", .data = &of_msm8226_tcsr_mutex }, + { .compatible = "qcom,msm8974-tcsr-mutex", .data = &of_msm8226_tcsr_mutex }, + { .compatible = "qcom,msm8994-tcsr-mutex", .data = &of_msm8226_tcsr_mutex }, { } }; MODULE_DEVICE_TABLE(of, qcom_hwspinlock_of_match); @@ -117,14 +145,6 @@ static struct regmap *qcom_hwspinlock_probe_syscon(struct platform_device *pdev, return regmap; } -static const struct regmap_config tcsr_mutex_config = { - .reg_bits = 32, - .reg_stride = 4, - .val_bits = 32, - .max_register = 0x20000, - .fast_io = true, -}; - static struct regmap *qcom_hwspinlock_probe_mmio(struct platform_device *pdev, u32 *offset, u32 *stride) { @@ -133,6 +153,8 @@ static struct regmap *qcom_hwspinlock_probe_mmio(struct platform_device *pdev, void __iomem *base; data = of_device_get_match_data(dev); + if (!data->regmap_config) + return ERR_PTR(-EINVAL); *offset = data->offset; *stride = data->stride; @@ -141,7 +163,7 @@ static struct regmap *qcom_hwspinlock_probe_mmio(struct platform_device *pdev, if (IS_ERR(base)) return ERR_CAST(base); - return devm_regmap_init_mmio(dev, base, &tcsr_mutex_config); + return devm_regmap_init_mmio(dev, base, data->regmap_config); } static int qcom_hwspinlock_probe(struct platform_device *pdev) From 76845ba539c3693f572377b38563d19a87266a5b Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 14 Oct 2022 11:54:24 +0530 Subject: [PATCH 005/328] MAINTAINERS: Update Kishon's email address in GENERIC PHY FRAMEWORK Update Kishon's email address in GENERIC PHY FRAMEWORK maintainer entry. Cc: Kishon Vijay Abraham I Signed-off-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20221014062424.3327-1-kishon@ti.com Signed-off-by: Vinod Koul --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..2526755bf977 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8595,8 +8595,8 @@ F: include/asm-generic/ F: include/uapi/asm-generic/ GENERIC PHY FRAMEWORK -M: Kishon Vijay Abraham I M: Vinod Koul +M: Kishon Vijay Abraham I L: linux-phy@lists.infradead.org S: Supported Q: https://patchwork.kernel.org/project/linux-phy/list/ From ca1c73628f5bd0c1ef6e46073cc3be2450605b06 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 12:25:06 +0300 Subject: [PATCH 006/328] phy: stm32: fix an error code in probe If "index > usbphyc->nphys" is true then this returns success but it should return -EINVAL. Fixes: 94c358da3a05 ("phy: stm32: add support for STM32 USB PHY Controller (USBPHYC)") Signed-off-by: Dan Carpenter Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/Y0kq8j6S+5nDdMpr@kili Signed-off-by: Vinod Koul --- drivers/phy/st/phy-stm32-usbphyc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index a98c911cc37a..5bb9647b078f 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -710,6 +710,8 @@ static int stm32_usbphyc_probe(struct platform_device *pdev) ret = of_property_read_u32(child, "reg", &index); if (ret || index > usbphyc->nphys) { dev_err(&phy->dev, "invalid reg property: %d\n", ret); + if (!ret) + ret = -EINVAL; goto put_child; } From 2a4ea83bb8e54986703187edce59c9130a75eb26 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 10 Oct 2022 14:51:32 +0100 Subject: [PATCH 007/328] phy: tegra: xusb: Fix crash during pad power on/down Commit a88520bfc0ec ("usb: gadget: tegra: Reduce pad power") added calls to tegra_phy_xusb_utmi_pad_power_on/down in the Tegra XUDC driver to control the pad power. This change is causing a kernel panic when powering down the pads on entering suspend with the Jetson TX2 platform. The panic occurs because the 'xudc->curr_utmi_phy' is not configured on this platform and we do not check to see if the pointer is valid before attempting to deference the pointer. Fix this by checking to see if the 'phy' pointer passed to tegra_phy_xusb_utmi_pad_power_on/down is valid. Fixes: a88520bfc0ec ("usb: gadget: tegra: Reduce pad power") Signed-off-by: Jon Hunter Link: https://lore.kernel.org/r/20221010135132.30809-1-jonathanh@nvidia.com Signed-off-by: Vinod Koul --- drivers/phy/tegra/xusb.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 95091876c422..dce45fbbd699 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -1461,8 +1461,14 @@ EXPORT_SYMBOL_GPL(tegra_phy_xusb_utmi_port_reset); void tegra_phy_xusb_utmi_pad_power_on(struct phy *phy) { - struct tegra_xusb_lane *lane = phy_get_drvdata(phy); - struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra_xusb_lane *lane; + struct tegra_xusb_padctl *padctl; + + if (!phy) + return; + + lane = phy_get_drvdata(phy); + padctl = lane->pad->padctl; if (padctl->soc->ops->utmi_pad_power_on) padctl->soc->ops->utmi_pad_power_on(phy); @@ -1471,8 +1477,14 @@ EXPORT_SYMBOL_GPL(tegra_phy_xusb_utmi_pad_power_on); void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy) { - struct tegra_xusb_lane *lane = phy_get_drvdata(phy); - struct tegra_xusb_padctl *padctl = lane->pad->padctl; + struct tegra_xusb_lane *lane; + struct tegra_xusb_padctl *padctl; + + if (!phy) + return; + + lane = phy_get_drvdata(phy); + padctl = lane->pad->padctl; if (padctl->soc->ops->utmi_pad_power_down) padctl->soc->ops->utmi_pad_power_down(phy); From 10f3f10f202109e3841c0c75add5a743ca197205 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 13 Oct 2022 14:46:39 -0700 Subject: [PATCH 008/328] MAINTAINERS: git://github -> https://github.com for broadcom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Github deprecated the git:// links about a year ago, so let's move to the https:// URLs instead. Acked-by: William Zhang Reviewed-by: Philippe Mathieu-Daudé Reported-by: Conor Dooley Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/ Signed-off-by: Palmer Dabbelt Signed-off-by: Florian Fainelli --- MAINTAINERS | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..83612d907bed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3984,7 +3984,7 @@ M: Rafał Miłecki R: Broadcom internal kernel review list L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml F: arch/arm64/boot/dts/broadcom/bcmbca/* N: bcmbca @@ -4009,7 +4009,7 @@ R: Broadcom internal kernel review list L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml F: drivers/pci/controller/pcie-brcmstb.c F: drivers/staging/vc04_services @@ -4023,7 +4023,7 @@ M: Ray Jui M: Scott Branden R: Broadcom internal kernel review list S: Maintained -T: git git://github.com/broadcom/mach-bcm +T: git https://github.com/broadcom/mach-bcm F: arch/arm/mach-bcm/ N: bcm281* N: bcm113* @@ -4088,7 +4088,7 @@ M: Florian Fainelli R: Broadcom internal kernel review list L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml F: arch/arm/boot/dts/bcm7*.dts* F: arch/arm/include/asm/hardware/cache-b15-rac.h @@ -4120,7 +4120,7 @@ M: Florian Fainelli R: Broadcom internal kernel review list L: linux-mips@vger.kernel.org S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: arch/mips/bmips/* F: arch/mips/boot/dts/brcm/bcm*.dts* F: arch/mips/include/asm/mach-bmips/* @@ -4259,7 +4259,7 @@ M: Scott Branden R: Broadcom internal kernel review list L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: arch/arm64/boot/dts/broadcom/northstar2/* F: arch/arm64/boot/dts/broadcom/stingray/* F: drivers/clk/bcm/clk-ns* @@ -4329,7 +4329,7 @@ M: Florian Fainelli R: Broadcom internal kernel review list L: linux-pm@vger.kernel.org S: Maintained -T: git git://github.com/broadcom/stblinux.git +T: git https://github.com/broadcom/stblinux.git F: drivers/soc/bcm/bcm63xx/bcm-pmb.c F: include/dt-bindings/soc/bcm-pmb.h From 2ff4ba9e37024735f5cefc5ea2a73fc66addfe0e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 29 Sep 2022 21:55:21 +0200 Subject: [PATCH 009/328] clk: rs9: Fix I2C accessors Add custom I2C accessors to this driver, since the regular I2C regmap ones do not generate the exact I2C transfers required by the chip. On I2C write, it is mandatory to send transfer length first, on read the chip returns the transfer length in first byte. Instead of always reading back 8 bytes, which is the default and also the size of the entire register file, set BCP register to 1 to read out 1 byte which is less wasteful. Fixes: 892e0ddea1aa ("clk: rs9: Add Renesas 9-series PCIe clock generator driver") Reported-by: Alexander Stein Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20220929195521.284497-1-marex@denx.de Reviewed-by: Alexander Stein Signed-off-by: Stephen Boyd --- drivers/clk/clk-renesas-pcie.c | 65 ++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/drivers/clk/clk-renesas-pcie.c b/drivers/clk/clk-renesas-pcie.c index 4f5df1fc74b4..e6247141d0c0 100644 --- a/drivers/clk/clk-renesas-pcie.c +++ b/drivers/clk/clk-renesas-pcie.c @@ -90,13 +90,66 @@ static const struct regmap_access_table rs9_writeable_table = { .n_yes_ranges = ARRAY_SIZE(rs9_writeable_ranges), }; +static int rs9_regmap_i2c_write(void *context, + unsigned int reg, unsigned int val) +{ + struct i2c_client *i2c = context; + const u8 data[3] = { reg, 1, val }; + const int count = ARRAY_SIZE(data); + int ret; + + ret = i2c_master_send(i2c, data, count); + if (ret == count) + return 0; + else if (ret < 0) + return ret; + else + return -EIO; +} + +static int rs9_regmap_i2c_read(void *context, + unsigned int reg, unsigned int *val) +{ + struct i2c_client *i2c = context; + struct i2c_msg xfer[2]; + u8 txdata = reg; + u8 rxdata[2]; + int ret; + + xfer[0].addr = i2c->addr; + xfer[0].flags = 0; + xfer[0].len = 1; + xfer[0].buf = (void *)&txdata; + + xfer[1].addr = i2c->addr; + xfer[1].flags = I2C_M_RD; + xfer[1].len = 2; + xfer[1].buf = (void *)rxdata; + + ret = i2c_transfer(i2c->adapter, xfer, 2); + if (ret < 0) + return ret; + if (ret != 2) + return -EIO; + + /* + * Byte 0 is transfer length, which is always 1 due + * to BCP register programming to 1 in rs9_probe(), + * ignore it and use data from Byte 1. + */ + *val = rxdata[1]; + return 0; +} + static const struct regmap_config rs9_regmap_config = { .reg_bits = 8, .val_bits = 8, - .cache_type = REGCACHE_FLAT, - .max_register = 0x8, + .cache_type = REGCACHE_NONE, + .max_register = RS9_REG_BCP, .rd_table = &rs9_readable_table, .wr_table = &rs9_writeable_table, + .reg_write = rs9_regmap_i2c_write, + .reg_read = rs9_regmap_i2c_read, }; static int rs9_get_output_config(struct rs9_driver_data *rs9, int idx) @@ -242,11 +295,17 @@ static int rs9_probe(struct i2c_client *client) return ret; } - rs9->regmap = devm_regmap_init_i2c(client, &rs9_regmap_config); + rs9->regmap = devm_regmap_init(&client->dev, NULL, + client, &rs9_regmap_config); if (IS_ERR(rs9->regmap)) return dev_err_probe(&client->dev, PTR_ERR(rs9->regmap), "Failed to allocate register map\n"); + /* Always read back 1 Byte via I2C */ + ret = regmap_write(rs9->regmap, RS9_REG_BCP, 1); + if (ret < 0) + return ret; + /* Register clock */ for (i = 0; i < rs9->chip_info->num_clks; i++) { snprintf(name, 5, "DIF%d", i); From 7e3e6e1b75c9643e25e8ca7d6caf1b1faf8f022e Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 5 Oct 2022 18:13:44 +0100 Subject: [PATCH 010/328] clk: sifive: select by default if SOC_SIFIVE With the aim of dropping direct selects of drivers from Kconfig.socs, default the SiFive clock drivers to the value of SOC_SIFIVE. Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221005171348.167476-2-conor@kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/sifive/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/sifive/Kconfig b/drivers/clk/sifive/Kconfig index 9132c3c4aa86..b7fde0aadfcb 100644 --- a/drivers/clk/sifive/Kconfig +++ b/drivers/clk/sifive/Kconfig @@ -2,7 +2,8 @@ menuconfig CLK_SIFIVE bool "SiFive SoC driver support" - depends on RISCV || COMPILE_TEST + depends on SOC_SIFIVE || COMPILE_TEST + default SOC_SIFIVE help SoC drivers for SiFive Linux-capable SoCs. @@ -10,6 +11,7 @@ if CLK_SIFIVE config CLK_SIFIVE_PRCI bool "PRCI driver for SiFive SoCs" + default SOC_SIFIVE select RESET_CONTROLLER select RESET_SIMPLE select CLK_ANALOGBITS_WRPLL_CLN28HPC From 8fbf8636cd37b821ce3482748340008dbbe2dcb5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sun, 9 Oct 2022 10:50:56 +0800 Subject: [PATCH 011/328] clk: mediatek: clk-mt8195-topckgen: Fix error return code in clk_mt8195_topck_probe() If devm_clk_hw_register_mux() fails in clk_mt8195_topck_probe(), it should return error code. Fixes: deeb2af77cf6 ("clk: mediatek: clk-mt8195-topckgen: Register mfg_ck_fast_ref as generic mux") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221009025056.35311-1-yangyingliang@huawei.com Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-mt8195-topckgen.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/mediatek/clk-mt8195-topckgen.c b/drivers/clk/mediatek/clk-mt8195-topckgen.c index 8cbab5ca2e58..1e016329c1d2 100644 --- a/drivers/clk/mediatek/clk-mt8195-topckgen.c +++ b/drivers/clk/mediatek/clk-mt8195-topckgen.c @@ -1270,8 +1270,10 @@ static int clk_mt8195_topck_probe(struct platform_device *pdev) hw = devm_clk_hw_register_mux(&pdev->dev, "mfg_ck_fast_ref", mfg_fast_parents, ARRAY_SIZE(mfg_fast_parents), CLK_SET_RATE_PARENT, (base + 0x250), 8, 1, 0, &mt8195_clk_lock); - if (IS_ERR(hw)) + if (IS_ERR(hw)) { + r = PTR_ERR(hw); goto unregister_muxes; + } top_clk_data->hws[CLK_TOP_MFG_CK_FAST_REF] = hw; r = clk_mt8195_reg_mfg_mux_notifier(&pdev->dev, From c29f446108e10bb791220ffbba54d82722d60d45 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 20 Sep 2022 17:04:12 +0200 Subject: [PATCH 012/328] dt-bindings: hwlock: qcom-hwspinlock: add syscon to MSM8974 The TCSR_MUTEX region contains two set of registers: mutex and halt. Add syscon, so the TCSR mutex device (hwspinlock) can use MMIO based method and in the same time share regmap with other devices for the halt regs. Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring Tested-by: Luca Weiss # fairphone-fp2 Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220920150414.637634-2-krzysztof.kozlowski@linaro.org --- .../devicetree/bindings/hwlock/qcom-hwspinlock.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml index 1a3adf75934b..ee2726149cf3 100644 --- a/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml +++ b/Documentation/devicetree/bindings/hwlock/qcom-hwspinlock.yaml @@ -24,9 +24,13 @@ properties: - qcom,apq8084-tcsr-mutex - qcom,ipq6018-tcsr-mutex - qcom,msm8226-tcsr-mutex - - qcom,msm8974-tcsr-mutex - qcom,msm8994-tcsr-mutex - const: qcom,tcsr-mutex + - items: + - enum: + - qcom,msm8974-tcsr-mutex + - const: qcom,tcsr-mutex + - const: syscon reg: maxItems: 1 From ba5284ebe497044f37c9bb9c7b1564932f4b6610 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 7 Oct 2022 15:10:00 +0200 Subject: [PATCH 013/328] clk: renesas: r8a779g0: Add SASYNCPER clocks On R-Car V4H, all PLLs except PLL5 support Spread Spectrum and/or Fractional Multiplication to reduce electromagnetic interference. Add the SASYNCPER and SASYNCPERD[124] clocks, which are used as clock sources for modules that must not be affected by Spread Spectrum and/or Fractional Multiplication. Signed-off-by: Geert Uytterhoeven Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/d0f35c35e1f96c5a649ab477e7ba5d8025957cd0.1665147497.git.geert+renesas@glider.be --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index 9641122133b5..3e8c93facfa1 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -47,6 +47,7 @@ enum clk_ids { CLK_S0_VIO, CLK_S0_VC, CLK_S0_HSC, + CLK_SASYNCPER, CLK_SV_VIP, CLK_SV_IR, CLK_SDSRC, @@ -84,6 +85,7 @@ static const struct cpg_core_clk r8a779g0_core_clks[] __initconst = { DEF_FIXED(".s0_vio", CLK_S0_VIO, CLK_PLL1_DIV2, 2, 1), DEF_FIXED(".s0_vc", CLK_S0_VC, CLK_PLL1_DIV2, 2, 1), DEF_FIXED(".s0_hsc", CLK_S0_HSC, CLK_PLL1_DIV2, 2, 1), + DEF_FIXED(".sasyncper", CLK_SASYNCPER, CLK_PLL5_DIV4, 3, 1), DEF_FIXED(".sv_vip", CLK_SV_VIP, CLK_PLL1, 5, 1), DEF_FIXED(".sv_ir", CLK_SV_IR, CLK_PLL1, 5, 1), DEF_BASE(".sdsrc", CLK_SDSRC, CLK_TYPE_GEN4_SDSRC, CLK_PLL5), @@ -128,6 +130,9 @@ static const struct cpg_core_clk r8a779g0_core_clks[] __initconst = { DEF_FIXED("s0d4_hsc", R8A779G0_CLK_S0D4_HSC, CLK_S0_HSC, 4, 1), DEF_FIXED("cl16m_hsc", R8A779G0_CLK_CL16M_HSC, CLK_S0_HSC, 48, 1), DEF_FIXED("s0d2_cc", R8A779G0_CLK_S0D2_CC, CLK_S0, 2, 1), + DEF_FIXED("sasyncperd1",R8A779G0_CLK_SASYNCPERD1, CLK_SASYNCPER,1, 1), + DEF_FIXED("sasyncperd2",R8A779G0_CLK_SASYNCPERD2, CLK_SASYNCPER,2, 1), + DEF_FIXED("sasyncperd4",R8A779G0_CLK_SASYNCPERD4, CLK_SASYNCPER,4, 1), DEF_FIXED("svd1_ir", R8A779G0_CLK_SVD1_IR, CLK_SV_IR, 1, 1), DEF_FIXED("svd2_ir", R8A779G0_CLK_SVD2_IR, CLK_SV_IR, 2, 1), DEF_FIXED("svd1_vip", R8A779G0_CLK_SVD1_VIP, CLK_SV_VIP, 1, 1), From 97cf79677ecb50a38517253ae2fd705849a7e51a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 16 Oct 2022 17:54:40 -0700 Subject: [PATCH 014/328] xfs: avoid a UAF when log intent item recovery fails KASAN reported a UAF bug when I was running xfs/235: BUG: KASAN: use-after-free in xlog_recover_process_intents+0xa77/0xae0 [xfs] Read of size 8 at addr ffff88804391b360 by task mount/5680 CPU: 2 PID: 5680 Comm: mount Not tainted 6.0.0-xfsx #6.0.0 77e7b52a4943a975441e5ac90a5ad7748b7867f6 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl+0x34/0x44 print_report.cold+0x2cc/0x682 kasan_report+0xa3/0x120 xlog_recover_process_intents+0xa77/0xae0 [xfs fb841c7180aad3f8359438576e27867f5795667e] xlog_recover_finish+0x7d/0x970 [xfs fb841c7180aad3f8359438576e27867f5795667e] xfs_log_mount_finish+0x2d7/0x5d0 [xfs fb841c7180aad3f8359438576e27867f5795667e] xfs_mountfs+0x11d4/0x1d10 [xfs fb841c7180aad3f8359438576e27867f5795667e] xfs_fs_fill_super+0x13d5/0x1a80 [xfs fb841c7180aad3f8359438576e27867f5795667e] get_tree_bdev+0x3da/0x6e0 vfs_get_tree+0x7d/0x240 path_mount+0xdd3/0x17d0 __x64_sys_mount+0x1fa/0x270 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7ff5bc069eae Code: 48 8b 0d 85 1f 0f 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 52 1f 0f 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe433fd448 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ff5bc069eae RDX: 00005575d7213290 RSI: 00005575d72132d0 RDI: 00005575d72132b0 RBP: 00005575d7212fd0 R08: 00005575d7213230 R09: 00005575d7213fe0 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00005575d7213290 R14: 00005575d72132b0 R15: 00005575d7212fd0 Allocated by task 5680: kasan_save_stack+0x1e/0x40 __kasan_slab_alloc+0x66/0x80 kmem_cache_alloc+0x152/0x320 xfs_rui_init+0x17a/0x1b0 [xfs] xlog_recover_rui_commit_pass2+0xb9/0x2e0 [xfs] xlog_recover_items_pass2+0xe9/0x220 [xfs] xlog_recover_commit_trans+0x673/0x900 [xfs] xlog_recovery_process_trans+0xbe/0x130 [xfs] xlog_recover_process_data+0x103/0x2a0 [xfs] xlog_do_recovery_pass+0x548/0xc60 [xfs] xlog_do_log_recovery+0x62/0xc0 [xfs] xlog_do_recover+0x73/0x480 [xfs] xlog_recover+0x229/0x460 [xfs] xfs_log_mount+0x284/0x640 [xfs] xfs_mountfs+0xf8b/0x1d10 [xfs] xfs_fs_fill_super+0x13d5/0x1a80 [xfs] get_tree_bdev+0x3da/0x6e0 vfs_get_tree+0x7d/0x240 path_mount+0xdd3/0x17d0 __x64_sys_mount+0x1fa/0x270 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Freed by task 5680: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_set_free_info+0x20/0x30 ____kasan_slab_free+0x144/0x1b0 slab_free_freelist_hook+0xab/0x180 kmem_cache_free+0x1f1/0x410 xfs_rud_item_release+0x33/0x80 [xfs] xfs_trans_free_items+0xc3/0x220 [xfs] xfs_trans_cancel+0x1fa/0x590 [xfs] xfs_rui_item_recover+0x913/0xd60 [xfs] xlog_recover_process_intents+0x24e/0xae0 [xfs] xlog_recover_finish+0x7d/0x970 [xfs] xfs_log_mount_finish+0x2d7/0x5d0 [xfs] xfs_mountfs+0x11d4/0x1d10 [xfs] xfs_fs_fill_super+0x13d5/0x1a80 [xfs] get_tree_bdev+0x3da/0x6e0 vfs_get_tree+0x7d/0x240 path_mount+0xdd3/0x17d0 __x64_sys_mount+0x1fa/0x270 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The buggy address belongs to the object at ffff88804391b300 which belongs to the cache xfs_rui_item of size 688 The buggy address is located 96 bytes inside of 688-byte region [ffff88804391b300, ffff88804391b5b0) The buggy address belongs to the physical page: page:ffffea00010e4600 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888043919320 pfn:0x43918 head:ffffea00010e4600 order:2 compound_mapcount:0 compound_pincount:0 flags: 0x4fff80000010200(slab|head|node=1|zone=1|lastcpupid=0xfff) raw: 04fff80000010200 0000000000000000 dead000000000122 ffff88807f0eadc0 raw: ffff888043919320 0000000080140010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88804391b200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88804391b280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88804391b300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88804391b380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88804391b400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== The test fuzzes an rmap btree block and starts writer threads to induce a filesystem shutdown on the corrupt block. When the filesystem is remounted, recovery will try to replay the committed rmap intent item, but the corruption problem causes the recovery transaction to fail. Cancelling the transaction frees the RUD, which frees the RUI that we recovered. When we return to xlog_recover_process_intents, @lip is now a dangling pointer, and we cannot use it to find the iop_recover method for the tracepoint. Hence we must store the item ops before calling ->iop_recover if we want to give it to the tracepoint so that the trace data will tell us exactly which intent item failed. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_log_recover.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 17e923b9c5fa..322eb2ee6c55 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2552,6 +2552,8 @@ xlog_recover_process_intents( for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL; lip = xfs_trans_ail_cursor_next(ailp, &cur)) { + const struct xfs_item_ops *ops; + if (!xlog_item_is_intent(lip)) break; @@ -2567,13 +2569,17 @@ xlog_recover_process_intents( * deferred ops, you /must/ attach them to the capture list in * the recover routine or else those subsequent intents will be * replayed in the wrong order! + * + * The recovery function can free the log item, so we must not + * access lip after it returns. */ spin_unlock(&ailp->ail_lock); - error = lip->li_ops->iop_recover(lip, &capture_list); + ops = lip->li_ops; + error = ops->iop_recover(lip, &capture_list); spin_lock(&ailp->ail_lock); if (error) { trace_xlog_intent_recovery_failed(log->l_mp, error, - lip->li_ops->iop_recover); + ops->iop_recover); break; } } From 091873e47ef700e935aa80079b63929af599a0b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Wed, 19 Oct 2022 22:05:36 +0200 Subject: [PATCH 015/328] selftests/landlock: Build without static libraries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only (forced) static test binary doesn't depend on libcap. Because using -lcap on systems that don't have such static library would fail (e.g. on Arch Linux), let's be more specific and require only dynamic libcap linking. Fixes: a52540522c95 ("selftests/landlock: Fix out-of-tree builds") Cc: Anders Roxell Cc: Guillaume Tucker Cc: Mark Brown Cc: Shuah Khan Cc: stable@vger.kernel.org Signed-off-by: Mickaël Salaün Link: https://lore.kernel.org/r/20221019200536.2771316-1-mic@digikod.net --- tools/testing/selftests/landlock/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile index 6632bfff486b..348e2dbdb4e0 100644 --- a/tools/testing/selftests/landlock/Makefile +++ b/tools/testing/selftests/landlock/Makefile @@ -3,7 +3,6 @@ # First run: make -C ../../../.. headers_install CFLAGS += -Wall -O2 $(KHDR_INCLUDES) -LDLIBS += -lcap LOCAL_HDRS += common.h @@ -13,10 +12,12 @@ TEST_GEN_PROGS := $(src_test:.c=) TEST_GEN_PROGS_EXTENDED := true -# Static linking for short targets: +# Short targets: +$(TEST_GEN_PROGS): LDLIBS += -lcap $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static include ../lib.mk -# Static linking for targets with $(OUTPUT)/ prefix: +# Targets with $(OUTPUT)/ prefix: +$(TEST_GEN_PROGS): LDLIBS += -lcap $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static From 9fa248c65bdbf5af0a2f74dd38575acfc8dfd2bf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Oct 2022 17:18:58 +0200 Subject: [PATCH 016/328] fuse: fix readdir cache race There's a race in fuse's readdir cache that can result in an uninitilized page being read. The page lock is supposed to prevent this from happening but in the following case it doesn't: Two fuse_add_dirent_to_cache() start out and get the same parameters (size=0,offset=0). One of them wins the race to create and lock the page, after which it fills in data, sets rdc.size and unlocks the page. In the meantime the page gets evicted from the cache before the other instance gets to run. That one also creates the page, but finds the size to be mismatched, bails out and leaves the uninitialized page in the cache. Fix by marking a filled page uptodate and ignoring non-uptodate pages. Reported-by: Frank Sorenson Fixes: 5d7bc7e8680c ("fuse: allow using readdir cache") Cc: # v4.20 Signed-off-by: Miklos Szeredi --- fs/fuse/readdir.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index b4e565711045..e8deaacf1832 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -77,8 +77,10 @@ static void fuse_add_dirent_to_cache(struct file *file, goto unlock; addr = kmap_local_page(page); - if (!offset) + if (!offset) { clear_page(addr); + SetPageUptodate(page); + } memcpy(addr + offset, dirent, reclen); kunmap_local(addr); fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; @@ -516,6 +518,12 @@ retry_locked: page = find_get_page_flags(file->f_mapping, index, FGP_ACCESSED | FGP_LOCK); + /* Page gone missing, then re-added to cache, but not initialized? */ + if (page && !PageUptodate(page)) { + unlock_page(page); + put_page(page); + page = NULL; + } spin_lock(&fi->rdc.lock); if (!page) { /* From 13cf24e00665c9751951a422756d975812b71173 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Tue, 18 Oct 2022 14:32:35 -0700 Subject: [PATCH 017/328] xfs: fix exception caused by unexpected illegal bestcount in leaf dir For leaf dir, In most cases, there should be as many bestfree slots as the dir data blocks that can fit under i_size (except for [1]). Root cause is we don't examin the number bestfree slots, when the slots number less than dir data blocks, if we need to allocate new dir data block and update the bestfree array, we will use the dir block number as index to assign bestfree array, while we did not check the leaf buf boundary which may cause UAF or other memory access problem. This issue can also triggered with test cases xfs/473 from fstests. According to Dave Chinner & Darrick's suggestion, adding buffer verifier to detect this abnormal situation in time. Simplify the testcase for fstest xfs/554 [1] The error log is shown as follows: ================================================================== BUG: KASAN: use-after-free in xfs_dir2_leaf_addname+0x1995/0x1ac0 Write of size 2 at addr ffff88810168b000 by task touch/1552 CPU: 5 PID: 1552 Comm: touch Not tainted 6.0.0-rc3+ #101 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: dump_stack_lvl+0x4d/0x66 print_report.cold+0xf6/0x691 kasan_report+0xa8/0x120 xfs_dir2_leaf_addname+0x1995/0x1ac0 xfs_dir_createname+0x58c/0x7f0 xfs_create+0x7af/0x1010 xfs_generic_create+0x270/0x5e0 path_openat+0x270b/0x3450 do_filp_open+0x1cf/0x2b0 do_sys_openat2+0x46b/0x7a0 do_sys_open+0xb7/0x130 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fe4d9e9312b Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00 00 00 85 c0 75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 91 00 00 00 48 8b 4c 24 28 64 48 33 0c 25 RSP: 002b:00007ffda4c16c20 EFLAGS: 00000246 ORIG_RAX: 0000000000000101 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fe4d9e9312b RDX: 0000000000000941 RSI: 00007ffda4c17f33 RDI: 00000000ffffff9c RBP: 00007ffda4c17f33 R08: 0000000000000000 R09: 0000000000000000 R10: 00000000000001b6 R11: 0000000000000246 R12: 0000000000000941 R13: 00007fe4d9f631a4 R14: 00007ffda4c17f33 R15: 0000000000000000 The buggy address belongs to the physical page: page:ffffea000405a2c0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10168b flags: 0x2fffff80000000(node=0|zone=2|lastcpupid=0x1fffff) raw: 002fffff80000000 ffffea0004057788 ffffea000402dbc8 0000000000000000 raw: 0000000000000000 0000000000170000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88810168af00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88810168af80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff88810168b000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff88810168b080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88810168b100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint 00000000: 58 44 44 33 5b 53 35 c2 00 00 00 00 00 00 00 78 XDD3[S5........x XFS (sdb): Internal error xfs_dir2_data_use_free at line 1200 of file fs/xfs/libxfs/xfs_dir2_data.c. Caller xfs_dir2_data_use_free+0x28a/0xeb0 CPU: 5 PID: 1552 Comm: touch Tainted: G B 6.0.0-rc3+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: dump_stack_lvl+0x4d/0x66 xfs_corruption_error+0x132/0x150 xfs_dir2_data_use_free+0x198/0xeb0 xfs_dir2_leaf_addname+0xa59/0x1ac0 xfs_dir_createname+0x58c/0x7f0 xfs_create+0x7af/0x1010 xfs_generic_create+0x270/0x5e0 path_openat+0x270b/0x3450 do_filp_open+0x1cf/0x2b0 do_sys_openat2+0x46b/0x7a0 do_sys_open+0xb7/0x130 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fe4d9e9312b Code: 25 00 00 41 00 3d 00 00 41 00 74 4b 64 8b 04 25 18 00 00 00 85 c0 75 67 44 89 e2 48 89 ee bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 0f 87 91 00 00 00 48 8b 4c 24 28 64 48 33 0c 25 RSP: 002b:00007ffda4c16c20 EFLAGS: 00000246 ORIG_RAX: 0000000000000101 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fe4d9e9312b RDX: 0000000000000941 RSI: 00007ffda4c17f46 RDI: 00000000ffffff9c RBP: 00007ffda4c17f46 R08: 0000000000000000 R09: 0000000000000001 R10: 00000000000001b6 R11: 0000000000000246 R12: 0000000000000941 R13: 00007fe4d9f631a4 R14: 00007ffda4c17f46 R15: 0000000000000000 XFS (sdb): Corruption detected. Unmount and run xfs_repair [1] https://lore.kernel.org/all/20220928095355.2074025-1-guoxuenan@huawei.com/ Reviewed-by: Hou Tao Signed-off-by: Guo Xuenan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_dir2_leaf.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index d9b66306a9a7..cb9e950a911d 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -146,6 +146,8 @@ xfs_dir3_leaf_check_int( xfs_dir2_leaf_tail_t *ltp; int stale; int i; + bool isleaf1 = (hdr->magic == XFS_DIR2_LEAF1_MAGIC || + hdr->magic == XFS_DIR3_LEAF1_MAGIC); ltp = xfs_dir2_leaf_tail_p(geo, leaf); @@ -158,8 +160,7 @@ xfs_dir3_leaf_check_int( return __this_address; /* Leaves and bests don't overlap in leaf format. */ - if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC || - hdr->magic == XFS_DIR3_LEAF1_MAGIC) && + if (isleaf1 && (char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp)) return __this_address; @@ -175,6 +176,10 @@ xfs_dir3_leaf_check_int( } if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) stale++; + if (isleaf1 && xfs_dir2_dataptr_to_db(geo, + be32_to_cpu(hdr->ents[i].address)) >= + be32_to_cpu(ltp->bestcount)) + return __this_address; } if (hdr->stale != stale) return __this_address; From fc93812c725068e6a491ce574f058a4530130c00 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 18 Oct 2022 14:37:39 -0700 Subject: [PATCH 018/328] xfs: remove redundant pointer lip The assignment to pointer lip is not really required, the pointer lip is redundant and can be removed. Cleans up clang-scan warning: warning: Although the value stored to 'lip' is used in the enclosing expression, the value is never actually read from 'lip' [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_trans_ail.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 16fbf2a1144c..f51df7d94ef7 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -730,11 +730,10 @@ void xfs_ail_push_all_sync( struct xfs_ail *ailp) { - struct xfs_log_item *lip; DEFINE_WAIT(wait); spin_lock(&ailp->ail_lock); - while ((lip = xfs_ail_max(ailp)) != NULL) { + while (xfs_ail_max(ailp) != NULL) { prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE); wake_up_process(ailp->ail_task); spin_unlock(&ailp->ail_lock); From cf4f4c12dea7a977a143c8fe5af1740b7f9876f8 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Tue, 18 Oct 2022 14:38:14 -0700 Subject: [PATCH 019/328] xfs: fix memory leak in xfs_errortag_init When `xfs_sysfs_init` returns failed, `mp->m_errortag` needs to free. Otherwise kmemleak would report memory leak after mounting xfs image: unreferenced object 0xffff888101364900 (size 192): comm "mount", pid 13099, jiffies 4294915218 (age 335.207s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000f08ad25c>] __kmalloc+0x41/0x1b0 [<00000000dca9aeb6>] kmem_alloc+0xfd/0x430 [<0000000040361882>] xfs_errortag_init+0x20/0x110 [<00000000b384a0f6>] xfs_mountfs+0x6ea/0x1a30 [<000000003774395d>] xfs_fs_fill_super+0xe10/0x1a80 [<000000009cf07b6c>] get_tree_bdev+0x3e7/0x700 [<00000000046b5426>] vfs_get_tree+0x8e/0x2e0 [<00000000952ec082>] path_mount+0xf8c/0x1990 [<00000000beb1f838>] do_mount+0xee/0x110 [<000000000e9c41bb>] __x64_sys_mount+0x14b/0x1f0 [<00000000f7bb938e>] do_syscall_64+0x3b/0x90 [<000000003fcd67a9>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: c68401011522 ("xfs: expose errortag knobs via sysfs") Signed-off-by: Zeng Heng Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_error.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 7db588ed0be5..c6b2aabd6f18 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -234,13 +234,18 @@ int xfs_errortag_init( struct xfs_mount *mp) { + int ret; + mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX, KM_MAYFAIL); if (!mp->m_errortag) return -ENOMEM; - return xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, - &mp->m_kobj, "errortag"); + ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype, + &mp->m_kobj, "errortag"); + if (ret) + kmem_free(mp->m_errortag); + return ret; } void From d08af40340cad0e025d643c3982781a8f99d5032 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Tue, 18 Oct 2022 14:38:29 -0700 Subject: [PATCH 020/328] xfs: Fix unreferenced object reported by kmemleak in xfs_sysfs_init() kmemleak reported a sequence of memory leaks, and one of them indicated we failed to free a pointer: comm "mount", pid 19610, jiffies 4297086464 (age 60.635s) hex dump (first 8 bytes): 73 64 61 00 81 88 ff ff sda..... backtrace: [<00000000d77f3e04>] kstrdup_const+0x46/0x70 [<00000000e51fa804>] kobject_set_name_vargs+0x2f/0xb0 [<00000000247cd595>] kobject_init_and_add+0xb0/0x120 [<00000000f9139aaf>] xfs_mountfs+0x367/0xfc0 [<00000000250d3caf>] xfs_fs_fill_super+0xa16/0xdc0 [<000000008d873d38>] get_tree_bdev+0x256/0x390 [<000000004881f3fa>] vfs_get_tree+0x41/0xf0 [<000000008291ab52>] path_mount+0x9b3/0xdd0 [<0000000022ba8f2d>] __x64_sys_mount+0x190/0x1d0 As mentioned in kobject_init_and_add() comment, if this function returns an error, kobject_put() must be called to properly clean up the memory associated with the object. Apparently, xfs_sysfs_init() does not follow such a requirement. When kobject_init_and_add() returns an error, the space of kobj->kobject.name alloced by kstrdup_const() is unfree, which will cause the above stack. Fix it by adding kobject_put() when kobject_init_and_add returns an error. Fixes: a31b1d3d89e4 ("xfs: add xfs_mount sysfs kobject") Signed-off-by: Li Zetao Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_sysfs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 43585850f154..513095e353a5 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -33,10 +33,15 @@ xfs_sysfs_init( const char *name) { struct kobject *parent; + int err; parent = parent_kobj ? &parent_kobj->kobject : NULL; init_completion(&kobj->complete); - return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); + err = kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name); + if (err) + kobject_put(&kobj->kobject); + + return err; } static inline void From cf00b33058b196b4db928419dde68993b15a975b Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 15 Aug 2022 16:40:43 +0100 Subject: [PATCH 021/328] cxl/mbox: Add a check on input payload size A bug in the LSA code resulted in transfers slightly larger than the mailbox size. Let us make it easier to catch similar issues in future by adding a low level check. Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20220815154044.24733-2-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 16176b9278b4..0c90f13870a4 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -174,7 +174,7 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, }; int rc; - if (out_size > cxlds->payload_size) + if (in_size > cxlds->payload_size || out_size > cxlds->payload_size) return -E2BIG; rc = cxlds->mbox_send(cxlds, &mbox_cmd); From 2816e24b0510e0c185c0c46acff1ce7aa4c4443f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 18 Aug 2022 17:42:10 +0100 Subject: [PATCH 022/328] cxl/region: Fix null pointer dereference due to pass through decoder commit Not all decoders have a commit callback. The CXL specification allows a host bridge with a single root port to have no explicit HDM decoders. Currently the region driver assumes there are none. As such the CXL core creates a special pass through decoder instance without a commit callback. Prior to this patch, the ->commit() callback was called unconditionally. Thus a configuration with 1 Host Bridge, 1 Root Port, 1 switch with multiple downstream ports below which there are multiple CXL type 3 devices results in a situation where committing the region causes a null pointer dereference. Reported-by: Bobo WL Fixes: 176baefb2eb5 ("cxl/hdm: Commit decoder state to hardware") Signed-off-by: Jonathan Cameron Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/20220818164210.2084-1-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 401148016978..c49d9a5f1091 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -174,7 +174,8 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr) iter = to_cxl_port(iter->dev.parent)) { cxl_rr = cxl_rr_load(iter, cxlr); cxld = cxl_rr->decoder; - rc = cxld->commit(cxld); + if (cxld->commit) + rc = cxld->commit(cxld); if (rc) break; } From f010c75c05299ecd65adfd31a7841eea3476ce1f Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 15 Aug 2022 16:40:44 +0100 Subject: [PATCH 023/328] cxl/pmem: Fix failure to account for 8 byte header for writes to the device LSA. Writes to the device must include an offset and size as defined in CXL 2.0 8.2.9.5.2.4 Set LSA (Opcode 4103h) Fixes tag is non obvious as this code has been through several reworks and variable names + wasn't in use until the addition of the region code. Due to a bug in QEMU CXL emulation this overrun resulted in QEMU crashing. Reported-by: Bobo WL Signed-off-by: Jonathan Cameron Fixes: 60b8f17215de ("cxl/pmem: Translate NVDIMM label commands to CXL label commands") Link: https://lore.kernel.org/r/20220815154044.24733-3-Jonathan.Cameron@huawei.com Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 7dc0a2fa1a6b..115a7b79f343 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -107,7 +107,7 @@ static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds, *cmd = (struct nd_cmd_get_config_size) { .config_size = cxlds->lsa_size, - .max_xfer = cxlds->payload_size, + .max_xfer = cxlds->payload_size - sizeof(struct cxl_mbox_set_lsa), }; return 0; From 66063033f77e10b985258126a97573f84bb8d3b4 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 19 Oct 2022 09:55:41 -0600 Subject: [PATCH 024/328] wifi: rt2x00: use explicitly signed or unsigned types On some platforms, `char` is unsigned, but this driver, for the most part, assumed it was signed. In other places, it uses `char` to mean an unsigned number, but only in cases when the values are small. And in still other places, `char` is used as a boolean. Put an end to this confusion by declaring explicit types, depending on the context. Cc: Andrew Morton Cc: Andy Shevchenko Cc: Stanislaw Gruszka Cc: Helmut Schaa Cc: Kalle Valo Signed-off-by: Jason A. Donenfeld Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221019155541.3410813-1-Jason@zx2c4.com --- .../net/wireless/ralink/rt2x00/rt2400pci.c | 8 +-- .../net/wireless/ralink/rt2x00/rt2400pci.h | 2 +- .../net/wireless/ralink/rt2x00/rt2500pci.c | 8 +-- .../net/wireless/ralink/rt2x00/rt2500pci.h | 2 +- .../net/wireless/ralink/rt2x00/rt2500usb.c | 8 +-- .../net/wireless/ralink/rt2x00/rt2500usb.h | 2 +- .../net/wireless/ralink/rt2x00/rt2800lib.c | 60 +++++++++---------- .../net/wireless/ralink/rt2x00/rt2800lib.h | 8 +-- .../net/wireless/ralink/rt2x00/rt2x00usb.c | 6 +- drivers/net/wireless/ralink/rt2x00/rt61pci.c | 4 +- drivers/net/wireless/ralink/rt2x00/rt61pci.h | 2 +- drivers/net/wireless/ralink/rt2x00/rt73usb.c | 4 +- drivers/net/wireless/ralink/rt2x00/rt73usb.h | 2 +- 13 files changed, 58 insertions(+), 58 deletions(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c index 273c5eac3362..ddfc16de1b26 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2400pci.c @@ -1023,9 +1023,9 @@ static int rt2400pci_set_state(struct rt2x00_dev *rt2x00dev, { u32 reg, reg2; unsigned int i; - char put_to_sleep; - char bbp_state; - char rf_state; + bool put_to_sleep; + u8 bbp_state; + u8 rf_state; put_to_sleep = (state != STATE_AWAKE); @@ -1561,7 +1561,7 @@ static int rt2400pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) { struct hw_mode_spec *spec = &rt2x00dev->spec; struct channel_info *info; - char *tx_power; + u8 *tx_power; unsigned int i; /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt2400pci.h b/drivers/net/wireless/ralink/rt2x00/rt2400pci.h index b8187b6de143..979d5fd8babf 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2400pci.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2400pci.h @@ -939,7 +939,7 @@ #define DEFAULT_TXPOWER 39 #define __CLAMP_TX(__txpower) \ - clamp_t(char, (__txpower), MIN_TXPOWER, MAX_TXPOWER) + clamp_t(u8, (__txpower), MIN_TXPOWER, MAX_TXPOWER) #define TXPOWER_FROM_DEV(__txpower) \ ((__CLAMP_TX(__txpower) - MAX_TXPOWER) + MIN_TXPOWER) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c index 8faa0a80e73a..cd6371e25062 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2500pci.c @@ -1176,9 +1176,9 @@ static int rt2500pci_set_state(struct rt2x00_dev *rt2x00dev, { u32 reg, reg2; unsigned int i; - char put_to_sleep; - char bbp_state; - char rf_state; + bool put_to_sleep; + u8 bbp_state; + u8 rf_state; put_to_sleep = (state != STATE_AWAKE); @@ -1856,7 +1856,7 @@ static int rt2500pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) { struct hw_mode_spec *spec = &rt2x00dev->spec; struct channel_info *info; - char *tx_power; + u8 *tx_power; unsigned int i; /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500pci.h b/drivers/net/wireless/ralink/rt2x00/rt2500pci.h index 7e64aee2a172..ba362675c52c 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2500pci.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2500pci.h @@ -1219,6 +1219,6 @@ (((u8)(__txpower)) > MAX_TXPOWER) ? DEFAULT_TXPOWER : (__txpower) #define TXPOWER_TO_DEV(__txpower) \ - clamp_t(char, __txpower, MIN_TXPOWER, MAX_TXPOWER) + clamp_t(u8, __txpower, MIN_TXPOWER, MAX_TXPOWER) #endif /* RT2500PCI_H */ diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c index bb5ed6630645..4f3b0e6c6256 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2500usb.c @@ -984,9 +984,9 @@ static int rt2500usb_set_state(struct rt2x00_dev *rt2x00dev, u16 reg; u16 reg2; unsigned int i; - char put_to_sleep; - char bbp_state; - char rf_state; + bool put_to_sleep; + u8 bbp_state; + u8 rf_state; put_to_sleep = (state != STATE_AWAKE); @@ -1663,7 +1663,7 @@ static int rt2500usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) { struct hw_mode_spec *spec = &rt2x00dev->spec; struct channel_info *info; - char *tx_power; + u8 *tx_power; unsigned int i; /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt2500usb.h b/drivers/net/wireless/ralink/rt2x00/rt2500usb.h index 0c070288a140..746f0e950b76 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2500usb.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2500usb.h @@ -839,6 +839,6 @@ (((u8)(__txpower)) > MAX_TXPOWER) ? DEFAULT_TXPOWER : (__txpower) #define TXPOWER_TO_DEV(__txpower) \ - clamp_t(char, __txpower, MIN_TXPOWER, MAX_TXPOWER) + clamp_t(u8, __txpower, MIN_TXPOWER, MAX_TXPOWER) #endif /* RT2500USB_H */ diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c index cbbb1a4849cf..12b700c7b9c3 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c @@ -3372,10 +3372,10 @@ static void rt2800_config_channel_rf53xx(struct rt2x00_dev *rt2x00dev, if (rt2x00_has_cap_bt_coexist(rt2x00dev)) { if (rt2x00_rt_rev_gte(rt2x00dev, RT5390, REV_RT5390F)) { /* r55/r59 value array of channel 1~14 */ - static const char r55_bt_rev[] = {0x83, 0x83, + static const u8 r55_bt_rev[] = {0x83, 0x83, 0x83, 0x73, 0x73, 0x63, 0x53, 0x53, 0x53, 0x43, 0x43, 0x43, 0x43, 0x43}; - static const char r59_bt_rev[] = {0x0e, 0x0e, + static const u8 r59_bt_rev[] = {0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0b, 0x0a, 0x09, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07}; @@ -3384,7 +3384,7 @@ static void rt2800_config_channel_rf53xx(struct rt2x00_dev *rt2x00dev, rt2800_rfcsr_write(rt2x00dev, 59, r59_bt_rev[idx]); } else { - static const char r59_bt[] = {0x8b, 0x8b, 0x8b, + static const u8 r59_bt[] = {0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8b, 0x8a, 0x89, 0x88, 0x88, 0x86, 0x85, 0x84}; @@ -3392,10 +3392,10 @@ static void rt2800_config_channel_rf53xx(struct rt2x00_dev *rt2x00dev, } } else { if (rt2x00_rt_rev_gte(rt2x00dev, RT5390, REV_RT5390F)) { - static const char r55_nonbt_rev[] = {0x23, 0x23, + static const u8 r55_nonbt_rev[] = {0x23, 0x23, 0x23, 0x23, 0x13, 0x13, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03}; - static const char r59_nonbt_rev[] = {0x07, 0x07, + static const u8 r59_nonbt_rev[] = {0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x06, 0x05, 0x04, 0x04}; @@ -3406,14 +3406,14 @@ static void rt2800_config_channel_rf53xx(struct rt2x00_dev *rt2x00dev, } else if (rt2x00_rt(rt2x00dev, RT5390) || rt2x00_rt(rt2x00dev, RT5392) || rt2x00_rt(rt2x00dev, RT6352)) { - static const char r59_non_bt[] = {0x8f, 0x8f, + static const u8 r59_non_bt[] = {0x8f, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f, 0x8d, 0x8a, 0x88, 0x88, 0x87, 0x87, 0x86}; rt2800_rfcsr_write(rt2x00dev, 59, r59_non_bt[idx]); } else if (rt2x00_rt(rt2x00dev, RT5350)) { - static const char r59_non_bt[] = {0x0b, 0x0b, + static const u8 r59_non_bt[] = {0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0a, 0x0a, 0x09, 0x08, 0x07, 0x07, 0x06}; @@ -4035,23 +4035,23 @@ static void rt2800_iq_calibrate(struct rt2x00_dev *rt2x00dev, int channel) rt2800_bbp_write(rt2x00dev, 159, cal != 0xff ? cal : 0); } -static char rt2800_txpower_to_dev(struct rt2x00_dev *rt2x00dev, +static s8 rt2800_txpower_to_dev(struct rt2x00_dev *rt2x00dev, unsigned int channel, - char txpower) + s8 txpower) { if (rt2x00_rt(rt2x00dev, RT3593) || rt2x00_rt(rt2x00dev, RT3883)) txpower = rt2x00_get_field8(txpower, EEPROM_TXPOWER_ALC); if (channel <= 14) - return clamp_t(char, txpower, MIN_G_TXPOWER, MAX_G_TXPOWER); + return clamp_t(s8, txpower, MIN_G_TXPOWER, MAX_G_TXPOWER); if (rt2x00_rt(rt2x00dev, RT3593) || rt2x00_rt(rt2x00dev, RT3883)) - return clamp_t(char, txpower, MIN_A_TXPOWER_3593, + return clamp_t(s8, txpower, MIN_A_TXPOWER_3593, MAX_A_TXPOWER_3593); else - return clamp_t(char, txpower, MIN_A_TXPOWER, MAX_A_TXPOWER); + return clamp_t(s8, txpower, MIN_A_TXPOWER, MAX_A_TXPOWER); } static void rt3883_bbp_adjust(struct rt2x00_dev *rt2x00dev, @@ -8530,7 +8530,7 @@ static void rt2800_r_calibration(struct rt2x00_dev *rt2x00dev) u8 bytevalue = 0; int rcalcode; u8 r_cal_code = 0; - char d1 = 0, d2 = 0; + s8 d1 = 0, d2 = 0; u8 rfvalue; u32 MAC_RF_BYPASS0, MAC_RF_CONTROL0, MAC_PWR_PIN_CFG; u32 maccfg; @@ -8591,7 +8591,7 @@ static void rt2800_r_calibration(struct rt2x00_dev *rt2x00dev) if (bytevalue > 128) d1 = bytevalue - 256; else - d1 = (char)bytevalue; + d1 = (s8)bytevalue; rt2800_bbp_write(rt2x00dev, 22, 0x0); rt2800_rfcsr_write_bank(rt2x00dev, 0, 35, 0x01); @@ -8601,7 +8601,7 @@ static void rt2800_r_calibration(struct rt2x00_dev *rt2x00dev) if (bytevalue > 128) d2 = bytevalue - 256; else - d2 = (char)bytevalue; + d2 = (s8)bytevalue; rt2800_bbp_write(rt2x00dev, 22, 0x0); rcalcode = rt2800_calcrcalibrationcode(rt2x00dev, d1, d2); @@ -8703,7 +8703,7 @@ static void rt2800_rxdcoc_calibration(struct rt2x00_dev *rt2x00dev) static u32 rt2800_do_sqrt_accumulation(u32 si) { u32 root, root_pre, bit; - char i; + s8 i; bit = 1 << 15; root = 0; @@ -9330,11 +9330,11 @@ static void rt2800_loft_search(struct rt2x00_dev *rt2x00dev, u8 ch_idx, u8 alc_idx, u8 dc_result[][RF_ALC_NUM][2]) { u32 p0 = 0, p1 = 0, pf = 0; - char idx0 = 0, idx1 = 0; + s8 idx0 = 0, idx1 = 0; u8 idxf[] = {0x00, 0x00}; u8 ibit = 0x20; u8 iorq; - char bidx; + s8 bidx; rt2800_bbp_write(rt2x00dev, 158, 0xb0); rt2800_bbp_write(rt2x00dev, 159, 0x80); @@ -9384,17 +9384,17 @@ static void rt2800_loft_search(struct rt2x00_dev *rt2x00dev, u8 ch_idx, static void rt2800_iq_search(struct rt2x00_dev *rt2x00dev, u8 ch_idx, u8 *ges, u8 *pes) { u32 p0 = 0, p1 = 0, pf = 0; - char perr = 0, gerr = 0, iq_err = 0; - char pef = 0, gef = 0; - char psta, pend; - char gsta, gend; + s8 perr = 0, gerr = 0, iq_err = 0; + s8 pef = 0, gef = 0; + s8 psta, pend; + s8 gsta, gend; u8 ibit = 0x20; u8 first_search = 0x00, touch_neg_max = 0x00; - char idx0 = 0, idx1 = 0; + s8 idx0 = 0, idx1 = 0; u8 gop; u8 bbp = 0; - char bidx; + s8 bidx; for (bidx = 5; bidx >= 1; bidx--) { for (gop = 0; gop < 2; gop++) { @@ -10043,11 +10043,11 @@ static int rt2800_rf_lp_config(struct rt2x00_dev *rt2x00dev, bool btxcal) return 0; } -static char rt2800_lp_tx_filter_bw_cal(struct rt2x00_dev *rt2x00dev) +static s8 rt2800_lp_tx_filter_bw_cal(struct rt2x00_dev *rt2x00dev) { unsigned int cnt; u8 bbp_val; - char cal_val; + s8 cal_val; rt2800_bbp_dcoc_write(rt2x00dev, 0, 0x82); @@ -10079,7 +10079,7 @@ static void rt2800_bw_filter_calibration(struct rt2x00_dev *rt2x00dev, u8 rx_filter_target_20m = 0x27, rx_filter_target_40m = 0x31; int loop = 0, is_ht40, cnt; u8 bbp_val, rf_val; - char cal_r32_init, cal_r32_val, cal_diff; + s8 cal_r32_init, cal_r32_val, cal_diff; u8 saverfb5r00, saverfb5r01, saverfb5r03, saverfb5r04, saverfb5r05; u8 saverfb5r06, saverfb5r07; u8 saverfb5r08, saverfb5r17, saverfb5r18, saverfb5r19, saverfb5r20; @@ -11550,9 +11550,9 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev) { struct hw_mode_spec *spec = &rt2x00dev->spec; struct channel_info *info; - char *default_power1; - char *default_power2; - char *default_power3; + s8 *default_power1; + s8 *default_power2; + s8 *default_power3; unsigned int i, tx_chains, rx_chains; u32 reg; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.h b/drivers/net/wireless/ralink/rt2x00/rt2800lib.h index 3cbef77b4bd3..194de676df8f 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.h @@ -32,10 +32,10 @@ struct rf_reg_pair { struct rt2800_drv_data { u8 calibration_bw20; u8 calibration_bw40; - char rx_calibration_bw20; - char rx_calibration_bw40; - char tx_calibration_bw20; - char tx_calibration_bw40; + s8 rx_calibration_bw20; + s8 rx_calibration_bw40; + s8 tx_calibration_bw20; + s8 tx_calibration_bw40; u8 bbp25; u8 bbp26; u8 txmixer_gain_24g; diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index 0827bc860bf8..8fd22c69855f 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -117,12 +117,12 @@ int rt2x00usb_vendor_request_buff(struct rt2x00_dev *rt2x00dev, const u16 buffer_length) { int status = 0; - unsigned char *tb; + u8 *tb; u16 off, len, bsize; mutex_lock(&rt2x00dev->csr_mutex); - tb = (char *)buffer; + tb = (u8 *)buffer; off = offset; len = buffer_length; while (len && !status) { @@ -215,7 +215,7 @@ void rt2x00usb_register_read_async(struct rt2x00_dev *rt2x00dev, rd->cr.wLength = cpu_to_le16(sizeof(u32)); usb_fill_control_urb(urb, usb_dev, usb_rcvctrlpipe(usb_dev, 0), - (unsigned char *)(&rd->cr), &rd->reg, sizeof(rd->reg), + (u8 *)(&rd->cr), &rd->reg, sizeof(rd->reg), rt2x00usb_register_read_async_cb, rd); usb_anchor_urb(urb, rt2x00dev->anchor); if (usb_submit_urb(urb, GFP_ATOMIC) < 0) { diff --git a/drivers/net/wireless/ralink/rt2x00/rt61pci.c b/drivers/net/wireless/ralink/rt2x00/rt61pci.c index d92f9eb07dc9..81db7f57c7e4 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt61pci.c +++ b/drivers/net/wireless/ralink/rt2x00/rt61pci.c @@ -1709,7 +1709,7 @@ static int rt61pci_set_state(struct rt2x00_dev *rt2x00dev, enum dev_state state) { u32 reg, reg2; unsigned int i; - char put_to_sleep; + bool put_to_sleep; put_to_sleep = (state != STATE_AWAKE); @@ -2656,7 +2656,7 @@ static int rt61pci_probe_hw_mode(struct rt2x00_dev *rt2x00dev) { struct hw_mode_spec *spec = &rt2x00dev->spec; struct channel_info *info; - char *tx_power; + u8 *tx_power; unsigned int i; /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt61pci.h b/drivers/net/wireless/ralink/rt2x00/rt61pci.h index 5f208ad509bd..d72d0ffd1127 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt61pci.h +++ b/drivers/net/wireless/ralink/rt2x00/rt61pci.h @@ -1484,6 +1484,6 @@ struct hw_pairwise_ta_entry { (((u8)(__txpower)) > MAX_TXPOWER) ? DEFAULT_TXPOWER : (__txpower) #define TXPOWER_TO_DEV(__txpower) \ - clamp_t(char, __txpower, MIN_TXPOWER, MAX_TXPOWER) + clamp_t(u8, __txpower, MIN_TXPOWER, MAX_TXPOWER) #endif /* RT61PCI_H */ diff --git a/drivers/net/wireless/ralink/rt2x00/rt73usb.c b/drivers/net/wireless/ralink/rt2x00/rt73usb.c index e3269fd7c59e..861035444374 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt73usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt73usb.c @@ -1378,7 +1378,7 @@ static int rt73usb_set_state(struct rt2x00_dev *rt2x00dev, enum dev_state state) { u32 reg, reg2; unsigned int i; - char put_to_sleep; + bool put_to_sleep; put_to_sleep = (state != STATE_AWAKE); @@ -2090,7 +2090,7 @@ static int rt73usb_probe_hw_mode(struct rt2x00_dev *rt2x00dev) { struct hw_mode_spec *spec = &rt2x00dev->spec; struct channel_info *info; - char *tx_power; + u8 *tx_power; unsigned int i; /* diff --git a/drivers/net/wireless/ralink/rt2x00/rt73usb.h b/drivers/net/wireless/ralink/rt2x00/rt73usb.h index 1b56d285c34b..bb0a68516c08 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt73usb.h +++ b/drivers/net/wireless/ralink/rt2x00/rt73usb.h @@ -1063,6 +1063,6 @@ struct hw_pairwise_ta_entry { (((u8)(__txpower)) > MAX_TXPOWER) ? DEFAULT_TXPOWER : (__txpower) #define TXPOWER_TO_DEV(__txpower) \ - clamp_t(char, __txpower, MIN_TXPOWER, MAX_TXPOWER) + clamp_t(u8, __txpower, MIN_TXPOWER, MAX_TXPOWER) #endif /* RT73USB_H */ From 03c0ad4b06c3566de624b4f4b78ac1a5d1e4c8e7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Oct 2022 19:41:51 +0200 Subject: [PATCH 025/328] wifi: cfg80211: silence a sparse RCU warning All we're going to do with this pointer is assign it to another __rcu pointer, but sparse can't see that, so use rcu_access_pointer() to silence the warning here. Fixes: c90b93b5b782 ("wifi: cfg80211: update hidden BSSes to avoid WARN_ON") Signed-off-by: Johannes Berg --- net/wireless/scan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 806a5f1330ff..da752b0cc752 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1674,7 +1674,9 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, if (old == rcu_access_pointer(known->pub.ies)) rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies); - cfg80211_update_hidden_bsses(known, new->pub.beacon_ies, old); + cfg80211_update_hidden_bsses(known, + rcu_access_pointer(new->pub.beacon_ies), + old); if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); From 50b2e8711462409cd368c41067405aa446dfa2af Mon Sep 17 00:00:00 2001 From: taozhang Date: Sat, 15 Oct 2022 17:38:31 +0800 Subject: [PATCH 026/328] wifi: mac80211: fix memory free error when registering wiphy fail ieee80211_register_hw free the allocated cipher suites when registering wiphy fail, and ieee80211_free_hw will re-free it. set wiphy_ciphers_allocated to false after freeing allocated cipher suites. Signed-off-by: taozhang Signed-off-by: Johannes Berg --- net/mac80211/main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 46f3eddc2388..02b5abc7326b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1439,8 +1439,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_led_exit(local); destroy_workqueue(local->workqueue); fail_workqueue: - if (local->wiphy_ciphers_allocated) + if (local->wiphy_ciphers_allocated) { kfree(local->hw.wiphy->cipher_suites); + local->wiphy_ciphers_allocated = false; + } kfree(local->int_scan_req); return result; } @@ -1508,8 +1510,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) mutex_destroy(&local->iflist_mtx); mutex_destroy(&local->mtx); - if (local->wiphy_ciphers_allocated) + if (local->wiphy_ciphers_allocated) { kfree(local->hw.wiphy->cipher_suites); + local->wiphy_ciphers_allocated = false; + } idr_for_each(&local->ack_status_frames, ieee80211_free_ack_frame, NULL); From 57b962e627ec0ae53d4d16d7bd1033e27e67677a Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 20 Oct 2022 13:40:40 +0200 Subject: [PATCH 027/328] wifi: cfg80211: fix memory leak in query_regdb_file() In the function query_regdb_file() the alpha2 parameter is duplicated using kmemdup() and subsequently freed in regdb_fw_cb(). However, request_firmware_nowait() can fail without calling regdb_fw_cb() and thus leak memory. Fixes: 007f6c5e6eb4 ("cfg80211: support loading regulatory database as firmware file") Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/reg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d5c7a5aa6853..c3d950d29432 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1084,6 +1084,8 @@ MODULE_FIRMWARE("regulatory.db"); static int query_regdb_file(const char *alpha2) { + int err; + ASSERT_RTNL(); if (regdb) @@ -1093,9 +1095,13 @@ static int query_regdb_file(const char *alpha2) if (!alpha2) return -ENOMEM; - return request_firmware_nowait(THIS_MODULE, true, "regulatory.db", - ®_pdev->dev, GFP_KERNEL, - (void *)alpha2, regdb_fw_cb); + err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db", + ®_pdev->dev, GFP_KERNEL, + (void *)alpha2, regdb_fw_cb); + if (err) + kfree(alpha2); + + return err; } int reg_reload_regdb(void) From 18429c51c7ff6e6bfd627316c54670230967a7e5 Mon Sep 17 00:00:00 2001 From: Paul Zhang Date: Tue, 11 Oct 2022 21:04:28 +0800 Subject: [PATCH 028/328] wifi: cfg80211: Fix bitrates overflow issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When invoking function cfg80211_calculate_bitrate_eht about (320 MHz, EHT-MCS 13, EHT-NSS 2, EHT-GI 0), which means the parameters as flags: 0x80, bw: 7, mcs: 13, eht_gi: 0, nss: 2, this formula (result * rate->nss) will overflow and causes the returned bitrate to be 3959 when it should be 57646. Here is the explanation: u64 tmp; u32 result; … /* tmp = result = 4 * rates_996[0] * = 4 * 480388888 = 0x72889c60 */ tmp = result; /* tmp = 0x72889c60 * 6144 = 0xabccea90000 */ tmp *= SCALE; /* tmp = 0xabccea90000 / mcs_divisors[13] * = 0xabccea90000 / 5120 = 0x8970bba6 */ do_div(tmp, mcs_divisors[rate->mcs]); /* result = 0x8970bba6 */ result = tmp; /* normally (result * rate->nss) = 0x8970bba6 * 2 = 0x112e1774c, * but since result is u32, (result * rate->nss) = 0x12e1774c, * overflow happens and it loses the highest bit. * Then result = 0x12e1774c / 8 = 39595753, */ result = (result * rate->nss) / 8; Signed-off-by: Paul Zhang Signed-off-by: Johannes Berg --- net/wireless/util.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 1f285b515028..39680e7bad45 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1557,10 +1557,12 @@ static u32 cfg80211_calculate_bitrate_eht(struct rate_info *rate) tmp = result; tmp *= SCALE; do_div(tmp, mcs_divisors[rate->mcs]); - result = tmp; /* and take NSS */ - result = (result * rate->nss) / 8; + tmp *= rate->nss; + do_div(tmp, 8); + + result = tmp; return result / 10000; } From 69188df5f6e4cecc6b76b958979ba363cd5240e8 Mon Sep 17 00:00:00 2001 From: Jonas Jelonek Date: Fri, 14 Oct 2022 16:54:39 +0200 Subject: [PATCH 029/328] wifi: mac80211_hwsim: fix debugfs attribute ps with rc table support Fixes a warning that occurs when rc table support is enabled (IEEE80211_HW_SUPPORTS_RC_TABLE) in mac80211_hwsim and the PS mode is changed via the exported debugfs attribute. When the PS mode is changed, a packet is broadcasted via hwsim_send_nullfunc by creating and transmitting a plain skb with only header initialized. The ieee80211 rate array in the control buffer is zero-initialized. When ratetbl support is enabled, ieee80211_get_tx_rates is called for the skb with sta parameter set to NULL and thus no ratetbl can be used. The final rate array then looks like [-1,0; 0,0; 0,0; 0,0] which causes the warning in ieee80211_get_tx_rate. The issue is fixed by setting the count of the first rate with idx '0' to 1 and hence ieee80211_get_tx_rates won't overwrite it with idx '-1'. Signed-off-by: Jonas Jelonek Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index a40636c90ec3..0d81098c7b45 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -910,6 +910,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, struct hwsim_vif_priv *vp = (void *)vif->drv_priv; struct sk_buff *skb; struct ieee80211_hdr *hdr; + struct ieee80211_tx_info *cb; if (!vp->assoc) return; @@ -931,6 +932,10 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, memcpy(hdr->addr2, mac, ETH_ALEN); memcpy(hdr->addr3, vp->bssid, ETH_ALEN); + cb = IEEE80211_SKB_CB(skb); + cb->control.rates[0].count = 1; + cb->control.rates[1].idx = -1; + rcu_read_lock(); mac80211_hwsim_tx_frame(data->hw, skb, rcu_dereference(vif->bss_conf.chanctx_conf)->def.chan); From 24f0692bfd41fd207d99c993a5785c3426762046 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 20 Oct 2022 16:54:55 -0700 Subject: [PATCH 030/328] ACPI: NUMA: Add CXL CFMWS 'nodes' to the possible nodes set The ACPI CEDT.CFMWS indicates a range of possible address where new CXL regions can appear. Each range is associated with a QTG id (QoS Throttling Group id). For each range + QTG pair that is not covered by a proximity domain in the SRAT, Linux creates a new NUMA node. However, the commit that added the new ranges missed updating the node_possible mask which causes memory_group_register() to fail. Add the new nodes to the nodes_possible mask. Cc: Fixes: fd49f99c1809 ("ACPI: NUMA: Add a node and memblk for each CFMWS not in SRAT") Cc: Alison Schofield Cc: Rafael J. Wysocki Reported-by: Vishal Verma Tested-by: Vishal Verma Acked-by: Rafael J. Wysocki Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166631003537.1167078.9373680312035292395.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/acpi/numa/srat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 3b818ab186be..1f4fc5f8a819 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -327,6 +327,7 @@ static int __init acpi_parse_cfmws(union acpi_subtable_headers *header, pr_warn("ACPI NUMA: Failed to add memblk for CFMWS node %d [mem %#llx-%#llx]\n", node, start, end); } + node_set(node, numa_nodes_parsed); /* Set the next available fake_pxm value */ (*fake_pxm)++; From f23f1a1e8437e38014fe34a2f12e37e861e5bcc7 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 21 Sep 2022 03:10:08 +0200 Subject: [PATCH 031/328] arm64: dts: imx8mm: Enable CPLD_Dn pull down resistor on MX8Menlo Enable CPLD_Dn pull down resistor instead of pull up to avoid intefering with CPLD power off functionality. Fixes: 510c527b4ff57 ("arm64: dts: imx8mm: Add i.MX8M Mini Toradex Verdin based Menlo board") Signed-off-by: Marek Vasut Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- .../arm64/boot/dts/freescale/imx8mm-mx8menlo.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts b/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts index 32f6f2f50c10..43e89859c044 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts @@ -250,21 +250,21 @@ /* SODIMM 96 */ MX8MM_IOMUXC_SAI1_RXD2_GPIO4_IO4 0x1c4 /* CPLD_D[7] */ - MX8MM_IOMUXC_SAI1_RXD3_GPIO4_IO5 0x1c4 + MX8MM_IOMUXC_SAI1_RXD3_GPIO4_IO5 0x184 /* CPLD_D[6] */ - MX8MM_IOMUXC_SAI1_RXFS_GPIO4_IO0 0x1c4 + MX8MM_IOMUXC_SAI1_RXFS_GPIO4_IO0 0x184 /* CPLD_D[5] */ - MX8MM_IOMUXC_SAI1_TXC_GPIO4_IO11 0x1c4 + MX8MM_IOMUXC_SAI1_TXC_GPIO4_IO11 0x184 /* CPLD_D[4] */ - MX8MM_IOMUXC_SAI1_TXD0_GPIO4_IO12 0x1c4 + MX8MM_IOMUXC_SAI1_TXD0_GPIO4_IO12 0x184 /* CPLD_D[3] */ - MX8MM_IOMUXC_SAI1_TXD1_GPIO4_IO13 0x1c4 + MX8MM_IOMUXC_SAI1_TXD1_GPIO4_IO13 0x184 /* CPLD_D[2] */ - MX8MM_IOMUXC_SAI1_TXD2_GPIO4_IO14 0x1c4 + MX8MM_IOMUXC_SAI1_TXD2_GPIO4_IO14 0x184 /* CPLD_D[1] */ - MX8MM_IOMUXC_SAI1_TXD3_GPIO4_IO15 0x1c4 + MX8MM_IOMUXC_SAI1_TXD3_GPIO4_IO15 0x184 /* CPLD_D[0] */ - MX8MM_IOMUXC_SAI1_TXD4_GPIO4_IO16 0x1c4 + MX8MM_IOMUXC_SAI1_TXD4_GPIO4_IO16 0x184 /* KBD_intK */ MX8MM_IOMUXC_SAI2_MCLK_GPIO4_IO27 0x1c4 /* DISP_reset */ From f4cd18c5b2000df0c382f6530eeca9141ea41faf Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Sat, 22 Oct 2022 08:23:52 -0700 Subject: [PATCH 032/328] efi/tpm: Pass correct address to memblock_reserve memblock_reserve() expects a physical address, but the address being passed for the TPM final events log is what was returned from early_memremap(). This results in something like the following: [ 0.000000] memblock_reserve: [0xffffffffff2c0000-0xffffffffff2c00e4] efi_tpm_eventlog_init+0x324/0x370 Pass the address from efi like what is done for the TPM events log. Fixes: c46f3405692d ("tpm: Reserve the TPM final events table") Cc: Matthew Garrett Cc: Jarkko Sakkinen Cc: Bartosz Szczepanek Cc: Ard Biesheuvel Signed-off-by: Jerry Snitselaar Acked-by: Jarkko Sakkinen Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index 8f665678e9e3..e8d69bd548f3 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -97,7 +97,7 @@ int __init efi_tpm_eventlog_init(void) goto out_calc; } - memblock_reserve((unsigned long)final_tbl, + memblock_reserve(efi.tpm_final_log, tbl_size + sizeof(*final_tbl)); efi_tpm_final_log_size = tbl_size; From 161a438d730dade2ba2b1bf8785f0759aba4ca5f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2022 10:39:08 +0200 Subject: [PATCH 033/328] efi: random: reduce seed size to 32 bytes We no longer need at least 64 bytes of random seed to permit the early crng init to complete. The RNG is now based on Blake2s, so reduce the EFI seed size to the Blake2s hash size, which is sufficient for our purposes. While at it, drop the READ_ONCE(), which was supposed to prevent size from being evaluated after seed was unmapped. However, this cannot actually happen, so READ_ONCE() is unnecessary here. Cc: # v4.14+ Signed-off-by: Ard Biesheuvel Reviewed-by: Jason A. Donenfeld Acked-by: Ilias Apalodimas --- drivers/firmware/efi/efi.c | 2 +- include/linux/efi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 3ecdc43a3f2b..a46df5d1d094 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -611,7 +611,7 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, seed = early_memremap(efi_rng_seed, sizeof(*seed)); if (seed != NULL) { - size = READ_ONCE(seed->size); + size = min(seed->size, EFI_RANDOM_SEED_SIZE); early_memunmap(seed, sizeof(*seed)); } else { pr_err("Could not map UEFI random seed!\n"); diff --git a/include/linux/efi.h b/include/linux/efi.h index 80f3c1c7827d..929d559ad41d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1222,7 +1222,7 @@ efi_status_t efi_random_get_seed(void); arch_efi_call_virt_teardown(); \ }) -#define EFI_RANDOM_SEED_SIZE 64U +#define EFI_RANDOM_SEED_SIZE 32U // BLAKE2S_HASH_SIZE struct linux_efi_random_seed { u32 size; From 7d866e38c7e9ece8a096d0d098fa9d92b9d4f97e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2022 10:39:09 +0200 Subject: [PATCH 034/328] efi: random: Use 'ACPI reclaim' memory for random seed EFI runtime services data is guaranteed to be preserved by the OS, making it a suitable candidate for the EFI random seed table, which may be passed to kexec kernels as well (after refreshing the seed), and so we need to ensure that the memory is preserved without support from the OS itself. However, runtime services data is intended for allocations that are relevant to the implementations of the runtime services themselves, and so they are unmapped from the kernel linear map, and mapped into the EFI page tables that are active while runtime service invocations are in progress. None of this is needed for the RNG seed. So let's switch to EFI 'ACPI reclaim' memory: in spite of the name, there is nothing exclusively ACPI about it, it is simply a type of allocation that carries firmware provided data which may or may not be relevant to the OS, and it is left up to the OS to decide whether to reclaim it after having consumed its contents. Given that in Linux, we never reclaim these allocations, it is a good choice for the EFI RNG seed, as the allocation is guaranteed to survive kexec reboots. One additional reason for changing this now is to align it with the upcoming recommendation for EFI bootloader provided RNG seeds, which must not use EFI runtime services code/data allocations. Cc: # v4.14+ Signed-off-by: Ard Biesheuvel Reviewed-by: Ilias Apalodimas --- drivers/firmware/efi/libstub/random.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 24aa37535372..33ab56769595 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -75,7 +75,12 @@ efi_status_t efi_random_get_seed(void) if (status != EFI_SUCCESS) return status; - status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA, + /* + * Use EFI_ACPI_RECLAIM_MEMORY here so that it is guaranteed that the + * allocation will survive a kexec reboot (although we refresh the seed + * beforehand) + */ + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, sizeof(*seed) + EFI_RANDOM_SEED_SIZE, (void **)&seed); if (status != EFI_SUCCESS) From 9440c42941606af4c379afa3cf8624f0dc43a629 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 18 Oct 2022 14:27:08 +0200 Subject: [PATCH 035/328] x86/syscall: Include asm/ptrace.h in syscall_wrapper header With just the forward declaration of the 'struct pt_regs' in syscall_wrapper.h, the syscall stub functions: __[x64|ia32]_sys_*(struct pt_regs *regs) will have different definition of 'regs' argument in BTF data based on which object file they are defined in. If the syscall's object includes 'struct pt_regs' definition, the BTF argument data will point to a 'struct pt_regs' record, like: [226] STRUCT 'pt_regs' size=168 vlen=21 'r15' type_id=1 bits_offset=0 'r14' type_id=1 bits_offset=64 'r13' type_id=1 bits_offset=128 ... If not, it will point to a fwd declaration record: [15439] FWD 'pt_regs' fwd_kind=struct and make bpf tracing program hooking on those functions unable to access fields from 'struct pt_regs'. Include asm/ptrace.h directly in syscall_wrapper.h to make sure all syscalls see 'struct pt_regs' definition. This then results in BTF for '__*_sys_*(struct pt_regs *regs)' functions to point to the actual struct, not just the forward declaration. [ bp: No Fixes tag as this is not really a bug fix but "adjustment" so that BTF is happy. ] Reported-by: Akihiro HARAI Signed-off-by: Jiri Olsa Signed-off-by: Borislav Petkov Acked-by: Andrii Nakryiko Cc: # this is needed only for BTF so kernels >= 5.15 Link: https://lore.kernel.org/r/20221018122708.823792-1-jolsa@kernel.org --- arch/x86/include/asm/syscall_wrapper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 59358d1bf880..fd2669b1cb2d 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,7 +6,7 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H -struct pt_regs; +#include extern long __x64_sys_ni_syscall(const struct pt_regs *regs); extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); From 6853a71726b6f5930b4450889faf02e8f1cfe35c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 25 Oct 2022 14:51:56 +0000 Subject: [PATCH 036/328] KVM: arm64: Use correct accessor to parse stage-1 PTEs hyp_get_page_state() is used with pKVM to retrieve metadata about a page by parsing a hypervisor stage-1 PTE. However, it incorrectly uses a helper which parses *stage-2* mappings. Ouch. Luckily, pkvm_getstate() only looks at the software bits, which happen to be in the same place for stage-1 and stage-2 PTEs, and this all ends up working correctly by accident. But clearly, we should do better. Fix hyp_get_page_state() to use the correct helper. Fixes: e82edcc75c4e ("KVM: arm64: Implement do_share() helper for sharing memory") Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221025145156.855308-1-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 1e78acf9662e..07f9dc9848ef 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -516,7 +516,7 @@ static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) if (!kvm_pte_valid(pte)) return PKVM_NOPAGE; - return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); + return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); } static int __hyp_check_page_state_range(u64 addr, u64 size, From a9003f74f5a2f487e101f3aa1dd5c3d3a78c6999 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 7 Oct 2022 15:10:01 +0200 Subject: [PATCH 037/328] clk: renesas: r8a779g0: Fix HSCIF parent clocks As serial communication requires a clean clock signal, the High Speed Serial Communication Interfaces with FIFO (HSCIF) is clocked by a clock that is not affected by Spread Spectrum or Fractional Multiplication. Hence change the parent clocks for the HSCIF modules from the S0D3_PER clock to the SASYNCPERD1 clock (which has the same clock rate), cfr. R-Car V4H Hardware User's Manual rev. 0.54. Fixes: 0ab55cf1834177a2 ("clk: renesas: cpg-mssr: Add support for R-Car V4H") Signed-off-by: Geert Uytterhoeven Acked-by: Stephen Boyd Link: https://lore.kernel.org/r/b7928abc8b9f53d5b06ec8624342f449de3d24ec.1665147497.git.geert+renesas@glider.be --- drivers/clk/renesas/r8a779g0-cpg-mssr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/renesas/r8a779g0-cpg-mssr.c b/drivers/clk/renesas/r8a779g0-cpg-mssr.c index 3e8c93facfa1..d5b325e3c539 100644 --- a/drivers/clk/renesas/r8a779g0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779g0-cpg-mssr.c @@ -158,10 +158,10 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = { DEF_MOD("avb0", 211, R8A779G0_CLK_S0D4_HSC), DEF_MOD("avb1", 212, R8A779G0_CLK_S0D4_HSC), DEF_MOD("avb2", 213, R8A779G0_CLK_S0D4_HSC), - DEF_MOD("hscif0", 514, R8A779G0_CLK_S0D3_PER), - DEF_MOD("hscif1", 515, R8A779G0_CLK_S0D3_PER), - DEF_MOD("hscif2", 516, R8A779G0_CLK_S0D3_PER), - DEF_MOD("hscif3", 517, R8A779G0_CLK_S0D3_PER), + DEF_MOD("hscif0", 514, R8A779G0_CLK_SASYNCPERD1), + DEF_MOD("hscif1", 515, R8A779G0_CLK_SASYNCPERD1), + DEF_MOD("hscif2", 516, R8A779G0_CLK_SASYNCPERD1), + DEF_MOD("hscif3", 517, R8A779G0_CLK_SASYNCPERD1), DEF_MOD("i2c0", 518, R8A779G0_CLK_S0D6_PER), DEF_MOD("i2c1", 519, R8A779G0_CLK_S0D6_PER), DEF_MOD("i2c2", 520, R8A779G0_CLK_S0D6_PER), From e07ee6fe21f47cfd72ae566395c67a80e7c66163 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Tue, 25 Oct 2022 12:16:27 -0700 Subject: [PATCH 038/328] xfs: increase rename inode reservation xfs_rename can update up to 5 inodes: src_dp, target_dp, src_ip, target_ip and wip. So we need to increase the inode reservation to match. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_trans_resv.c | 4 ++-- fs/xfs/xfs_inode.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 2c4ad6e4bb14..5b2f27cbdb80 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -422,7 +422,7 @@ xfs_calc_itruncate_reservation_minlogsize( /* * In renaming a files we can modify: - * the four inodes involved: 4 * inode size + * the five inodes involved: 5 * inode size * the two directory btrees: 2 * (max depth + v2) * dir block size * the two directory bmap btrees: 2 * max depth * block size * And the bmap_finish transaction can free dir and bmap blocks (two sets @@ -437,7 +437,7 @@ xfs_calc_rename_reservation( struct xfs_mount *mp) { return XFS_DQUOT_LOGRES(mp) + - max((xfs_calc_inode_res(mp, 4) + + max((xfs_calc_inode_res(mp, 5) + xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1))), (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c000b74dd203..aa303be11576 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2818,7 +2818,7 @@ retry: * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source - * directory, we can lock from 2 to 4 inodes. + * directory, we can lock from 2 to 5 inodes. */ xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); From 5c1df62ca65936139a4b008561110d9fc285c58a Mon Sep 17 00:00:00 2001 From: Stefan Hansson Date: Mon, 17 Oct 2022 17:01:14 +0200 Subject: [PATCH 039/328] kbuild: use POSIX-compatible grep option --file is a GNU extension to grep which is not available in all implementations (such as BusyBox). Use the -f option instead which is eqvuialent according to the GNU grep manpage[1] and is present in POSIX[2]. [1] https://www.gnu.org/software/grep/manual/grep.html [2] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/grep.html Signed-off-by: Stefan Hansson Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d148a55bfd0f..e90bb2b38607 100644 --- a/Makefile +++ b/Makefile @@ -1218,7 +1218,7 @@ quiet_cmd_ar_vmlinux.a = AR $@ cmd_ar_vmlinux.a = \ rm -f $@; \ $(AR) cDPrST $@ $(KBUILD_VMLINUX_OBJS); \ - $(AR) mPiT $$($(AR) t $@ | head -n1) $@ $$($(AR) t $@ | grep -F --file=$(srctree)/scripts/head-object-list.txt) + $(AR) mPiT $$($(AR) t $@ | head -n1) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) targets += vmlinux.a vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt autoksyms_recursive FORCE From 114ff6fe6cfbe81659f9e517d0b25f53db5dfc5d Mon Sep 17 00:00:00 2001 From: Dan Li Date: Thu, 20 Oct 2022 03:38:23 -0700 Subject: [PATCH 040/328] Documentation: kbuild: Add description of git for reproducible builds The status of git will affect the final compilation result, add it to the documentation of reproducible builds. Signed-off-by: Dan Li Signed-off-by: Masahiro Yamada --- Documentation/kbuild/reproducible-builds.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/kbuild/reproducible-builds.rst b/Documentation/kbuild/reproducible-builds.rst index 071f0151a7a4..f2dcc39044e6 100644 --- a/Documentation/kbuild/reproducible-builds.rst +++ b/Documentation/kbuild/reproducible-builds.rst @@ -119,6 +119,16 @@ To avoid this, you can make the vDSO different for different kernel versions by including an arbitrary string of "salt" in it. This is specified by the Kconfig symbol ``CONFIG_BUILD_SALT``. +Git +--- + +Uncommitted changes or different commit ids in git can also lead +to different compilation results. For example, after executing +``git reset HEAD^``, even if the code is the same, the +``include/config/kernel.release`` generated during compilation +will be different, which will eventually lead to binary differences. +See ``scripts/setlocalversion`` for details. + .. _KBUILD_BUILD_TIMESTAMP: kbuild.html#kbuild-build-timestamp .. _KBUILD_BUILD_USER and KBUILD_BUILD_HOST: kbuild.html#kbuild-build-user-kbuild-build-host .. _KCFLAGS: kbuild.html#kcflags From 3b1e0dd2dc8a280b1e89c0df6d38cd28768575a5 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Tue, 25 Oct 2022 13:17:44 -0700 Subject: [PATCH 041/328] kbuild: fix typo in modpost Commit f73edc8951b2 ("kbuild: unify two modpost invocations") introduced a typo (moudle.symvers-if-present) which results in the kernel's Module.symvers to not be included as a prerequisite for $(KBUILD_EXTMOD)/Module.symvers. Fix the typo to restore the intended functionality. Fixes: f73edc8951b2 ("kbuild: unify two modpost invocations") Signed-off-by: Will McVicker Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 8489a3402eb8..e41dee64d429 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -122,7 +122,7 @@ quiet_cmd_modpost = MODPOST $@ sed 's/ko$$/o/' $(or $(modorder-if-needed), /dev/null) | $(MODPOST) $(modpost-args) -T - $(vmlinux.o-if-present) targets += $(output-symdump) -$(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(moudle.symvers-if-present) $(MODPOST) FORCE +$(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(module.symvers-if-present) $(MODPOST) FORCE $(call if_changed,modpost) __modpost: $(output-symdump) From b6bcdc9f6b8321e4471ff45413b6410e16762a8d Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 27 Oct 2022 13:09:45 +0100 Subject: [PATCH 042/328] KVM: arm64: Fix bad dereference on MTE-enabled systems enter_exception64() performs an MTE check, which involves dereferencing vcpu->kvm. While vcpu has already been fixed up to be a HYP VA pointer, kvm is still a pointer in the kernel VA space. This only affects nVHE configurations with MTE enabled, as in other cases, the pointer is either valid (VHE) or not dereferenced (!MTE). Fix this by first converting kvm to a HYP VA pointer. Fixes: ea7fc1bb1cd1 ("KVM: arm64: Introduce MTE VM feature") Signed-off-by: Ryan Roberts Reviewed-by: Steven Price [maz: commit message tidy-up] Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221027120945.29679-1-ryan.roberts@arm.com --- arch/arm64/kvm/hyp/exception.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index b7557b25ed56..791d3de76771 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -13,6 +13,7 @@ #include #include #include +#include #if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__) #error Hypervisor code only! @@ -115,7 +116,7 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, new |= (old & PSR_C_BIT); new |= (old & PSR_V_BIT); - if (kvm_has_mte(vcpu->kvm)) + if (kvm_has_mte(kern_hyp_va(vcpu->kvm))) new |= PSR_TCO_BIT; new |= (old & PSR_DIT_BIT); From 08a32902a56e1670850fe5d518d8203e9ce354b8 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 18 Oct 2022 15:52:56 +0200 Subject: [PATCH 043/328] clk: Remove WARN_ON NULL parent in clk_core_init_rate_req() If a clock has CLK_SET_RATE_PARENT, but core->parent is NULL (most likely because it's orphan), callers of clk_core_init_rate_req() will blindly call this function leading to a very verbose warning. Since it's a fairly common situation, let's just remove the WARN_ON but keep the check that prevents us from dereferencing the pointer. Interestingly, it fixes a regression on the Mediatek MT8195 where the GPU would stall during a clk_set_rate for its main clock. We couldn't come up with a proper explanation since the condition is essentially the same. It was then assumed that it could be timing related since printing the warning stacktrace takes a while, but we couldn't replicate the failure by using fairly large (10ms) mdelays. Fixes: 262ca38f4b6e ("clk: Stop forwarding clk_rate_requests to the parent") Reported-by: AngeloGioacchino Del Regno Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20221018-clk-range-checks-fixes-v1-1-f3ef80518140@cerno.tech Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index c3c3f8c07258..37d623c7b73b 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1459,7 +1459,7 @@ static void clk_core_init_rate_req(struct clk_core * const core, { struct clk_core *parent; - if (WARN_ON(!core || !req)) + if (!core || WARN_ON(!req)) return; memset(req, 0, sizeof(*req)); From 2079d029387adfc0cc123f01a6fcf9eb6540ee4d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 18 Oct 2022 15:52:57 +0200 Subject: [PATCH 044/328] clk: Initialize the clk_rate_request even if clk_core is NULL Since commit c35e84b09776 ("clk: Introduce clk_hw_init_rate_request()"), users that used to initialize their clk_rate_request by initializing their local structure now rely on clk_hw_init_rate_request(). This function is backed by clk_core_init_rate_req(), which will skip the initialization if either the pointer to struct clk_core or to struct clk_rate_request are NULL. However, the core->parent pointer might be NULL because the clock is orphan, and we will thus end up with our local struct clk_rate_request left untouched. And since clk_hw_init_rate_request() doesn't return an error, we will then call a determine_rate variant with that unitialized structure. In order to avoid this, let's clear our clk_rate_request if the pointer to it is valid but the pointer to struct clk_core isn't. Fixes: c35e84b09776 ("clk: Introduce clk_hw_init_rate_request()") Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20221018-clk-range-checks-fixes-v1-2-f3ef80518140@cerno.tech Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 37d623c7b73b..eb2f9be9b9aa 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1459,11 +1459,14 @@ static void clk_core_init_rate_req(struct clk_core * const core, { struct clk_core *parent; - if (!core || WARN_ON(!req)) + if (WARN_ON(!req)) return; memset(req, 0, sizeof(*req)); + if (!core) + return; + req->rate = rate; clk_core_get_boundaries(core, &req->min_rate, &req->max_rate); From 774560cf28fe115f106b6176c54ec641491136ac Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 18 Oct 2022 15:52:58 +0200 Subject: [PATCH 045/328] clk: Initialize max_rate in struct clk_rate_request Since commit b46fd8dbe8ad ("clk: Zero the clk_rate_request structure"), the clk_core_init_rate_req() function clears the struct clk_rate_request passed as argument. However, the default value for max_rate isn't 0 but ULONG_MAX, and we end up creating a clk_rate_request instance where the maximum rate is 0. Let's initialize max_rate to ULONG_MAX properly. Fixes: b46fd8dbe8ad ("clk: Zero the clk_rate_request structure") Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20221018-clk-range-checks-fixes-v1-3-f3ef80518140@cerno.tech Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index eb2f9be9b9aa..57b83665e5c3 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1463,6 +1463,7 @@ static void clk_core_init_rate_req(struct clk_core * const core, return; memset(req, 0, sizeof(*req)); + req->max_rate = ULONG_MAX; if (!core) return; From ffa20aa581cf5377fc397b0d0ff9d67ea823629b Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Wed, 19 Oct 2022 11:35:35 +0530 Subject: [PATCH 046/328] clk: qcom: Update the force mem core bit for GPU clocks There are few GPU clocks which are powering up the memories and thus enable the FORCE_MEM_PERIPH always for these clocks to force the periph_on signal to remain active during halt state of the clock. Fixes: a3cc092196ef ("clk: qcom: Add Global Clock controller (GCC) driver for SC7280") Fixes: 3e0f01d6c7e7 ("clk: qcom: Add graphics clock controller driver for SC7280") Signed-off-by: Taniya Das Signed-off-by: Satya Priya Link: https://lore.kernel.org/r/1666159535-6447-1-git-send-email-quic_c_skakit@quicinc.com Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sc7280.c | 1 + drivers/clk/qcom/gpucc-sc7280.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/clk/qcom/gcc-sc7280.c b/drivers/clk/qcom/gcc-sc7280.c index 8afb7575e712..46d41ebce2b0 100644 --- a/drivers/clk/qcom/gcc-sc7280.c +++ b/drivers/clk/qcom/gcc-sc7280.c @@ -3467,6 +3467,7 @@ static int gcc_sc7280_probe(struct platform_device *pdev) regmap_update_bits(regmap, 0x28004, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x28014, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x7100C, BIT(13), BIT(13)); ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks, ARRAY_SIZE(gcc_dfs_clocks)); diff --git a/drivers/clk/qcom/gpucc-sc7280.c b/drivers/clk/qcom/gpucc-sc7280.c index 9a832f2bcf49..1490cd45a654 100644 --- a/drivers/clk/qcom/gpucc-sc7280.c +++ b/drivers/clk/qcom/gpucc-sc7280.c @@ -463,6 +463,7 @@ static int gpu_cc_sc7280_probe(struct platform_device *pdev) */ regmap_update_bits(regmap, 0x1170, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x1098, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x1098, BIT(13), BIT(13)); return qcom_cc_really_probe(pdev, &gpu_cc_sc7280_desc, regmap); } From 9a8c5b0d061554fedd7dbe894e63aa34d0bac7c4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 27 Oct 2022 16:04:36 -0400 Subject: [PATCH 047/328] ext4: update the backup superblock's at the end of the online resize When expanding a file system using online resize, various fields in the superblock (e.g., s_blocks_count, s_inodes_count, etc.) change. To update the backup superblocks, the online resize uses the function update_backups() in fs/ext4/resize.c. This function was not updating the checksum field in the backup superblocks. This wasn't a big deal previously, because e2fsck didn't care about the checksum field in the backup superblock. (And indeed, update_backups() goes all the way back to the ext3 days, well before we had support for metadata checksums.) However, there is an alternate, more general way of updating superblock fields, ext4_update_primary_sb() in fs/ext4/ioctl.c. This function does check the checksum of the backup superblock, and if it doesn't match will mark the file system as corrupted. That was clearly not the intent, so avoid to aborting the resize when a bad superblock is found. In addition, teach update_backups() to properly update the checksum in the backup superblocks. We will eventually want to unify updapte_backups() with the infrasture in ext4_update_primary_sb(), but that's for another day. Note: The problem has been around for a while; it just didn't really matter until ext4_update_primary_sb() was added by commit bbc605cdb1e1 ("ext4: implement support for get/set fs label"). And it became trivially easy to reproduce after commit 827891a38acc ("ext4: update the s_overhead_clusters in the backup sb's when resizing") in v6.0. Cc: stable@kernel.org # 5.17+ Fixes: bbc605cdb1e1 ("ext4: implement support for get/set fs label") Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 3 +-- fs/ext4/resize.c | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 4d49c5cfb690..790d5ffe8559 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -145,9 +145,8 @@ static int ext4_update_backup_sb(struct super_block *sb, if (ext4_has_metadata_csum(sb) && es->s_checksum != ext4_superblock_csum(sb, es)) { ext4_msg(sb, KERN_ERR, "Invalid checksum for backup " - "superblock %llu\n", sb_block); + "superblock %llu", sb_block); unlock_buffer(bh); - err = -EFSBADCRC; goto out_bh; } func(es, arg); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6dfe9ccae0c5..46b87ffeb304 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1158,6 +1158,7 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, while (group < sbi->s_groups_count) { struct buffer_head *bh; ext4_fsblk_t backup_block; + struct ext4_super_block *es; /* Out of journal space, and can't get more - abort - so sad */ err = ext4_resize_ensure_credits_batch(handle, 1); @@ -1186,6 +1187,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data, memcpy(bh->b_data, data, size); if (rest) memset(bh->b_data + size, 0, rest); + es = (struct ext4_super_block *) bh->b_data; + es->s_block_group_nr = cpu_to_le16(group); + if (ext4_has_metadata_csum(sb)) + es->s_checksum = ext4_superblock_csum(sb, es); set_buffer_uptodate(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, NULL, bh); From fb3041d61f6867158088c627c2790f94e208d1ea Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 28 Oct 2022 01:28:39 +0900 Subject: [PATCH 048/328] kbuild: fix SIGPIPE error message for AR=gcc-ar and AR=llvm-ar Jiri Slaby reported that building the kernel with AR=gcc-ar shows: /usr/bin/ar terminated with signal 13 [Broken pipe] Nathan Chancellor reported the latest AR=llvm-ar shows: error: write on a pipe with no reader The latter occurs since LLVM commit 51b557adc131 ("Add an error message to the default SIGPIPE handler"). The resulting vmlinux is correct, but it is better to silence it. 'head -n1' exits after reading the first line, so the pipe is closed. Use 'sed -n 1p' to eat the stream till the end. Fixes: 321648455061 ("kbuild: use obj-y instead extra-y for objects placed at the head") Link: https://github.com/ClangBuiltLinux/linux/issues/1651 Reported-by: Jiri Slaby Reported-by: Nathan Chancellor Signed-off-by: Masahiro Yamada Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Tested-by: Nathan Chancellor --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e90bb2b38607..e9e7eff906a5 100644 --- a/Makefile +++ b/Makefile @@ -1218,7 +1218,7 @@ quiet_cmd_ar_vmlinux.a = AR $@ cmd_ar_vmlinux.a = \ rm -f $@; \ $(AR) cDPrST $@ $(KBUILD_VMLINUX_OBJS); \ - $(AR) mPiT $$($(AR) t $@ | head -n1) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) + $(AR) mPiT $$($(AR) t $@ | sed -n 1p) $@ $$($(AR) t $@ | grep -F -f $(srctree)/scripts/head-object-list.txt) targets += vmlinux.a vmlinux.a: $(KBUILD_VMLINUX_OBJS) scripts/head-object-list.txt autoksyms_recursive FORCE From 13c30a755847c7e804e1bf755e66e3ff7b7f9367 Mon Sep 17 00:00:00 2001 From: Sjoerd Simons Date: Thu, 20 Oct 2022 09:56:24 +0800 Subject: [PATCH 049/328] soundwire: intel: Initialize clock stop timeout The bus->clk_stop_timeout member is only initialized to a non-zero value during the codec driver probe. This can lead to corner cases where this value remains pegged at zero when the bus suspends, which results in an endless loop in sdw_bus_wait_for_clk_prep_deprep(). Corner cases include configurations with no codecs described in the firmware, or delays in probing codec drivers. Initializing the default timeout to the smallest non-zero value avoid this problem and allows for the existing logic to be preserved: the bus->clk_stop_timeout is set as the maximum required by all codecs connected on the bus. Fixes: 1f2dcf3a154ac ("soundwire: intel: set dev_num_ida_min") Signed-off-by: Sjoerd Simons Reviewed-by: Pierre-Louis Bossart Reviewed-by: Chao Song Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20221020015624.1703950-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c index 244209358784..8c76541d553f 100644 --- a/drivers/soundwire/intel.c +++ b/drivers/soundwire/intel.c @@ -1513,6 +1513,7 @@ static int intel_link_probe(struct auxiliary_device *auxdev, bus->link_id = auxdev->id; bus->dev_num_ida_min = INTEL_DEV_NUM_IDA_MIN; + bus->clk_stop_timeout = 1; sdw_cdns_probe(cdns); From f936fa7a954b262cb3908bbc8f01ba19dfaf9fbf Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:05 +0100 Subject: [PATCH 050/328] soundwire: qcom: reinit broadcast completion For some reason we never reinit the broadcast completion, there is a danger that broadcast commands could be treated as completed by driver from previous complete status. Fix this by reinitializing the completion before sending a broadcast command. Fixes: ddea6cf7b619 ("soundwire: qcom: update register read/write routine") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221026110210.6575-2-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index b33d5db494a5..8f1a1eb017a2 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -344,6 +344,9 @@ static int qcom_swrm_cmd_fifo_wr_cmd(struct qcom_swrm_ctrl *swrm, u8 cmd_data, if (swrm_wait_for_wr_fifo_avail(swrm)) return SDW_CMD_FAIL_OTHER; + if (cmd_id == SWR_BROADCAST_CMD_ID) + reinit_completion(&swrm->broadcast); + /* Its assumed that write is okay as we do not get any status back */ swrm->reg_write(swrm, SWRM_CMD_FIFO_WR_CMD, val); From 49a467310dc4fae591a3547860ee04d8730780f4 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 26 Oct 2022 12:02:06 +0100 Subject: [PATCH 051/328] soundwire: qcom: check for outanding writes before doing a read Reading will increase the fifo count, so check for outstanding cmd wrt. write fifo depth to avoid overflow as read will also increase write fifo cnt. Fixes: a661308c34de ("soundwire: qcom: wait for fifo space to be available before read/write") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221026110210.6575-3-srinivas.kandagatla@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 8f1a1eb017a2..cee2b2223141 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -380,6 +380,12 @@ static int qcom_swrm_cmd_fifo_rd_cmd(struct qcom_swrm_ctrl *swrm, val = swrm_get_packed_reg_val(&swrm->rcmd_id, len, dev_addr, reg_addr); + /* + * Check for outstanding cmd wrt. write fifo depth to avoid + * overflow as read will also increase write fifo cnt. + */ + swrm_wait_for_wr_fifo_avail(swrm); + /* wait for FIFO RD to complete to avoid overflow */ usleep_range(100, 105); swrm->reg_write(swrm, SWRM_CMD_FIFO_RD_CMD, val); From 4a6f278d4827b59ba26ceae0ff4529ee826aa258 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Oct 2022 14:25:20 +0200 Subject: [PATCH 052/328] fuse: add file_modified() to fallocate Add missing file_modified() call to fuse_file_fallocate(). Without this fallocate on fuse failed to clear privileges. Fixes: 05ba1f082300 ("fuse: add FALLOCATE operation") Cc: Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 1a3afd469e3a..71bfb663aac5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3001,6 +3001,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, goto out; } + err = file_modified(file); + if (err) + goto out; + if (!(mode & FALLOC_FL_KEEP_SIZE)) set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); From 04948e757148f870a31f4887ea2239403f516c3c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 26 Oct 2022 18:21:16 +0200 Subject: [PATCH 053/328] phy: qcom-qmp-combo: fix NULL-deref on runtime resume Commit fc64623637da ("phy: qcom-qmp-combo,usb: add support for separate PCS_USB region") started treating the PCS_USB registers as potentially separate from the PCS registers but used the wrong base when no PCS_USB offset has been provided. Fix the PCS_USB base used at runtime resume to prevent dereferencing a NULL pointer on platforms that do not provide a PCS_USB offset (e.g. SC7180). Fixes: fc64623637da ("phy: qcom-qmp-combo,usb: add support for separate PCS_USB region") Cc: stable@vger.kernel.org # 5.20 Signed-off-by: Johan Hovold Reviewed-by: Dmitry Baryshkov Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20221026162116.26462-1-johan+linaro@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 9807c4d935cd..ba9d761ec49a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2240,7 +2240,7 @@ static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy) static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy) { const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs_usb; + void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs; void __iomem *pcs_misc = qphy->pcs_misc; /* Disable i/o clamp_n on resume for normal mode */ From 2124becad797245d49252d2d733aee0322233d7e Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 14 Oct 2022 07:11:36 -0500 Subject: [PATCH 054/328] ACPI: x86: Add another system to quirk list for forcing StorageD3Enable commit 018d6711c26e4 ("ACPI: x86: Add a quirk for Dell Inspiron 14 2-in-1 for StorageD3Enable") introduced a quirk to allow a system with ambiguous use of _ADR 0 to force StorageD3Enable. Julius Brockmann reports that Inspiron 16 5625 suffers that same symptoms. Add this other system to the list as well. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216440 Reported-and-tested-by: Julius Brockmann Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/utils.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index f8a2cbdc0ce2..d7d3f1669d4c 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -219,6 +219,12 @@ static const struct dmi_system_id force_storage_d3_dmi[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 14 7425 2-in-1"), } }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 16 5625"), + } + }, {} }; From f11a74b45d330ad1ab986852b099747161052526 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Oct 2022 15:52:31 +0200 Subject: [PATCH 055/328] efi: efivars: Fix variable writes with unsupported query_variable_store() Commit 8a254d90a775 ("efi: efivars: Fix variable writes without query_variable_store()") addressed an issue that was introduced during the EFI variable store refactor, where alternative implementations of the efivars layer that lacked query_variable_store() would no longer work. Unfortunately, there is another case to consider here, which was missed: if the efivars layer is backed by the EFI runtime services as usual, but the EFI implementation predates the introduction of QueryVariableInfo(), we will return EFI_UNSUPPORTED, and this is no longer being dealt with correctly. So let's fix this, and while at it, clean up the code a bit, by merging the check_var_size() routines as well as their callers. Cc: # v6.0 Fixes: bbc6d2c6ef22 ("efi: vars: Switch to new wrapper layer") Signed-off-by: Ard Biesheuvel Tested-by: Aditya Garg --- drivers/firmware/efi/vars.c | 68 +++++++++++-------------------------- 1 file changed, 20 insertions(+), 48 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 433b61587139..0ba9f18312f5 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -21,29 +21,22 @@ static struct efivars *__efivars; static DEFINE_SEMAPHORE(efivars_lock); -static efi_status_t check_var_size(u32 attributes, unsigned long size) +static efi_status_t check_var_size(bool nonblocking, u32 attributes, + unsigned long size) { const struct efivar_operations *fops; + efi_status_t status; fops = __efivars->ops; if (!fops->query_variable_store) + status = EFI_UNSUPPORTED; + else + status = fops->query_variable_store(attributes, size, + nonblocking); + if (status == EFI_UNSUPPORTED) return (size <= SZ_64K) ? EFI_SUCCESS : EFI_OUT_OF_RESOURCES; - - return fops->query_variable_store(attributes, size, false); -} - -static -efi_status_t check_var_size_nonblocking(u32 attributes, unsigned long size) -{ - const struct efivar_operations *fops; - - fops = __efivars->ops; - - if (!fops->query_variable_store) - return (size <= SZ_64K) ? EFI_SUCCESS : EFI_OUT_OF_RESOURCES; - - return fops->query_variable_store(attributes, size, true); + return status; } /** @@ -195,26 +188,6 @@ efi_status_t efivar_get_next_variable(unsigned long *name_size, } EXPORT_SYMBOL_NS_GPL(efivar_get_next_variable, EFIVAR); -/* - * efivar_set_variable_blocking() - local helper function for set_variable - * - * Must be called with efivars_lock held. - */ -static efi_status_t -efivar_set_variable_blocking(efi_char16_t *name, efi_guid_t *vendor, - u32 attr, unsigned long data_size, void *data) -{ - efi_status_t status; - - if (data_size > 0) { - status = check_var_size(attr, data_size + - ucs2_strsize(name, 1024)); - if (status != EFI_SUCCESS) - return status; - } - return __efivars->ops->set_variable(name, vendor, attr, data_size, data); -} - /* * efivar_set_variable_locked() - set a variable identified by name/vendor * @@ -228,23 +201,21 @@ efi_status_t efivar_set_variable_locked(efi_char16_t *name, efi_guid_t *vendor, efi_set_variable_t *setvar; efi_status_t status; - if (!nonblocking) - return efivar_set_variable_blocking(name, vendor, attr, - data_size, data); + if (data_size > 0) { + status = check_var_size(nonblocking, attr, + data_size + ucs2_strsize(name, 1024)); + if (status != EFI_SUCCESS) + return status; + } /* * If no _nonblocking variant exists, the ordinary one * is assumed to be non-blocking. */ - setvar = __efivars->ops->set_variable_nonblocking ?: - __efivars->ops->set_variable; + setvar = __efivars->ops->set_variable_nonblocking; + if (!setvar || !nonblocking) + setvar = __efivars->ops->set_variable; - if (data_size > 0) { - status = check_var_size_nonblocking(attr, data_size + - ucs2_strsize(name, 1024)); - if (status != EFI_SUCCESS) - return status; - } return setvar(name, vendor, attr, data_size, data); } EXPORT_SYMBOL_NS_GPL(efivar_set_variable_locked, EFIVAR); @@ -264,7 +235,8 @@ efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor, if (efivar_lock()) return EFI_ABORTED; - status = efivar_set_variable_blocking(name, vendor, attr, data_size, data); + status = efivar_set_variable_locked(name, vendor, attr, data_size, + data, false); efivar_unlock(); return status; } From 6f7630b1b5bc672b54c1285ee6aba752b446672c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 28 Oct 2022 15:32:07 -0700 Subject: [PATCH 056/328] fortify: Capture __bos() results in const temp vars In two recent run-time memcpy() bound checking bug reports (NFS[1] and JFS[2]), the _detection_ was working correctly (in the sense that the requested copy size was larger than the destination field size), but the _warning text_ was showing the destination field size as SIZE_MAX ("unknown size"). This should be impossible, since the detection function will explicitly give up if the destination field size is unknown. For example, the JFS warning was: memcpy: detected field-spanning write (size 132) of single field "ip->i_link" at fs/jfs/namei.c:950 (size 18446744073709551615) Other cases of this warning (e.g.[3]) have reported correctly, and the reproducer only happens under GCC (at least 10.2 and 12.1), so this currently appears to be a GCC bug. Explicitly capturing the __builtin_object_size() results in const temporary variables fixes the report. For example, the JFS reproducer now correctly reports the field size (128): memcpy: detected field-spanning write (size 132) of single field "ip->i_link" at fs/jfs/namei.c:950 (size 128) Examination of the .text delta (which is otherwise identical), shows the literal value used in the report changing: - mov $0xffffffffffffffff,%rcx + mov $0x80,%ecx [1] https://lore.kernel.org/lkml/Y0zEzZwhOxTDcBTB@codemonkey.org.uk/ [2] https://syzkaller.appspot.com/bug?id=23d613df5259b977dac1696bec77f61a85890e3d [3] https://lore.kernel.org/all/202210110948.26b43120-yujie.liu@intel.com/ Cc: "Dr. David Alan Gilbert" Cc: llvm@lists.linux.dev Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 4029fe368a4f..0f00a551939a 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -441,13 +441,18 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, #define __fortify_memcpy_chk(p, q, size, p_size, q_size, \ p_size_field, q_size_field, op) ({ \ - size_t __fortify_size = (size_t)(size); \ - WARN_ONCE(fortify_memcpy_chk(__fortify_size, p_size, q_size, \ - p_size_field, q_size_field, #op), \ + const size_t __fortify_size = (size_t)(size); \ + const size_t __p_size = (p_size); \ + const size_t __q_size = (q_size); \ + const size_t __p_size_field = (p_size_field); \ + const size_t __q_size_field = (q_size_field); \ + WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \ + __q_size, __p_size_field, \ + __q_size_field, #op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ "field \"" #p "\" at " __FILE__ ":" __stringify(__LINE__), \ - p_size_field); \ + __p_size_field); \ __underlying_##op(p, q, __fortify_size); \ }) From 2f321fd6d89ad1e9525f5aa1f2be9202c2f3e724 Mon Sep 17 00:00:00 2001 From: Max Krummenacher Date: Thu, 22 Sep 2022 18:29:18 +0200 Subject: [PATCH 057/328] arm64: dts: verdin-imx8mp: fix ctrl_sleep_moci The GPIO signaling ctrl_sleep_moci is currently handled as a gpio hog. But the gpio-hog node is made a child of the wrong gpio controller. Move it to the node representing gpio4 so that it actually works. Without this carrier board components jumpered to use the signal are unconditionally switched off. Fixes: a39ed23bdf6e ("arm64: dts: freescale: add initial support for verdin imx8m plus") Signed-off-by: Max Krummenacher Signed-off-by: Marcel Ziswiler Signed-off-by: Shawn Guo --- .../boot/dts/freescale/imx8mp-verdin.dtsi | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi index 7b712d1888ea..5dcd1de586b5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi @@ -354,16 +354,6 @@ "SODIMM_82", "SODIMM_70", "SODIMM_72"; - - ctrl-sleep-moci-hog { - gpio-hog; - /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ - gpios = <29 GPIO_ACTIVE_HIGH>; - line-name = "CTRL_SLEEP_MOCI#"; - output-high; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ctrl_sleep_moci>; - }; }; &gpio3 { @@ -432,6 +422,16 @@ "SODIMM_256", "SODIMM_48", "SODIMM_44"; + + ctrl-sleep-moci-hog { + gpio-hog; + /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ + gpios = <29 GPIO_ACTIVE_HIGH>; + line-name = "CTRL_SLEEP_MOCI#"; + output-high; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ctrl_sleep_moci>; + }; }; /* On-module I2C */ From e1ec45b9a8127d9d31bb9fc1d802571a2ba8dd89 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 26 Sep 2022 19:45:32 +0800 Subject: [PATCH 058/328] arm64: dts: imx8mm: remove otg1/2 power domain dependency on hsio pgc_otg1/2 are independent power domain of hsio, they for usb phy, so remove hsio power domain dependency from its node. Fixes: d39d4bb15310 ("arm64: dts: imx8mm: add GPC node") Signed-off-by: Li Jun Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index afb90f59c83c..41204b871f4f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -674,13 +674,11 @@ pgc_otg1: power-domain@2 { #power-domain-cells = <0>; reg = ; - power-domains = <&pgc_hsiomix>; }; pgc_otg2: power-domain@3 { #power-domain-cells = <0>; reg = ; - power-domains = <&pgc_hsiomix>; }; pgc_gpumix: power-domain@4 { From 4585c79ff477f9517b7f384a4fce351417e8fa36 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 26 Sep 2022 19:45:33 +0800 Subject: [PATCH 059/328] arm64: dts: imx8mm: correct usb power domains pgc_otg1/2 is actual the power domain of usb PHY, usb controller is in hsio power domain, and pgc_otg1/2 is required to be powered up to detect usb remote wakeup, so move the pgc_otg1/2 power domain to the usb phy node. Fixes: 01df28d80859 ("arm64: dts: imx8mm: put USB controllers into power-domains") Signed-off-by: Li Jun Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 41204b871f4f..dabd94dc30c4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -276,6 +276,7 @@ assigned-clocks = <&clk IMX8MM_CLK_USB_PHY_REF>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_100M>; clock-names = "main_clk"; + power-domains = <&pgc_otg1>; }; usbphynop2: usbphynop2 { @@ -285,6 +286,7 @@ assigned-clocks = <&clk IMX8MM_CLK_USB_PHY_REF>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_100M>; clock-names = "main_clk"; + power-domains = <&pgc_otg2>; }; soc: soc@0 { @@ -1184,7 +1186,7 @@ assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_500M>; phys = <&usbphynop1>; fsl,usbmisc = <&usbmisc1 0>; - power-domains = <&pgc_otg1>; + power-domains = <&pgc_hsiomix>; status = "disabled"; }; @@ -1204,7 +1206,7 @@ assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_500M>; phys = <&usbphynop2>; fsl,usbmisc = <&usbmisc2 0>; - power-domains = <&pgc_otg2>; + power-domains = <&pgc_hsiomix>; status = "disabled"; }; From 9e0bbb7a5218d856f1ccf8f1bf38c8869572b464 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 26 Sep 2022 19:45:34 +0800 Subject: [PATCH 060/328] arm64: dts: imx8mn: remove otg1 power domain dependency on hsio pgc_otg1 is an independent power domain of hsio, it's for usb phy, so remove hsio power domain from its node. Fixes: 8b8ebec67360 ("arm64: dts: imx8mn: add GPC node") Signed-off-by: Li Jun Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index cb2836bfbd95..950f432627fe 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -662,7 +662,6 @@ pgc_otg1: power-domain@1 { #power-domain-cells = <0>; reg = ; - power-domains = <&pgc_hsiomix>; }; pgc_gpumix: power-domain@2 { From ee895139a761bdb7869f9f5b9ccc19a064d0d740 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 26 Sep 2022 19:45:35 +0800 Subject: [PATCH 061/328] arm64: dts: imx8mn: Correct the usb power domain pgc_otg1 is actual the power domain of usb PHY, usb controller is in hsio power domain, and pgc_otg1 is required to be powered up to detect usb remote wakeup, so move the pgc_otg1 power domain to the usb phy node. Fixes: ea2b5af58ab2 ("arm64: dts: imx8mn: put USB controller into power-domains") Signed-off-by: Li Jun Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 950f432627fe..ad0b99adf691 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -1075,7 +1075,7 @@ assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_500M>; phys = <&usbphynop1>; fsl,usbmisc = <&usbmisc1 0>; - power-domains = <&pgc_otg1>; + power-domains = <&pgc_hsiomix>; status = "disabled"; }; @@ -1174,5 +1174,6 @@ assigned-clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; clock-names = "main_clk"; + power-domains = <&pgc_otg1>; }; }; From bb5ad73941dc3f4e3c2241348f385da6501d50ea Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Thu, 29 Sep 2022 12:52:22 -0700 Subject: [PATCH 062/328] ARM: dts: imx6qdl-gw59{10,13}: fix user pushbutton GPIO offset The GW5910 and GW5913 have a user pushbutton that is tied to the Gateworks System Controller GPIO offset 2. Fix the invalid offset of 0. Fixes: 64bf0a0af18d ("ARM: dts: imx6qdl-gw: add Gateworks System Controller support") Signed-off-by: Tim Harvey Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-gw5910.dtsi | 2 +- arch/arm/boot/dts/imx6qdl-gw5913.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi index 68e5ab2e27e2..6bb4855d13ce 100644 --- a/arch/arm/boot/dts/imx6qdl-gw5910.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw5910.dtsi @@ -29,7 +29,7 @@ user-pb { label = "user_pb"; - gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>; + gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>; linux,code = ; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi index 8e23cec7149e..696427b487f0 100644 --- a/arch/arm/boot/dts/imx6qdl-gw5913.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw5913.dtsi @@ -26,7 +26,7 @@ user-pb { label = "user_pb"; - gpios = <&gsc_gpio 0 GPIO_ACTIVE_LOW>; + gpios = <&gsc_gpio 2 GPIO_ACTIVE_LOW>; linux,code = ; }; From 5e67d47d0b010f0704aca469d6d27637b1dcb2ce Mon Sep 17 00:00:00 2001 From: Petr Benes Date: Tue, 4 Oct 2022 17:39:20 +0200 Subject: [PATCH 063/328] ARM: dts: imx6dl-yapp4: Do not allow PM to switch PU regulator off on Q/QP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix our design flaw in supply voltage distribution on the Quad and QuadPlus based boards. The problem is that we supply the SoC cache (VDD_CACHE_CAP) from VDD_PU instead of VDD_SOC. The VDD_PU internal regulator can be disabled by PM if VPU or GPU is not used. If that happens the system freezes. To prevent that configure the reg_pu regulator to be always on. Fixes: 0de4ab81ab26 ("ARM: dts: imx6dl-yapp4: Add Y Soft IOTA Crux/Crux+ board") Cc: petrben@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Petr Benes Signed-off-by: Michal Vokáč Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-yapp4-crux.dts | 4 ++++ arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/imx6q-yapp4-crux.dts b/arch/arm/boot/dts/imx6q-yapp4-crux.dts index 15f4824a5142..bddf3822ebf7 100644 --- a/arch/arm/boot/dts/imx6q-yapp4-crux.dts +++ b/arch/arm/boot/dts/imx6q-yapp4-crux.dts @@ -33,6 +33,10 @@ status = "okay"; }; +®_pu { + regulator-always-on; +}; + ®_usb_h1_vbus { status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts b/arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts index cea165f2161a..afaf4a6759d4 100644 --- a/arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts +++ b/arch/arm/boot/dts/imx6qp-yapp4-crux-plus.dts @@ -33,6 +33,10 @@ status = "okay"; }; +®_pu { + regulator-always-on; +}; + ®_usb_h1_vbus { status = "okay"; }; From 06acb824d7d00a30e9400f67eee481b218371b5a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 10 Oct 2022 18:07:47 +0800 Subject: [PATCH 064/328] arm64: dts: imx8: correct clock order Per bindings/mmc/fsl-imx-esdhc.yaml, the clock order is ipg, ahb, per, otherwise warning: " mmc@5b020000: clock-names:1: 'ahb' was expected mmc@5b020000: clock-names:2: 'per' was expected " Fixes: 16c4ea7501b1 ("arm64: dts: imx8: switch to new lpcg clock binding") Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- .../arm64/boot/dts/freescale/imx8-ss-conn.dtsi | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi index 82a1c4488378..10370d1a6c6d 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi @@ -38,9 +38,9 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b010000 0x10000>; clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>, - <&sdhc0_lpcg IMX_LPCG_CLK_5>, - <&sdhc0_lpcg IMX_LPCG_CLK_0>; - clock-names = "ipg", "per", "ahb"; + <&sdhc0_lpcg IMX_LPCG_CLK_0>, + <&sdhc0_lpcg IMX_LPCG_CLK_5>; + clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_0>; status = "disabled"; }; @@ -49,9 +49,9 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b020000 0x10000>; clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>, - <&sdhc1_lpcg IMX_LPCG_CLK_5>, - <&sdhc1_lpcg IMX_LPCG_CLK_0>; - clock-names = "ipg", "per", "ahb"; + <&sdhc1_lpcg IMX_LPCG_CLK_0>, + <&sdhc1_lpcg IMX_LPCG_CLK_5>; + clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_1>; fsl,tuning-start-tap = <20>; fsl,tuning-step = <2>; @@ -62,9 +62,9 @@ conn_subsys: bus@5b000000 { interrupts = ; reg = <0x5b030000 0x10000>; clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>, - <&sdhc2_lpcg IMX_LPCG_CLK_5>, - <&sdhc2_lpcg IMX_LPCG_CLK_0>; - clock-names = "ipg", "per", "ahb"; + <&sdhc2_lpcg IMX_LPCG_CLK_0>, + <&sdhc2_lpcg IMX_LPCG_CLK_5>; + clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_2>; status = "disabled"; }; From ef370d8ceec62322dee24c960af8ca67a749f34d Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 10 Oct 2022 18:09:58 +0800 Subject: [PATCH 065/328] dt-bindings: power: gpcv2: add power-domains property Some pgc power-domain requires a parent power domain, so add an optional power-domains property, otherwise there will be dt check warning: gpc@303a0000: pgc:power-domain@1: 'power-domains' does not match any of the regexes: 'pinctrl-[0-9]+' Fixes: 30af8513bdb5 ("dt-bindings: power: add defines for i.MX8MM power domains") Signed-off-by: Peng Fan Acked-by: Krzysztof Kozlowski Signed-off-by: Shawn Guo --- Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml b/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml index 58022ae7d5dd..dfdb8dfb6b65 100644 --- a/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml +++ b/Documentation/devicetree/bindings/power/fsl,imx-gpcv2.yaml @@ -81,6 +81,9 @@ properties: power-supply: true + power-domains: + maxItems: 1 + resets: description: | A number of phandles to resets that need to be asserted during From 82ce591967517b733c6e6e6882b5096d239b3afe Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 20 Oct 2022 18:16:56 +0800 Subject: [PATCH 066/328] arm64: dts: imx93: correct s4mu interrupt names Per binding doc, interrupt names should be tx and rx. Fixes: 0dfb380d2492 ("arm64: dts: imx93: add s4 mu node") Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 3a5713bb4880..55a70ee6f91a 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -501,7 +501,7 @@ reg = <0x47520000 0x10000>; interrupts = , ; - interrupt-names = "txirq", "rxirq"; + interrupt-names = "tx", "rx"; #mbox-cells = <2>; }; From d92a110130d492bd5eab81827ce3730581dc933a Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 20 Oct 2022 18:16:57 +0800 Subject: [PATCH 067/328] arm64: dts: imx93: correct gpio-ranges Per imx93-pinfunc.h and pinctrl-imx93.c, correct gpio-ranges. Fixes: ec8b5b5058ea ("arm64: dts: freescale: Add i.MX93 dtsi support") Reported-by: David Wolfe Reviewed-by: Haibo Chen Reviewed-by: Jacky Bai Signed-off-by: Peng Fan Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 55a70ee6f91a..0247866fc86b 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -451,7 +451,7 @@ clocks = <&clk IMX93_CLK_GPIO2_GATE>, <&clk IMX93_CLK_GPIO2_GATE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc 0 32 32>; + gpio-ranges = <&iomuxc 0 4 30>; }; gpio3: gpio@43820080 { @@ -465,7 +465,8 @@ clocks = <&clk IMX93_CLK_GPIO3_GATE>, <&clk IMX93_CLK_GPIO3_GATE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc 0 64 32>; + gpio-ranges = <&iomuxc 0 84 8>, <&iomuxc 8 66 18>, + <&iomuxc 26 34 2>, <&iomuxc 28 0 4>; }; gpio4: gpio@43830080 { @@ -479,7 +480,7 @@ clocks = <&clk IMX93_CLK_GPIO4_GATE>, <&clk IMX93_CLK_GPIO4_GATE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc 0 96 32>; + gpio-ranges = <&iomuxc 0 38 28>, <&iomuxc 28 36 2>; }; gpio1: gpio@47400080 { @@ -493,7 +494,7 @@ clocks = <&clk IMX93_CLK_GPIO1_GATE>, <&clk IMX93_CLK_GPIO1_GATE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc 0 0 32>; + gpio-ranges = <&iomuxc 0 92 16>; }; s4muap: mailbox@47520000 { From 0ba7b623f15d52fa056eca26573d8cf1b9c29fd1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 22 Oct 2022 08:08:55 +0200 Subject: [PATCH 068/328] soc: imx: imx93-pd: Fix the error handling path of imx93_pd_probe() In imx93_pd_probe(); if an error occurs, some resources need to be released as done in the remove function. Fixes: 0a0f7cc25d4a ("soc: imx: add i.MX93 SRC power domain driver") Signed-off-by: Christophe JAILLET Signed-off-by: Shawn Guo --- drivers/soc/imx/imx93-pd.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/soc/imx/imx93-pd.c b/drivers/soc/imx/imx93-pd.c index 1f3d7039c1de..4d235c8c4924 100644 --- a/drivers/soc/imx/imx93-pd.c +++ b/drivers/soc/imx/imx93-pd.c @@ -135,11 +135,24 @@ static int imx93_pd_probe(struct platform_device *pdev) ret = pm_genpd_init(&domain->genpd, NULL, domain->init_off); if (ret) - return ret; + goto err_clk_unprepare; platform_set_drvdata(pdev, domain); - return of_genpd_add_provider_simple(np, &domain->genpd); + ret = of_genpd_add_provider_simple(np, &domain->genpd); + if (ret) + goto err_genpd_remove; + + return 0; + +err_genpd_remove: + pm_genpd_remove(&domain->genpd); + +err_clk_unprepare: + if (!domain->init_off) + clk_bulk_disable_unprepare(domain->num_clks, domain->clks); + + return ret; } static const struct of_device_id imx93_pd_ids[] = { From c126a0abc5dadd7df236f20aae6d8c3d103f095c Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 25 Oct 2022 17:41:15 +0300 Subject: [PATCH 069/328] arm64: dts: lx2160a: specify clock frequencies for the MDIO controllers Up until now, the external MDIO controller frequency values relied either on the default ones out of reset or on those setup by u-boot. Let's just properly specify the MDC frequency in the DTS so that even without u-boot's intervention Linux can drive the MDIO bus. Fixes: 6e1b8fae892d ("arm64: dts: lx2160a: add emdio1 node") Fixes: 5705b9dcda57 ("arm64: dts: lx2160a: add emdio2 node") Signed-off-by: Ioana Ciornei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 6680fb2a6dc9..8c76d86cb756 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -1385,6 +1385,9 @@ #address-cells = <1>; #size-cells = <0>; little-endian; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(2)>; status = "disabled"; }; @@ -1395,6 +1398,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(2)>; status = "disabled"; }; From d78a57426e64fc4c61e6189e450a0432d24536ca Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 25 Oct 2022 17:41:16 +0300 Subject: [PATCH 070/328] arm64: dts: ls1088a: specify clock frequencies for the MDIO controllers Up until now, the external MDIO controller frequency values relied either on the default ones out of reset or on those setup by u-boot. Let's just properly specify the MDC frequency in the DTS so that even without u-boot's intervention Linux can drive the MDIO bus. Fixes: bbe75af7b092 ("arm64: dts: ls1088a: add external MDIO device nodes") Signed-off-by: Ioana Ciornei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index 421d879013d7..260d045dbd9a 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -779,6 +779,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(1)>; status = "disabled"; }; @@ -788,6 +791,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(1)>; status = "disabled"; }; From d5c921a53c80dfa942f6dff36253db5a50775a5f Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 25 Oct 2022 17:41:17 +0300 Subject: [PATCH 071/328] arm64: dts: ls208xa: specify clock frequencies for the MDIO controllers Up until now, the external MDIO controller frequency values relied either on the default ones out of reset or on those setup by u-boot. Let's just properly specify the MDC frequency in the DTS so that even without u-boot's intervention Linux can drive the MDIO bus. Fixes: 0420dde30a90 ("arm64: dts: ls208xa: add the external MDIO nodes") Signed-off-by: Ioana Ciornei Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index f1b9cc8714dc..348d9e3a9125 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -532,6 +532,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(2)>; status = "disabled"; }; @@ -541,6 +544,9 @@ little-endian; #address-cells = <1>; #size-cells = <0>; + clock-frequency = <2500000>; + clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL + QORIQ_CLK_PLL_DIV(2)>; status = "disabled"; }; From bfab00b94bd8569cdb84a6511d6615e6a8104e9c Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 26 Sep 2022 22:37:52 +0200 Subject: [PATCH 072/328] drm/rockchip: dw_hdmi: filter regulator -EPROBE_DEFER error messages When the avdd-0v9 or avdd-1v8 supply are not yet available, EPROBE_DEFER is returned by rockchip_hdmi_parse_dt(). This causes the following error message to be printed multiple times: dwhdmi-rockchip fe0a0000.hdmi: [drm:dw_hdmi_rockchip_bind [rockchipdrm]] *ERROR* Unable to parse OF data Fix that by not printing the message when rockchip_hdmi_parse_dt() returns -EPROBE_DEFER. Fixes: ca80c4eb4b01 ("drm/rockchip: dw_hdmi: add regulator support") Signed-off-by: Aurelien Jarno Reviewed-by: Dmitry Osipenko Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20220926203752.5430-1-aurelien@aurel32.net --- drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index c14f88893868..2f4b8f64cbad 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -565,7 +565,8 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, ret = rockchip_hdmi_parse_dt(hdmi); if (ret) { - DRM_DEV_ERROR(hdmi->dev, "Unable to parse OF data\n"); + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(hdmi->dev, "Unable to parse OF data\n"); return ret; } From 0be67e0556e469c57100ffe3c90df90abc796f3b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 19 Oct 2022 17:03:48 -0700 Subject: [PATCH 073/328] drm/rockchip: dsi: Clean up 'usage_mode' when failing to attach If we fail to attach the first time (especially: EPROBE_DEFER), we fail to clean up 'usage_mode', and thus will fail to attach on any subsequent attempts, with "dsi controller already in use". Re-set to DW_DSI_USAGE_IDLE on attach failure. This is especially common to hit when enabling asynchronous probe on a duel-DSI system (such as RK3399 Gru/Scarlet), such that we're more likely to fail dw_mipi_dsi_rockchip_find_second() the first time. Fixes: 71f68fe7f121 ("drm/rockchip: dsi: add ability to work as a phy instead of full dsi") Cc: Signed-off-by: Brian Norris Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221019170255.1.Ia68dfb27b835d31d22bfe23812baf366ee1c6eac@changeid --- drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index bf6948125b84..d222c6811207 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -1051,23 +1051,31 @@ static int dw_mipi_dsi_rockchip_host_attach(void *priv_data, if (ret) { DRM_DEV_ERROR(dsi->dev, "Failed to register component: %d\n", ret); - return ret; + goto out; } second = dw_mipi_dsi_rockchip_find_second(dsi); - if (IS_ERR(second)) - return PTR_ERR(second); + if (IS_ERR(second)) { + ret = PTR_ERR(second); + goto out; + } if (second) { ret = component_add(second, &dw_mipi_dsi_rockchip_ops); if (ret) { DRM_DEV_ERROR(second, "Failed to register component: %d\n", ret); - return ret; + goto out; } } return 0; + +out: + mutex_lock(&dsi->usage_mutex); + dsi->usage_mode = DW_DSI_USAGE_IDLE; + mutex_unlock(&dsi->usage_mutex); + return ret; } static int dw_mipi_dsi_rockchip_host_detach(void *priv_data, From 81e592f86f7afdb76d655e7fbd7803d7b8f985d8 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 19 Oct 2022 17:03:49 -0700 Subject: [PATCH 074/328] drm/rockchip: dsi: Force synchronous probe We can't safely probe a dual-DSI display asynchronously (driver_async_probe='*' or driver_async_probe='dw-mipi-dsi-rockchip' cmdline), because dw_mipi_dsi_rockchip_find_second() pokes one DSI device's drvdata from the other device without any locking. Request synchronous probe, at least until this driver learns some appropriate locking for dual-DSI initialization. Cc: Signed-off-by: Brian Norris Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221019170255.2.I6b985b0ca372b7e35c6d9ea970b24bcb262d4fc1@changeid --- drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index d222c6811207..528ddce144e5 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -1689,5 +1689,11 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = { .of_match_table = dw_mipi_dsi_rockchip_dt_ids, .pm = &dw_mipi_dsi_rockchip_pm_ops, .name = "dw-mipi-dsi-rockchip", + /* + * For dual-DSI display, one DSI pokes at the other DSI's + * drvdata in dw_mipi_dsi_rockchip_find_second(). This is not + * safe for asynchronous probe. + */ + .probe_type = PROBE_FORCE_SYNCHRONOUS, }, }; From ab78c74cfc5a3caa2bbb7627cb8f3bca40bb5fb0 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Thu, 20 Oct 2022 19:12:47 +0100 Subject: [PATCH 075/328] drm/rockchip: fix fbdev on non-IOMMU devices When switching to the generic fbdev infrastructure, it was missed that framebuffers were created with the alloc_kmap parameter to rockchip_gem_create_object() set to true. The generic infrastructure calls this via the .dumb_create() driver operation and thus creates a buffer without an associated kmap. alloc_kmap only makes a difference on devices without an IOMMU, but when it is missing rockchip_gem_prime_vmap() fails and the framebuffer cannot be used. Detect the case where a buffer is being allocated for the framebuffer and ensure a kernel mapping is created in this case. Fixes: 24af7c34b290 ("drm/rockchip: use generic fbdev setup") Reported-by: Johan Jonker Cc: Thomas Zimmermann Signed-off-by: John Keeping Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221020181248.2497065-1-john@metanate.com --- drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index 614e97aaac80..da8a69953706 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -364,9 +364,12 @@ rockchip_gem_create_with_handle(struct drm_file *file_priv, { struct rockchip_gem_object *rk_obj; struct drm_gem_object *obj; + bool is_framebuffer; int ret; - rk_obj = rockchip_gem_create_object(drm, size, false); + is_framebuffer = drm->fb_helper && file_priv == drm->fb_helper->client.file; + + rk_obj = rockchip_gem_create_object(drm, size, is_framebuffer); if (IS_ERR(rk_obj)) return ERR_CAST(rk_obj); From 553c5a429aee26c9cfaf37ae158a8915540270fe Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sun, 23 Oct 2022 18:07:47 +0200 Subject: [PATCH 076/328] drm/rockchip: dsi: Fix VOP selection on SoCs that support it lcdsel_grf_reg is defined as u32, so "< 0" comaprison is always false, which breaks VOP selection on eg. RK3399. Compare against 0. Fixes: f3aaa6125b6f ("drm/rockchip: dsi: add rk3568 support") Signed-off-by: Ondrej Jirman Tested-by: Chris Morgan Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221023160747.607943-1-megi@xff.cz --- drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index 528ddce144e5..f4df9820b295 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -752,7 +752,7 @@ static void dw_mipi_dsi_rockchip_config(struct dw_mipi_dsi_rockchip *dsi) static void dw_mipi_dsi_rockchip_set_lcdsel(struct dw_mipi_dsi_rockchip *dsi, int mux) { - if (dsi->cdata->lcdsel_grf_reg < 0) + if (dsi->cdata->lcdsel_grf_reg) regmap_write(dsi->grf_regmap, dsi->cdata->lcdsel_grf_reg, mux ? dsi->cdata->lcdsel_lit : dsi->cdata->lcdsel_big); } @@ -1643,7 +1643,6 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = { static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = { { .reg = 0xfe060000, - .lcdsel_grf_reg = -1, .lanecfg1_grf_reg = RK3568_GRF_VO_CON2, .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI0_SKEWCALHS | RK3568_DSI0_FORCETXSTOPMODE | @@ -1653,7 +1652,6 @@ static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = { }, { .reg = 0xfe070000, - .lcdsel_grf_reg = -1, .lanecfg1_grf_reg = RK3568_GRF_VO_CON3, .lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI1_SKEWCALHS | RK3568_DSI1_FORCETXSTOPMODE | From 471bf2406c043491b1a8288e5f04bc278f7d7ca1 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Fri, 28 Oct 2022 11:52:05 +0200 Subject: [PATCH 077/328] drm/rockchip: vop2: fix null pointer in plane_atomic_disable If the vop2_plane_atomic_disable function is called with NULL as a state, accessing the old_pstate runs into a null pointer exception. However, the drm_atomic_helper_disable_planes_on_crtc function calls the atomic_disable callback with state NULL. Allow to disable a plane without passing a plane state by checking the old_pstate only if a state is passed. Signed-off-by: Michael Tretter Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221028095206.2136601-2-m.tretter@pengutronix.de --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index aac20be5ac08..26f8a8489ded 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -996,13 +996,15 @@ static int vop2_plane_atomic_check(struct drm_plane *plane, static void vop2_plane_atomic_disable(struct drm_plane *plane, struct drm_atomic_state *state) { - struct drm_plane_state *old_pstate = drm_atomic_get_old_plane_state(state, plane); + struct drm_plane_state *old_pstate = NULL; struct vop2_win *win = to_vop2_win(plane); struct vop2 *vop2 = win->vop2; drm_dbg(vop2->drm, "%s disable\n", win->data->name); - if (!old_pstate->crtc) + if (state) + old_pstate = drm_atomic_get_old_plane_state(state, plane); + if (old_pstate && !old_pstate->crtc) return; vop2_win_disable(win); From 447fb14bf07905b880c9ed1ea92c53d6dd0649d7 Mon Sep 17 00:00:00 2001 From: Michael Tretter Date: Fri, 28 Oct 2022 11:52:06 +0200 Subject: [PATCH 078/328] drm/rockchip: vop2: disable planes when disabling the crtc The vop2 driver needs to explicitly disable the planes if the crtc is disabled. Unless the planes are explicitly disabled, the address of the last framebuffer is kept in the registers of the VOP2. When re-enabling the encoder after it has been disabled by the driver, the VOP2 will start and read the framebuffer that has been freed but is still pointed to by the register. The iommu will catch these read accesses and print errors. Explicitly disable the planes when the crtc is disabled to reset the registers. Signed-off-by: Michael Tretter Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/20221028095206.2136601-3-m.tretter@pengutronix.de --- drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 26f8a8489ded..105a548d0abe 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -877,10 +877,14 @@ static void vop2_crtc_atomic_disable(struct drm_crtc *crtc, { struct vop2_video_port *vp = to_vop2_video_port(crtc); struct vop2 *vop2 = vp->vop2; + struct drm_crtc_state *old_crtc_state; int ret; vop2_lock(vop2); + old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc); + drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, false); + drm_crtc_vblank_off(crtc); /* From 88e1f16ba58665e9edfce437ea487da2fa759af9 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 1 Sep 2022 03:10:07 +0000 Subject: [PATCH 079/328] ksefltests: pidfd: Fix wait_states: Test terminated by timeout 0Day/LKP observed that the kselftest blocks forever since one of the pidfd_wait doesn't terminate in 1 of 30 runs. After digging into the source, we found that it blocks at: ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); wait_states has below testing flow: CHILD PARENT ---------------+-------------- 1 STOP itself 2 WAIT for CHILD STOPPED 3 SIGNAL CHILD to CONT 4 CONT 5 STOP itself 5' WAIT for CHILD CONT 6 WAIT for CHILD STOPPED The problem is that the kernel cannot ensure the order of 5 and 5', once 5 goes first, the test will fail. we can reproduce it by: $ while true; do make run_tests -C pidfd; done Introduce a blocking read in child process to make sure the parent can check its WCONTINUED. CC: Philip Li Reported-by: kernel test robot Signed-off-by: Li Zhijian Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_wait.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 070c1c876df1..c3e2a3041f55 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -95,20 +95,28 @@ TEST(wait_states) .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, }; + int pfd[2]; pid_t pid; siginfo_t info = { .si_signo = 0, }; + ASSERT_EQ(pipe(pfd), 0); pid = sys_clone3(&args); ASSERT_GE(pid, 0); if (pid == 0) { + char buf[2]; + + close(pfd[1]); kill(getpid(), SIGSTOP); + ASSERT_EQ(read(pfd[0], buf, 1), 1); + close(pfd[0]); kill(getpid(), SIGSTOP); exit(EXIT_SUCCESS); } + close(pfd[0]); ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); @@ -117,6 +125,8 @@ TEST(wait_states) ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(write(pfd[1], "C", 1), 1); + close(pfd[1]); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_CONTINUED); ASSERT_EQ(info.si_pid, parent_tid); From 3d982441308ebdf713771c8a85c23d9b8b66b4d4 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 1 Sep 2022 03:17:34 +0000 Subject: [PATCH 080/328] selftests: pidfd: Fix compling warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix warnings and enable Wall. pidfd_wait.c: In function ‘wait_nonblock’: pidfd_wait.c:150:13: warning: unused variable ‘status’ [-Wunused-variable] 150 | int pidfd, status = 0; | ^~~~~~ ... pidfd_test.c: In function ‘child_poll_exec_test’: pidfd_test.c:438:1: warning: no return statement in function returning non-void [-Wreturn-type] 438 | } | ^ Signed-off-by: Li Zhijian v2: fix mistake assignment to pidfd Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/Makefile | 2 +- tools/testing/selftests/pidfd/pidfd_test.c | 2 ++ tools/testing/selftests/pidfd/pidfd_wait.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index f4a2f28f926b..778b6cdc8aed 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ -pthread +CFLAGS += -g -I../../../../usr/include/ -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 9a2d64901d59..d36654265b7a 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -435,6 +435,8 @@ static int child_poll_exec_test(void *args) */ while (1) sleep(1); + + return 0; } static void test_pidfd_poll_exec(int use_waitpid) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index c3e2a3041f55..0dcb8365ddc3 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -148,7 +148,7 @@ TEST(wait_states) TEST(wait_nonblock) { - int pidfd, status = 0; + int pidfd; unsigned int flags = 0; pid_t parent_tid = -1; struct clone_args args = { From 6fdaed8c79887680bc46cb0a51775bd7c8645528 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Thu, 27 Oct 2022 22:57:11 +0900 Subject: [PATCH 081/328] drm/format-helper: Only advertise supported formats for conversion drm_fb_build_fourcc_list() currently returns all emulated formats unconditionally as long as the native format is among them, even though not all combinations have conversion helpers. Although the list is arguably provided to userspace in precedence order, userspace can pick something out-of-order (and thus break when it shouldn't), or simply only support a format that is unsupported (and thus think it can work, which results in the appearance of a hang as FB blits fail later on, instead of the initialization error you'd expect in this case). Add checks to filter the list of emulated formats to only those supported for conversion to the native format. This presumes that there is a single native format (only the first is checked, if there are multiple). Refactoring this API to drop the native list or support it properly (by returning the appropriate emulated->native mapping table) is left for a future patch. The simpledrm driver is left as-is with a full table of emulated formats. This keeps all currently working conversions available and drops all the broken ones (i.e. this a strict bugfix patch, adding no new supported formats nor removing any actually working ones). In order to avoid proliferation of emulated formats, future drivers should advertise only XRGB8888 as the sole emulated format (since some userspace assumes its presence). This fixes a real user regression where the ?RGB2101010 support commit started advertising it unconditionally where not supported, and KWin decided to start to use it over the native format and broke, but also the fixes the spurious RGB565/RGB888 formats which have been wrongly unconditionally advertised since the dawn of simpledrm. Fixes: 6ea966fca084 ("drm/simpledrm: Add [AX]RGB2101010 formats") Fixes: 11e8f5fd223b ("drm: Add simpledrm driver") Cc: stable@vger.kernel.org Signed-off-by: Hector Martin Acked-by: Pekka Paalanen Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20221027135711.24425-1-marcan@marcan.st --- drivers/gpu/drm/drm_format_helper.c | 66 ++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c index e2f76621453c..3ee59bae9d2f 100644 --- a/drivers/gpu/drm/drm_format_helper.c +++ b/drivers/gpu/drm/drm_format_helper.c @@ -807,6 +807,38 @@ static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t return false; } +static const uint32_t conv_from_xrgb8888[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, +}; + +static const uint32_t conv_from_rgb565_888[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, +}; + +static bool is_conversion_supported(uint32_t from, uint32_t to) +{ + switch (from) { + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + return is_listed_fourcc(conv_from_xrgb8888, ARRAY_SIZE(conv_from_xrgb8888), to); + case DRM_FORMAT_RGB565: + case DRM_FORMAT_RGB888: + return is_listed_fourcc(conv_from_rgb565_888, ARRAY_SIZE(conv_from_rgb565_888), to); + case DRM_FORMAT_XRGB2101010: + return to == DRM_FORMAT_ARGB2101010; + case DRM_FORMAT_ARGB2101010: + return to == DRM_FORMAT_XRGB2101010; + default: + return false; + } +} + /** * drm_fb_build_fourcc_list - Filters a list of supported color formats against * the device's native formats @@ -827,7 +859,9 @@ static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t * be handed over to drm_universal_plane_init() et al. Native formats * will go before emulated formats. Other heuristics might be applied * to optimize the order. Formats near the beginning of the list are - * usually preferred over formats near the end of the list. + * usually preferred over formats near the end of the list. Formats + * without conversion helpers will be skipped. New drivers should only + * pass in XRGB8888 and avoid exposing additional emulated formats. * * Returns: * The number of color-formats 4CC codes returned in @fourccs_out. @@ -839,7 +873,7 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, { u32 *fourccs = fourccs_out; const u32 *fourccs_end = fourccs_out + nfourccs_out; - bool found_native = false; + uint32_t native_format = 0; size_t i; /* @@ -858,26 +892,18 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, drm_dbg_kms(dev, "adding native format %p4cc\n", &fourcc); - if (!found_native) - found_native = is_listed_fourcc(driver_fourccs, driver_nfourccs, fourcc); + /* + * There should only be one native format with the current API. + * This API needs to be refactored to correctly support arbitrary + * sets of native formats, since it needs to report which native + * format to use for each emulated format. + */ + if (!native_format) + native_format = fourcc; *fourccs = fourcc; ++fourccs; } - /* - * The plane's atomic_update helper converts the framebuffer's color format - * to a native format when copying to device memory. - * - * If there is not a single format supported by both, device and - * driver, the native formats are likely not supported by the conversion - * helpers. Therefore *only* support the native formats and add a - * conversion helper ASAP. - */ - if (!found_native) { - drm_warn(dev, "Format conversion helpers required to add extra formats.\n"); - goto out; - } - /* * The extra formats, emulated by the driver, go second. */ @@ -890,6 +916,9 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, } else if (fourccs == fourccs_end) { drm_warn(dev, "Ignoring emulated format %p4cc\n", &fourcc); continue; /* end of available output buffer */ + } else if (!is_conversion_supported(fourcc, native_format)) { + drm_dbg_kms(dev, "Unsupported emulated format %p4cc\n", &fourcc); + continue; /* format is not supported for conversion */ } drm_dbg_kms(dev, "adding emulated format %p4cc\n", &fourcc); @@ -898,7 +927,6 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev, ++fourccs; } -out: return fourccs - fourccs_out; } EXPORT_SYMBOL(drm_fb_build_fourcc_list); From 171a93182eccd6e6835d2c86b40787f9f832efaa Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 29 Oct 2022 00:07:06 +0300 Subject: [PATCH 082/328] ata: pata_legacy: fix pdc20230_set_piomode() Clang gives a warning when compiling pata_legacy.c with 'make W=1' about the 'rt' local variable in pdc20230_set_piomode() being set but unused. Quite obviously, there is an outb() call missing to write back the updated variable. Moreover, checking the docs by Petr Soucek revealed that bitwise AND should have been done with a negated timing mask and the master/slave timing masks were swapped while updating... Fixes: 669a5db411d8 ("[libata] Add a bunch of PATA drivers.") Reported-by: Damien Le Moal Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_legacy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 0a8bf09a5c19..03c580625c2c 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -315,9 +315,10 @@ static void pdc20230_set_piomode(struct ata_port *ap, struct ata_device *adev) outb(inb(0x1F4) & 0x07, 0x1F4); rt = inb(0x1F3); - rt &= 0x07 << (3 * adev->devno); + rt &= ~(0x07 << (3 * !adev->devno)); if (pio) - rt |= (1 + 3 * pio) << (3 * adev->devno); + rt |= (1 + 3 * pio) << (3 * !adev->devno); + outb(rt, 0x1F3); udelay(100); outb(inb(0x1F2) | 0x01, 0x1F2); From 015618c3ec19584c83ff179fa631be8cec906aaf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 29 Oct 2022 15:49:31 +0800 Subject: [PATCH 083/328] ata: palmld: fix return value check in palmld_pata_probe() If devm_platform_ioremap_resource() fails, it never return NULL pointer, replace the check with IS_ERR(). Fixes: 57bf0f5a162d ("ARM: pxa: use pdev resource for palmld mmio") Signed-off-by: Yang Yingliang Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_palmld.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c index 400e65190904..51caa2a427dd 100644 --- a/drivers/ata/pata_palmld.c +++ b/drivers/ata/pata_palmld.c @@ -63,8 +63,8 @@ static int palmld_pata_probe(struct platform_device *pdev) /* remap drive's physical memory address */ mem = devm_platform_ioremap_resource(pdev, 0); - if (!mem) - return -ENOMEM; + if (IS_ERR(mem)) + return PTR_ERR(mem); /* request and activate power and reset GPIOs */ lda->power = devm_gpiod_get(dev, "power", GPIOD_OUT_HIGH); From 986d93f55bdeab1cac858d1e47b41fac10b2d7f6 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 10:10:21 +0800 Subject: [PATCH 084/328] audit: fix undefined behavior in bit shift for AUDIT_BIT Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in kernel/auditfilter.c:179:23 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c audit_register_class+0x9d/0x137 audit_classes_init+0x4d/0xb8 do_one_initcall+0x76/0x430 kernel_init_freeable+0x3b3/0x422 kernel_init+0x24/0x1e0 ret_from_fork+0x1f/0x30 Signed-off-by: Gaosheng Cui [PM: remove bad 'Fixes' tag as issue predates git, added in v2.6.6-rc1] Signed-off-by: Paul Moore --- include/uapi/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 7c1dc818b1d5..d676ed2b246e 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -187,7 +187,7 @@ #define AUDIT_MAX_KEY_LEN 256 #define AUDIT_BITMASK_SIZE 64 #define AUDIT_WORD(nr) ((__u32)((nr)/32)) -#define AUDIT_BIT(nr) (1 << ((nr) - AUDIT_WORD(nr)*32)) +#define AUDIT_BIT(nr) (1U << ((nr) - AUDIT_WORD(nr)*32)) #define AUDIT_SYSCALL_CLASSES 16 #define AUDIT_CLASS_DIR_WRITE 0 From d7164a5048e8a6afe2cc4aaf7f12643c14e7f241 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 25 Oct 2022 14:44:55 +0300 Subject: [PATCH 085/328] drm/i915/tgl+: Add locking around DKL PHY register accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accessing the TypeC DKL PHY registers during modeset-commit, -verification, DP link-retraining and AUX power well toggling is racy due to these code paths being concurrent and the PHY register bank selection register (HIP_INDEX_REG) being shared between PHY instances (aka TC ports) and the bank selection being not atomic wrt. the actual PHY register access. Add the required locking around each PHY register bank selection-> register access sequence. Kudos to Ville for noticing the race conditions. v2: - Add the DKL PHY register accessors to intel_dkl_phy.[ch]. (Jani) - Make the DKL_REG_TC_PORT macro independent of PHY internals. - Move initing the DKL PHY lock to a more logical place. v3: - Fix parameter reuse in the DKL_REG_TC_PORT definition. - Document the usage of phy_lock. v4: - Fix adding TC_PORT_1 offset in the DKL_REG_TC_PORT definition. Cc: Ville Syrjälä Cc: Jani Nikula Cc: # v5.5+ Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20221025114457.2191004-1-imre.deak@intel.com (cherry picked from commit 89cb0ba4ceee6bed1059904859c5723b3f39da68) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_ddi.c | 68 +++++------ .../gpu/drm/i915/display/intel_display_core.h | 8 ++ .../i915/display/intel_display_power_well.c | 7 +- drivers/gpu/drm/i915/display/intel_dkl_phy.c | 109 ++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dkl_phy.h | 24 ++++ drivers/gpu/drm/i915/display/intel_dpll_mgr.c | 59 +++++----- drivers/gpu/drm/i915/i915_driver.c | 1 + drivers/gpu/drm/i915/i915_reg.h | 3 + 9 files changed, 204 insertions(+), 76 deletions(-) create mode 100644 drivers/gpu/drm/i915/display/intel_dkl_phy.c create mode 100644 drivers/gpu/drm/i915/display/intel_dkl_phy.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index a26edcdadc21..cea00aaca04b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -282,6 +282,7 @@ i915-y += \ display/intel_ddi.o \ display/intel_ddi_buf_trans.o \ display/intel_display_trace.o \ + display/intel_dkl_phy.o \ display/intel_dp.o \ display/intel_dp_aux.o \ display/intel_dp_aux_backlight.o \ diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index da8472cdc135..69ecf2a3d6c6 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -43,6 +43,7 @@ #include "intel_de.h" #include "intel_display_power.h" #include "intel_display_types.h" +#include "intel_dkl_phy.h" #include "intel_dp.h" #include "intel_dp_link_training.h" #include "intel_dp_mst.h" @@ -1262,33 +1263,30 @@ static void tgl_dkl_phy_set_signal_levels(struct intel_encoder *encoder, for (ln = 0; ln < 2; ln++) { int level; - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, ln)); - - intel_de_write(dev_priv, DKL_TX_PMD_LANE_SUS(tc_port), 0); + intel_dkl_phy_write(dev_priv, DKL_TX_PMD_LANE_SUS(tc_port), ln, 0); level = intel_ddi_level(encoder, crtc_state, 2*ln+0); - intel_de_rmw(dev_priv, DKL_TX_DPCNTL0(tc_port), - DKL_TX_PRESHOOT_COEFF_MASK | - DKL_TX_DE_EMPAHSIS_COEFF_MASK | - DKL_TX_VSWING_CONTROL_MASK, - DKL_TX_PRESHOOT_COEFF(trans->entries[level].dkl.preshoot) | - DKL_TX_DE_EMPHASIS_COEFF(trans->entries[level].dkl.de_emphasis) | - DKL_TX_VSWING_CONTROL(trans->entries[level].dkl.vswing)); + intel_dkl_phy_rmw(dev_priv, DKL_TX_DPCNTL0(tc_port), ln, + DKL_TX_PRESHOOT_COEFF_MASK | + DKL_TX_DE_EMPAHSIS_COEFF_MASK | + DKL_TX_VSWING_CONTROL_MASK, + DKL_TX_PRESHOOT_COEFF(trans->entries[level].dkl.preshoot) | + DKL_TX_DE_EMPHASIS_COEFF(trans->entries[level].dkl.de_emphasis) | + DKL_TX_VSWING_CONTROL(trans->entries[level].dkl.vswing)); level = intel_ddi_level(encoder, crtc_state, 2*ln+1); - intel_de_rmw(dev_priv, DKL_TX_DPCNTL1(tc_port), - DKL_TX_PRESHOOT_COEFF_MASK | - DKL_TX_DE_EMPAHSIS_COEFF_MASK | - DKL_TX_VSWING_CONTROL_MASK, - DKL_TX_PRESHOOT_COEFF(trans->entries[level].dkl.preshoot) | - DKL_TX_DE_EMPHASIS_COEFF(trans->entries[level].dkl.de_emphasis) | - DKL_TX_VSWING_CONTROL(trans->entries[level].dkl.vswing)); + intel_dkl_phy_rmw(dev_priv, DKL_TX_DPCNTL1(tc_port), ln, + DKL_TX_PRESHOOT_COEFF_MASK | + DKL_TX_DE_EMPAHSIS_COEFF_MASK | + DKL_TX_VSWING_CONTROL_MASK, + DKL_TX_PRESHOOT_COEFF(trans->entries[level].dkl.preshoot) | + DKL_TX_DE_EMPHASIS_COEFF(trans->entries[level].dkl.de_emphasis) | + DKL_TX_VSWING_CONTROL(trans->entries[level].dkl.vswing)); - intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port), - DKL_TX_DP20BITMODE, 0); + intel_dkl_phy_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port), ln, + DKL_TX_DP20BITMODE, 0); if (IS_ALDERLAKE_P(dev_priv)) { u32 val; @@ -1306,10 +1304,10 @@ static void tgl_dkl_phy_set_signal_levels(struct intel_encoder *encoder, val |= DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2(0); } - intel_de_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port), - DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK | - DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK, - val); + intel_dkl_phy_rmw(dev_priv, DKL_TX_DPCNTL2(tc_port), ln, + DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX1_MASK | + DKL_TX_DPCNTL2_CFG_LOADGENSELECT_TX2_MASK, + val); } } } @@ -2019,12 +2017,8 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, return; if (DISPLAY_VER(dev_priv) >= 12) { - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x0)); - ln0 = intel_de_read(dev_priv, DKL_DP_MODE(tc_port)); - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x1)); - ln1 = intel_de_read(dev_priv, DKL_DP_MODE(tc_port)); + ln0 = intel_dkl_phy_read(dev_priv, DKL_DP_MODE(tc_port), 0); + ln1 = intel_dkl_phy_read(dev_priv, DKL_DP_MODE(tc_port), 1); } else { ln0 = intel_de_read(dev_priv, MG_DP_MODE(0, tc_port)); ln1 = intel_de_read(dev_priv, MG_DP_MODE(1, tc_port)); @@ -2085,12 +2079,8 @@ icl_program_mg_dp_mode(struct intel_digital_port *dig_port, } if (DISPLAY_VER(dev_priv) >= 12) { - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x0)); - intel_de_write(dev_priv, DKL_DP_MODE(tc_port), ln0); - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x1)); - intel_de_write(dev_priv, DKL_DP_MODE(tc_port), ln1); + intel_dkl_phy_write(dev_priv, DKL_DP_MODE(tc_port), 0, ln0); + intel_dkl_phy_write(dev_priv, DKL_DP_MODE(tc_port), 1, ln1); } else { intel_de_write(dev_priv, MG_DP_MODE(0, tc_port), ln0); intel_de_write(dev_priv, MG_DP_MODE(1, tc_port), ln1); @@ -3094,10 +3084,8 @@ static void adlp_tbt_to_dp_alt_switch_wa(struct intel_encoder *encoder) enum tc_port tc_port = intel_port_to_tc(i915, encoder->port); int ln; - for (ln = 0; ln < 2; ln++) { - intel_de_write(i915, HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln)); - intel_de_rmw(i915, DKL_PCS_DW5(tc_port), DKL_PCS_DW5_CORE_SOFTRESET, 0); - } + for (ln = 0; ln < 2; ln++) + intel_dkl_phy_rmw(i915, DKL_PCS_DW5(tc_port), ln, DKL_PCS_DW5_CORE_SOFTRESET, 0); } static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h index 96cf994b0ad1..9b51148e8ba5 100644 --- a/drivers/gpu/drm/i915/display/intel_display_core.h +++ b/drivers/gpu/drm/i915/display/intel_display_core.h @@ -315,6 +315,14 @@ struct intel_display { struct intel_global_obj obj; } dbuf; + struct { + /* + * dkl.phy_lock protects against concurrent access of the + * Dekel TypeC PHYs. + */ + spinlock_t phy_lock; + } dkl; + struct { /* VLV/CHV/BXT/GLK DSI MMIO register base address */ u32 mmio_base; diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.c b/drivers/gpu/drm/i915/display/intel_display_power_well.c index df7ee4969ef1..1d18eee56253 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power_well.c +++ b/drivers/gpu/drm/i915/display/intel_display_power_well.c @@ -12,6 +12,7 @@ #include "intel_de.h" #include "intel_display_power_well.h" #include "intel_display_types.h" +#include "intel_dkl_phy.h" #include "intel_dmc.h" #include "intel_dpio_phy.h" #include "intel_dpll.h" @@ -529,11 +530,9 @@ icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv, enum tc_port tc_port; tc_port = TGL_AUX_PW_TO_TC_PORT(i915_power_well_instance(power_well)->hsw.idx); - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x2)); - if (intel_de_wait_for_set(dev_priv, DKL_CMN_UC_DW_27(tc_port), - DKL_CMN_UC_DW27_UC_HEALTH, 1)) + if (wait_for(intel_dkl_phy_read(dev_priv, DKL_CMN_UC_DW_27(tc_port), 2) & + DKL_CMN_UC_DW27_UC_HEALTH, 1)) drm_warn(&dev_priv->drm, "Timeout waiting TC uC health\n"); } diff --git a/drivers/gpu/drm/i915/display/intel_dkl_phy.c b/drivers/gpu/drm/i915/display/intel_dkl_phy.c new file mode 100644 index 000000000000..710b030c7ed5 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dkl_phy.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_reg.h" + +#include "intel_de.h" +#include "intel_display.h" +#include "intel_dkl_phy.h" + +static void +dkl_phy_set_hip_idx(struct drm_i915_private *i915, i915_reg_t reg, int idx) +{ + enum tc_port tc_port = DKL_REG_TC_PORT(reg); + + drm_WARN_ON(&i915->drm, tc_port < TC_PORT_1 || tc_port >= I915_MAX_TC_PORTS); + + intel_de_write(i915, + HIP_INDEX_REG(tc_port), + HIP_INDEX_VAL(tc_port, idx)); +} + +/** + * intel_dkl_phy_read - read a Dekel PHY register + * @i915: i915 device instance + * @reg: Dekel PHY register + * @ln: lane instance of @reg + * + * Read the @reg Dekel PHY register. + * + * Returns the read value. + */ +u32 +intel_dkl_phy_read(struct drm_i915_private *i915, i915_reg_t reg, int ln) +{ + u32 val; + + spin_lock(&i915->display.dkl.phy_lock); + + dkl_phy_set_hip_idx(i915, reg, ln); + val = intel_de_read(i915, reg); + + spin_unlock(&i915->display.dkl.phy_lock); + + return val; +} + +/** + * intel_dkl_phy_write - write a Dekel PHY register + * @i915: i915 device instance + * @reg: Dekel PHY register + * @ln: lane instance of @reg + * @val: value to write + * + * Write @val to the @reg Dekel PHY register. + */ +void +intel_dkl_phy_write(struct drm_i915_private *i915, i915_reg_t reg, int ln, u32 val) +{ + spin_lock(&i915->display.dkl.phy_lock); + + dkl_phy_set_hip_idx(i915, reg, ln); + intel_de_write(i915, reg, val); + + spin_unlock(&i915->display.dkl.phy_lock); +} + +/** + * intel_dkl_phy_rmw - read-modify-write a Dekel PHY register + * @i915: i915 device instance + * @reg: Dekel PHY register + * @ln: lane instance of @reg + * @clear: mask to clear + * @set: mask to set + * + * Read the @reg Dekel PHY register, clearing then setting the @clear/@set bits in it, and writing + * this value back to the register if the value differs from the read one. + */ +void +intel_dkl_phy_rmw(struct drm_i915_private *i915, i915_reg_t reg, int ln, u32 clear, u32 set) +{ + spin_lock(&i915->display.dkl.phy_lock); + + dkl_phy_set_hip_idx(i915, reg, ln); + intel_de_rmw(i915, reg, clear, set); + + spin_unlock(&i915->display.dkl.phy_lock); +} + +/** + * intel_dkl_phy_posting_read - do a posting read from a Dekel PHY register + * @i915: i915 device instance + * @reg: Dekel PHY register + * @ln: lane instance of @reg + * + * Read the @reg Dekel PHY register without returning the read value. + */ +void +intel_dkl_phy_posting_read(struct drm_i915_private *i915, i915_reg_t reg, int ln) +{ + spin_lock(&i915->display.dkl.phy_lock); + + dkl_phy_set_hip_idx(i915, reg, ln); + intel_de_posting_read(i915, reg); + + spin_unlock(&i915->display.dkl.phy_lock); +} diff --git a/drivers/gpu/drm/i915/display/intel_dkl_phy.h b/drivers/gpu/drm/i915/display/intel_dkl_phy.h new file mode 100644 index 000000000000..260ad121a0b1 --- /dev/null +++ b/drivers/gpu/drm/i915/display/intel_dkl_phy.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2022 Intel Corporation + */ + +#ifndef __INTEL_DKL_PHY_H__ +#define __INTEL_DKL_PHY_H__ + +#include + +#include "i915_reg_defs.h" + +struct drm_i915_private; + +u32 +intel_dkl_phy_read(struct drm_i915_private *i915, i915_reg_t reg, int ln); +void +intel_dkl_phy_write(struct drm_i915_private *i915, i915_reg_t reg, int ln, u32 val); +void +intel_dkl_phy_rmw(struct drm_i915_private *i915, i915_reg_t reg, int ln, u32 clear, u32 set); +void +intel_dkl_phy_posting_read(struct drm_i915_private *i915, i915_reg_t reg, int ln); + +#endif /* __INTEL_DKL_PHY_H__ */ diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index e5fb66a5dd02..64dd603dc69a 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -25,6 +25,7 @@ #include "intel_de.h" #include "intel_display_types.h" +#include "intel_dkl_phy.h" #include "intel_dpio_phy.h" #include "intel_dpll.h" #include "intel_dpll_mgr.h" @@ -3508,15 +3509,12 @@ static bool dkl_pll_get_hw_state(struct drm_i915_private *dev_priv, * All registers read here have the same HIP_INDEX_REG even though * they are on different building blocks */ - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x2)); - - hw_state->mg_refclkin_ctl = intel_de_read(dev_priv, - DKL_REFCLKIN_CTL(tc_port)); + hw_state->mg_refclkin_ctl = intel_dkl_phy_read(dev_priv, + DKL_REFCLKIN_CTL(tc_port), 2); hw_state->mg_refclkin_ctl &= MG_REFCLKIN_CTL_OD_2_MUX_MASK; hw_state->mg_clktop2_hsclkctl = - intel_de_read(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port)); + intel_dkl_phy_read(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port), 2); hw_state->mg_clktop2_hsclkctl &= MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | @@ -3524,32 +3522,32 @@ static bool dkl_pll_get_hw_state(struct drm_i915_private *dev_priv, MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK; hw_state->mg_clktop2_coreclkctl1 = - intel_de_read(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port)); + intel_dkl_phy_read(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port), 2); hw_state->mg_clktop2_coreclkctl1 &= MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; - hw_state->mg_pll_div0 = intel_de_read(dev_priv, DKL_PLL_DIV0(tc_port)); + hw_state->mg_pll_div0 = intel_dkl_phy_read(dev_priv, DKL_PLL_DIV0(tc_port), 2); val = DKL_PLL_DIV0_MASK; if (dev_priv->display.vbt.override_afc_startup) val |= DKL_PLL_DIV0_AFC_STARTUP_MASK; hw_state->mg_pll_div0 &= val; - hw_state->mg_pll_div1 = intel_de_read(dev_priv, DKL_PLL_DIV1(tc_port)); + hw_state->mg_pll_div1 = intel_dkl_phy_read(dev_priv, DKL_PLL_DIV1(tc_port), 2); hw_state->mg_pll_div1 &= (DKL_PLL_DIV1_IREF_TRIM_MASK | DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); - hw_state->mg_pll_ssc = intel_de_read(dev_priv, DKL_PLL_SSC(tc_port)); + hw_state->mg_pll_ssc = intel_dkl_phy_read(dev_priv, DKL_PLL_SSC(tc_port), 2); hw_state->mg_pll_ssc &= (DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | DKL_PLL_SSC_STEP_LEN_MASK | DKL_PLL_SSC_STEP_NUM_MASK | DKL_PLL_SSC_EN); - hw_state->mg_pll_bias = intel_de_read(dev_priv, DKL_PLL_BIAS(tc_port)); + hw_state->mg_pll_bias = intel_dkl_phy_read(dev_priv, DKL_PLL_BIAS(tc_port), 2); hw_state->mg_pll_bias &= (DKL_PLL_BIAS_FRAC_EN_H | DKL_PLL_BIAS_FBDIV_FRAC_MASK); hw_state->mg_pll_tdc_coldst_bias = - intel_de_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port)); + intel_dkl_phy_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), 2); hw_state->mg_pll_tdc_coldst_bias &= (DKL_PLL_TDC_SSC_STEP_SIZE_MASK | DKL_PLL_TDC_FEED_FWD_GAIN_MASK); @@ -3737,61 +3735,58 @@ static void dkl_pll_write(struct drm_i915_private *dev_priv, * All registers programmed here have the same HIP_INDEX_REG even * though on different building block */ - intel_de_write(dev_priv, HIP_INDEX_REG(tc_port), - HIP_INDEX_VAL(tc_port, 0x2)); - /* All the registers are RMW */ - val = intel_de_read(dev_priv, DKL_REFCLKIN_CTL(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_REFCLKIN_CTL(tc_port), 2); val &= ~MG_REFCLKIN_CTL_OD_2_MUX_MASK; val |= hw_state->mg_refclkin_ctl; - intel_de_write(dev_priv, DKL_REFCLKIN_CTL(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_REFCLKIN_CTL(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port), 2); val &= ~MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK; val |= hw_state->mg_clktop2_coreclkctl1; - intel_de_write(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_CLKTOP2_CORECLKCTL1(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port), 2); val &= ~(MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL_MASK | MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL_MASK | MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO_MASK | MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO_MASK); val |= hw_state->mg_clktop2_hsclkctl; - intel_de_write(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port), 2, val); val = DKL_PLL_DIV0_MASK; if (dev_priv->display.vbt.override_afc_startup) val |= DKL_PLL_DIV0_AFC_STARTUP_MASK; - intel_de_rmw(dev_priv, DKL_PLL_DIV0(tc_port), val, - hw_state->mg_pll_div0); + intel_dkl_phy_rmw(dev_priv, DKL_PLL_DIV0(tc_port), 2, val, + hw_state->mg_pll_div0); - val = intel_de_read(dev_priv, DKL_PLL_DIV1(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_PLL_DIV1(tc_port), 2); val &= ~(DKL_PLL_DIV1_IREF_TRIM_MASK | DKL_PLL_DIV1_TDC_TARGET_CNT_MASK); val |= hw_state->mg_pll_div1; - intel_de_write(dev_priv, DKL_PLL_DIV1(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_PLL_DIV1(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_PLL_SSC(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_PLL_SSC(tc_port), 2); val &= ~(DKL_PLL_SSC_IREF_NDIV_RATIO_MASK | DKL_PLL_SSC_STEP_LEN_MASK | DKL_PLL_SSC_STEP_NUM_MASK | DKL_PLL_SSC_EN); val |= hw_state->mg_pll_ssc; - intel_de_write(dev_priv, DKL_PLL_SSC(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_PLL_SSC(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_PLL_BIAS(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_PLL_BIAS(tc_port), 2); val &= ~(DKL_PLL_BIAS_FRAC_EN_H | DKL_PLL_BIAS_FBDIV_FRAC_MASK); val |= hw_state->mg_pll_bias; - intel_de_write(dev_priv, DKL_PLL_BIAS(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_PLL_BIAS(tc_port), 2, val); - val = intel_de_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port)); + val = intel_dkl_phy_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), 2); val &= ~(DKL_PLL_TDC_SSC_STEP_SIZE_MASK | DKL_PLL_TDC_FEED_FWD_GAIN_MASK); val |= hw_state->mg_pll_tdc_coldst_bias; - intel_de_write(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), val); + intel_dkl_phy_write(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), 2, val); - intel_de_posting_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port)); + intel_dkl_phy_posting_read(dev_priv, DKL_PLL_TDC_COLDST_BIAS(tc_port), 2); } static void icl_pll_power_enable(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index c459eb362c47..f2a15d8155f4 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -353,6 +353,7 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->display.wm.wm_mutex); mutex_init(&dev_priv->display.pps.mutex); mutex_init(&dev_priv->display.hdcp.comp_mutex); + spin_lock_init(&dev_priv->display.dkl.phy_lock); i915_memcpy_init_early(dev_priv); intel_runtime_pm_init_early(&dev_priv->runtime_pm); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0b287a59dc2f..da35bb2db26b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7420,6 +7420,9 @@ enum skl_power_gate { #define _DKL_PHY5_BASE 0x16C000 #define _DKL_PHY6_BASE 0x16D000 +#define DKL_REG_TC_PORT(__reg) \ + (TC_PORT_1 + ((__reg).reg - _DKL_PHY1_BASE) / (_DKL_PHY2_BASE - _DKL_PHY1_BASE)) + /* DEKEL PHY MMIO Address = Phy base + (internal address & ~index_mask) */ #define _DKL_PCS_DW5 0x14 #define DKL_PCS_DW5(tc_port) _MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \ From d3f6bacfca86f6cf6bf85be1e8b54083d68d8195 Mon Sep 17 00:00:00 2001 From: Robert Beckett Date: Thu, 20 Oct 2022 13:03:08 +0200 Subject: [PATCH 086/328] drm/i915: stop abusing swiotlb_max_segment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit swiotlb_max_segment used to return either the maximum size that swiotlb could bounce, or for Xen PV PAGE_SIZE even if swiotlb could bounce buffer larger mappings. This made i915 on Xen PV work as it bypasses the coherency aspect of the DMA API and can't cope with bounce buffering and this avoided bounce buffering for the Xen/PV case. So instead of adding this hack back, check for Xen/PV directly in i915 for the Xen case and otherwise use the proper DMA API helper to query the maximum mapping size. Replace swiotlb_max_segment() calls with dma_max_mapping_size(). In i915_gem_object_get_pages_internal() no longer consider max_segment only if CONFIG_SWIOTLB is enabled. There can be other (iommu related) causes of specific max segment sizes. Fixes: a2daa27c0c61 ("swiotlb: simplify swiotlb_max_segment") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Robert Beckett Signed-off-by: Christoph Hellwig [hch: added the Xen hack, rewrote the changelog] Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221020110308.1582518-1-hch@lst.de (cherry picked from commit 78a07fe777c42800bd1adaec12abe5dcee43919e) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_internal.c | 19 +++--------- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 +-- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/i915_scatterlist.h | 32 ++++++++++++-------- 5 files changed, 28 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c index c698f95af15f..629acb403a2c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gem.h" @@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) struct scatterlist *sg; unsigned int sg_page_sizes; unsigned int npages; - int max_order; + int max_order = MAX_ORDER; + unsigned int max_segment; gfp_t gfp; - max_order = MAX_ORDER; -#ifdef CONFIG_SWIOTLB - if (is_swiotlb_active(obj->base.dev->dev)) { - unsigned int max_segment; - - max_segment = swiotlb_max_segment(); - if (max_segment) { - max_segment = max_t(unsigned int, max_segment, - PAGE_SIZE) >> PAGE_SHIFT; - max_order = min(max_order, ilog2(max_segment)); - } - } -#endif + max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT; + max_order = min(max_order, get_order(max_segment)); gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; if (IS_I965GM(i915) || IS_I965G(i915)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index f42ca1179f37..11125c32dd35 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct intel_memory_region *mem = obj->mm.region; struct address_space *mapping = obj->base.filp->f_mapping; const unsigned long page_count = obj->base.size / PAGE_SIZE; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; struct sgt_iter sgt_iter; struct page *page; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4f861782c3e8..a4aa9500fa17 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev, struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev); struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM]; struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm); - const unsigned int max_segment = i915_sg_segment_size(); + const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT; struct file *filp = i915_tt->filp; struct sgt_iter sgt_iter; @@ -538,7 +538,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm) ret = sg_alloc_table_from_pages_segment(st, ttm->pages, ttm->num_pages, 0, (unsigned long)ttm->num_pages << PAGE_SHIFT, - i915_sg_segment_size(), GFP_KERNEL); + i915_sg_segment_size(i915_tt->dev), GFP_KERNEL); if (ret) { st->sgl = NULL; return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index d4398948f016..f34e01a7fefb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj) static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const unsigned long num_pages = obj->base.size >> PAGE_SHIFT; - unsigned int max_segment = i915_sg_segment_size(); + unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev); struct sg_table *st; unsigned int sg_page_sizes; struct page **pvec; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 9ddb3e743a3e..b0a1db44f895 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -9,7 +9,8 @@ #include #include -#include +#include +#include #include "i915_gem.h" @@ -127,19 +128,26 @@ static inline unsigned int i915_sg_dma_sizes(struct scatterlist *sg) return page_sizes; } -static inline unsigned int i915_sg_segment_size(void) +static inline unsigned int i915_sg_segment_size(struct device *dev) { - unsigned int size = swiotlb_max_segment(); + size_t max = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev)); - if (size == 0) - size = UINT_MAX; - - size = rounddown(size, PAGE_SIZE); - /* swiotlb_max_segment_size can return 1 byte when it means one page. */ - if (size < PAGE_SIZE) - size = PAGE_SIZE; - - return size; + /* + * For Xen PV guests pages aren't contiguous in DMA (machine) address + * space. The DMA API takes care of that both in dma_alloc_* (by + * calling into the hypervisor to make the pages contiguous) and in + * dma_map_* (by bounce buffering). But i915 abuses ignores the + * coherency aspects of the DMA API and thus can't cope with bounce + * buffering actually happening, so add a hack here to force small + * allocations and mappings when running in PV mode on Xen. + * + * Note this will still break if bounce buffering is required for other + * reasons, like confidential computing hypervisors or PCIe root ports + * with addressing limitations. + */ + if (xen_pv_domain()) + max = PAGE_SIZE; + return round_down(max, PAGE_SIZE); } bool i915_sg_trim(struct sg_table *orig_st); From 3e206b6aa6df7eed4297577e0cf8403169b800a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2022 13:11:27 +0300 Subject: [PATCH 087/328] drm/i915/sdvo: Filter out invalid outputs more sensibly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We try to filter out the corresponding xxx1 output if the xxx0 output is not present. But the way that is being done is pretty awkward. Make it less so. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221026101134.20865-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit cc1e66394daaa7e9f005e2487a84e34a39f9308b) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_sdvo.c | 27 ++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index f5b744bef18f..30d3778c6136 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2926,16 +2926,33 @@ err: return false; } +static u16 intel_sdvo_filter_output_flags(u16 flags) +{ + flags &= SDVO_OUTPUT_MASK; + + /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ + if (!(flags & SDVO_OUTPUT_TMDS0)) + flags &= ~SDVO_OUTPUT_TMDS1; + + if (!(flags & SDVO_OUTPUT_RGB0)) + flags &= ~SDVO_OUTPUT_RGB1; + + if (!(flags & SDVO_OUTPUT_LVDS0)) + flags &= ~SDVO_OUTPUT_LVDS1; + + return flags; +} + static bool intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) { - /* SDVO requires XXX1 function may not exist unless it has XXX0 function.*/ + flags = intel_sdvo_filter_output_flags(flags); if (flags & SDVO_OUTPUT_TMDS0) if (!intel_sdvo_dvi_init(intel_sdvo, 0)) return false; - if ((flags & SDVO_TMDS_MASK) == SDVO_TMDS_MASK) + if (flags & SDVO_OUTPUT_TMDS1) if (!intel_sdvo_dvi_init(intel_sdvo, 1)) return false; @@ -2956,7 +2973,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) if (!intel_sdvo_analog_init(intel_sdvo, 0)) return false; - if ((flags & SDVO_RGB_MASK) == SDVO_RGB_MASK) + if (flags & SDVO_OUTPUT_RGB1) if (!intel_sdvo_analog_init(intel_sdvo, 1)) return false; @@ -2964,11 +2981,11 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) if (!intel_sdvo_lvds_init(intel_sdvo, 0)) return false; - if ((flags & SDVO_LVDS_MASK) == SDVO_LVDS_MASK) + if (flags & SDVO_OUTPUT_LVDS1) if (!intel_sdvo_lvds_init(intel_sdvo, 1)) return false; - if ((flags & SDVO_OUTPUT_MASK) == 0) { + if (flags == 0) { unsigned char bytes[2]; intel_sdvo->controlled_output = 0; From e79762512120f11c51317570519a1553c70805d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2022 13:11:28 +0300 Subject: [PATCH 088/328] drm/i915/sdvo: Setup DDC fully before output init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call intel_sdvo_select_ddc_bus() before initializing any of the outputs. And before that is functional (assuming no VBT) we have to set up the controlled_outputs thing. Otherwise DDC won't be functional during the output init but LVDS really needs it for the fixed mode setup. Note that the whole multi output support still looks very bogus, and more work will be needed to make it correct. But for now this should at least fix the LVDS EDID fixed mode setup. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7301 Fixes: aa2b88074a56 ("drm/i915/sdvo: Fix multi function encoder stuff") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221026101134.20865-3-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 64b7b557dc8a96d9cfed6aedbf81de2df80c025d) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_sdvo.c | 31 +++++++++-------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 30d3778c6136..8046d02a8ad0 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2747,13 +2747,10 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device) if (!intel_sdvo_connector) return false; - if (device == 0) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS0; + if (device == 0) intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS0; - } else if (device == 1) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_TMDS1; + else if (device == 1) intel_sdvo_connector->output_flag = SDVO_OUTPUT_TMDS1; - } intel_connector = &intel_sdvo_connector->base; connector = &intel_connector->base; @@ -2808,7 +2805,6 @@ intel_sdvo_tv_init(struct intel_sdvo *intel_sdvo, int type) encoder->encoder_type = DRM_MODE_ENCODER_TVDAC; connector->connector_type = DRM_MODE_CONNECTOR_SVIDEO; - intel_sdvo->controlled_output |= type; intel_sdvo_connector->output_flag = type; if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { @@ -2849,13 +2845,10 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device) encoder->encoder_type = DRM_MODE_ENCODER_DAC; connector->connector_type = DRM_MODE_CONNECTOR_VGA; - if (device == 0) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB0; + if (device == 0) intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB0; - } else if (device == 1) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_RGB1; + else if (device == 1) intel_sdvo_connector->output_flag = SDVO_OUTPUT_RGB1; - } if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { kfree(intel_sdvo_connector); @@ -2885,13 +2878,10 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) encoder->encoder_type = DRM_MODE_ENCODER_LVDS; connector->connector_type = DRM_MODE_CONNECTOR_LVDS; - if (device == 0) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS0; + if (device == 0) intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS0; - } else if (device == 1) { - intel_sdvo->controlled_output |= SDVO_OUTPUT_LVDS1; + else if (device == 1) intel_sdvo_connector->output_flag = SDVO_OUTPUT_LVDS1; - } if (intel_sdvo_connector_init(intel_sdvo_connector, intel_sdvo) < 0) { kfree(intel_sdvo_connector); @@ -2946,8 +2936,14 @@ static u16 intel_sdvo_filter_output_flags(u16 flags) static bool intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) { + struct drm_i915_private *i915 = to_i915(intel_sdvo->base.base.dev); + flags = intel_sdvo_filter_output_flags(flags); + intel_sdvo->controlled_output = flags; + + intel_sdvo_select_ddc_bus(i915, intel_sdvo); + if (flags & SDVO_OUTPUT_TMDS0) if (!intel_sdvo_dvi_init(intel_sdvo, 0)) return false; @@ -2988,7 +2984,6 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, u16 flags) if (flags == 0) { unsigned char bytes[2]; - intel_sdvo->controlled_output = 0; memcpy(bytes, &intel_sdvo->caps.output_flags, 2); DRM_DEBUG_KMS("%s: Unknown SDVO output type (0x%02x%02x)\n", SDVO_NAME(intel_sdvo), @@ -3400,8 +3395,6 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, */ intel_sdvo->base.cloneable = 0; - intel_sdvo_select_ddc_bus(dev_priv, intel_sdvo); - /* Set the input timing to the screen. Assume always input 0. */ if (!intel_sdvo_set_target_input(intel_sdvo)) goto err_output; From 224e858f215a3d6304f95a92357a1753475ca9cf Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 29 Oct 2022 09:04:29 +0800 Subject: [PATCH 089/328] ublk_drv: return flag of UBLK_F_URING_CMD_COMP_IN_TASK in case of module UBLK_F_URING_CMD_COMP_IN_TASK needs to be set and returned to userspace if ublk driver is built as module, otherwise userspace may get wrong flags shown. Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Signed-off-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20221029010432.598367-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 5afce6ffaadf..6b2f214f0d5c 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1658,6 +1658,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) */ ub->dev_info.flags &= UBLK_F_ALL; + if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK)) + ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK; + /* We are not ready to support zero copy */ ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY; From d57c2c6c1145148bb23d68db73de0b52d482d4ba Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 29 Oct 2022 09:04:30 +0800 Subject: [PATCH 090/328] ublk_drv: comment on ublk_driver entry of Kconfig Add help info for choosing to build ublk_drv as module or builtin. Signed-off-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20221029010432.598367-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index db1b4b202646..a41145d52de9 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -408,6 +408,12 @@ config BLK_DEV_UBLK definition isn't finalized yet, and might change according to future requirement, so mark is as experimental now. + Say Y if you want to get better performance because task_work_add() + can be used in IO path for replacing io_uring cmd, which will become + shared between IO tasks and ubq daemon, meantime task_work_add() can + can handle batch more effectively, but task_work_add() isn't exported + for module, so ublk has to be built to kernel. + source "drivers/block/rnbd/Kconfig" endif # BLK_DEV From 3ab6e94ca539242247d4f00414a1bde584d001ed Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 29 Oct 2022 09:04:31 +0800 Subject: [PATCH 091/328] ublk_drv: avoid to touch io_uring cmd in blk_mq io path io_uring cmd is supposed to be used in ubq daemon context mainly, and we should try to avoid to touch it in ublk io submission context, otherwise this data could become shared between the two contexts, and performance is hurt. So link request into one per-queue list, and use same batching policy of io_uring command, just avoid to touch ucmd in blk-mq io context. Signed-off-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20221029010432.598367-4-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 83 +++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 30 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 6b2f214f0d5c..3a59271dafe4 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -57,11 +57,14 @@ #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD) struct ublk_rq_data { - struct callback_head work; + union { + struct callback_head work; + struct llist_node node; + }; }; struct ublk_uring_cmd_pdu { - struct request *req; + struct ublk_queue *ubq; }; /* @@ -119,6 +122,8 @@ struct ublk_queue { struct task_struct *ubq_daemon; char *io_cmd_buf; + struct llist_head io_cmds; + unsigned long io_addr; /* mapped vm address */ unsigned int max_io_sz; bool force_abort; @@ -764,8 +769,12 @@ static inline void __ublk_rq_task_work(struct request *req) static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + struct ublk_queue *ubq = pdu->ubq; + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data; - __ublk_rq_task_work(pdu->req); + llist_for_each_entry(data, io_cmds, node) + __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); } static void ublk_rq_task_work_fn(struct callback_head *work) @@ -777,17 +786,50 @@ static void ublk_rq_task_work_fn(struct callback_head *work) __ublk_rq_task_work(req); } +static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) +{ + struct ublk_io *io = &ubq->ios[rq->tag]; + + /* + * If the check pass, we know that this is a re-issued request aborted + * previously in monitor_work because the ubq_daemon(cmd's task) is + * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore + * because this ioucmd's io_uring context may be freed now if no inflight + * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work. + * + * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing + * the tag). Then the request is re-started(allocating the tag) and we are here. + * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED + * guarantees that here is a re-issued request aborted previously. + */ + if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) { + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data; + + llist_for_each_entry(data, io_cmds, node) + __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); + } else { + struct io_uring_cmd *cmd = io->cmd; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + + pdu->ubq = ubq; + io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); + } +} + static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct ublk_queue *ubq = hctx->driver_data; struct request *rq = bd->rq; + struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); blk_status_t res; /* fill iod to slot in io cmd buffer */ res = ublk_setup_iod(ubq, rq); if (unlikely(res != BLK_STS_OK)) return BLK_STS_IOERR; + /* With recovery feature enabled, force_abort is set in * ublk_stop_dev() before calling del_gendisk(). We have to * abort all requeued and new rqs here to let del_gendisk() @@ -809,36 +851,15 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, } if (ublk_can_use_task_work(ubq)) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); enum task_work_notify_mode notify_mode = bd->last ? TWA_SIGNAL_NO_IPI : TWA_NONE; if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) goto fail; } else { - struct ublk_io *io = &ubq->ios[rq->tag]; - struct io_uring_cmd *cmd = io->cmd; - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - - /* - * If the check pass, we know that this is a re-issued request aborted - * previously in monitor_work because the ubq_daemon(cmd's task) is - * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore - * because this ioucmd's io_uring context may be freed now if no inflight - * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work. - * - * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing - * the tag). Then the request is re-started(allocating the tag) and we are here. - * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED - * guarantees that here is a re-issued request aborted previously. - */ - if ((io->flags & UBLK_IO_FLAG_ABORTED)) - goto fail; - - pdu->req = rq; - io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); + if (llist_add(&data->node, &ubq->io_cmds)) + ublk_submit_cmd(ubq, rq); } - return BLK_STS_OK; } @@ -1168,17 +1189,19 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, { struct ublk_queue *ubq = ublk_get_queue(ub, q_id); struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); + struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); if (ublk_can_use_task_work(ubq)) { - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - /* should not fail since we call it just in ubq->ubq_daemon */ task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI); } else { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - pdu->req = req; - io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); + if (llist_add(&data->node, &ubq->io_cmds)) { + pdu->ubq = ubq; + io_uring_cmd_complete_in_task(cmd, + ublk_rq_task_work_cb); + } } } From fee32f312405726eec6b35b5740c48acda0315e9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 29 Oct 2022 09:04:32 +0800 Subject: [PATCH 092/328] ublk_drv: add ublk_queue_cmd() for cleanup Add helper of ublk_queue_cmd() so that both ublk_queue_rq() and ublk_handle_need_get_data() can reuse this helper. Signed-off-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20221029010432.598367-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 47 ++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 3a59271dafe4..f96cb01e9604 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -817,12 +817,28 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) } } +static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq, + bool last) +{ + struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); + + if (ublk_can_use_task_work(ubq)) { + enum task_work_notify_mode notify_mode = last ? + TWA_SIGNAL_NO_IPI : TWA_NONE; + + if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) + __ublk_abort_rq(ubq, rq); + } else { + if (llist_add(&data->node, &ubq->io_cmds)) + ublk_submit_cmd(ubq, rq); + } +} + static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct ublk_queue *ubq = hctx->driver_data; struct request *rq = bd->rq; - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); blk_status_t res; /* fill iod to slot in io cmd buffer */ @@ -845,21 +861,12 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); if (unlikely(ubq_daemon_is_dying(ubq))) { - fail: __ublk_abort_rq(ubq, rq); return BLK_STS_OK; } - if (ublk_can_use_task_work(ubq)) { - enum task_work_notify_mode notify_mode = bd->last ? - TWA_SIGNAL_NO_IPI : TWA_NONE; + ublk_queue_cmd(ubq, rq, bd->last); - if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) - goto fail; - } else { - if (llist_add(&data->node, &ubq->io_cmds)) - ublk_submit_cmd(ubq, rq); - } return BLK_STS_OK; } @@ -1185,24 +1192,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) } static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, - int tag, struct io_uring_cmd *cmd) + int tag) { struct ublk_queue *ubq = ublk_get_queue(ub, q_id); struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - struct ublk_rq_data *data = blk_mq_rq_to_pdu(req); - if (ublk_can_use_task_work(ubq)) { - /* should not fail since we call it just in ubq->ubq_daemon */ - task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI); - } else { - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - - if (llist_add(&data->node, &ubq->io_cmds)) { - pdu->ubq = ubq; - io_uring_cmd_complete_in_task(cmd, - ublk_rq_task_work_cb); - } - } + ublk_queue_cmd(ubq, req, true); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) @@ -1290,7 +1285,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) io->addr = ub_cmd->addr; io->cmd = cmd; io->flags |= UBLK_IO_FLAG_ACTIVE; - ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd); + ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag); break; default: goto out; From fa81cbafbf5764ad5053512152345fab37a1fe18 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Sat, 29 Oct 2022 15:13:55 +0800 Subject: [PATCH 093/328] block: Fix possible memory leak for rq_wb on add_disk failure kmemleak reported memory leaks in device_add_disk(): kmemleak: 3 new suspected memory leaks unreferenced object 0xffff88800f420800 (size 512): comm "modprobe", pid 4275, jiffies 4295639067 (age 223.512s) hex dump (first 32 bytes): 04 00 00 00 08 00 00 00 01 00 00 00 00 00 00 00 ................ 00 e1 f5 05 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d3662699>] kmalloc_trace+0x26/0x60 [<00000000edc7aadc>] wbt_init+0x50/0x6f0 [<0000000069601d16>] wbt_enable_default+0x157/0x1c0 [<0000000028fc393f>] blk_register_queue+0x2a4/0x420 [<000000007345a042>] device_add_disk+0x6fd/0xe40 [<0000000060e6aab0>] nbd_dev_add+0x828/0xbf0 [nbd] ... It is because the memory allocated in wbt_enable_default() is not released in device_add_disk() error path. Normally, these memory are freed in: del_gendisk() rq_qos_exit() rqos->ops->exit(rqos); wbt_exit() So rq_qos_exit() is called to free the rq_wb memory for wbt_init(). However in the error path of device_add_disk(), only blk_unregister_queue() is called and make rq_wb memory leaked. Add rq_qos_exit() to the error path to fix it. Fixes: 83cbce957446 ("block: add error handling for device_add_disk / add_disk") Signed-off-by: Chen Zhongjin Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221029071355.35462-1-chenzhongjin@huawei.com Signed-off-by: Jens Axboe --- block/genhd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/genhd.c b/block/genhd.c index fee90eb98b4a..0f9769db2de8 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -527,6 +527,7 @@ out_unregister_bdi: bdi_unregister(disk->bdi); out_unregister_queue: blk_unregister_queue(disk); + rq_qos_exit(disk->queue); out_put_slave_dir: kobject_put(disk->slave_dir); out_put_holder_dir: From d372ec94a018c3a19dad71e2ee3478126394d9fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 27 Sep 2022 21:06:13 +0300 Subject: [PATCH 094/328] drm/i915: Simplify intel_panel_add_edid_alt_fixed_modes() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit a5810f551d0a ("drm/i915: Allow more varied alternate fixed modes for panels") intel_panel_add_edid_alt_fixed_modes() no longer considers vrr vs. drrs separately. So no reason to pass them as separate parameters either. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20220927180615.25476-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit eb89e83c152b122a94e79527d63cb7c79823c37e) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- drivers/gpu/drm/i915/display/intel_lvds.c | 3 +-- drivers/gpu/drm/i915/display/intel_panel.c | 4 ++-- drivers/gpu/drm/i915/display/intel_panel.h | 2 +- drivers/gpu/drm/i915/display/intel_sdvo.c | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 47419d162f30..2b5bc95a8b0d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5276,7 +5276,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, encoder->devdata, IS_ERR(edid) ? NULL : edid); intel_panel_add_edid_fixed_modes(intel_connector, - intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE, + intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE || intel_vrr_is_capable(intel_connector)); /* MSO requires information from the EDID */ diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c index 9aa38e8141b5..e5352239b2a2 100644 --- a/drivers/gpu/drm/i915/display/intel_lvds.c +++ b/drivers/gpu/drm/i915/display/intel_lvds.c @@ -972,8 +972,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) /* Try EDID first */ intel_panel_add_edid_fixed_modes(intel_connector, - intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE, - false); + intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE); /* Failed to get EDID, what about VBT? */ if (!intel_panel_preferred_fixed_mode(intel_connector)) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index a3a3f9fe4342..41cec9dc4223 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -254,10 +254,10 @@ static void intel_panel_destroy_probed_modes(struct intel_connector *connector) } void intel_panel_add_edid_fixed_modes(struct intel_connector *connector, - bool has_drrs, bool has_vrr) + bool use_alt_fixed_modes) { intel_panel_add_edid_preferred_mode(connector); - if (intel_panel_preferred_fixed_mode(connector) && (has_drrs || has_vrr)) + if (intel_panel_preferred_fixed_mode(connector) && use_alt_fixed_modes) intel_panel_add_edid_alt_fixed_modes(connector); intel_panel_destroy_probed_modes(connector); } diff --git a/drivers/gpu/drm/i915/display/intel_panel.h b/drivers/gpu/drm/i915/display/intel_panel.h index eff3ffd3d082..5c5b5b7f95b6 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.h +++ b/drivers/gpu/drm/i915/display/intel_panel.h @@ -44,7 +44,7 @@ int intel_panel_fitting(struct intel_crtc_state *crtc_state, int intel_panel_compute_config(struct intel_connector *connector, struct drm_display_mode *adjusted_mode); void intel_panel_add_edid_fixed_modes(struct intel_connector *connector, - bool has_drrs, bool has_vrr); + bool use_alt_fixed_modes); void intel_panel_add_vbt_lfp_fixed_mode(struct intel_connector *connector); void intel_panel_add_vbt_sdvo_fixed_mode(struct intel_connector *connector); void intel_panel_add_encoder_fixed_mode(struct intel_connector *connector, diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 8046d02a8ad0..8ee7b05ab733 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2901,7 +2901,7 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) if (!intel_panel_preferred_fixed_mode(intel_connector)) { intel_ddc_get_modes(connector, &intel_sdvo->ddc); - intel_panel_add_edid_fixed_modes(intel_connector, false, false); + intel_panel_add_edid_fixed_modes(intel_connector, false); } intel_panel_init(intel_connector); From 12caf46cf4fc92b1c3884cb363ace2e12732fd2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2022 13:11:29 +0300 Subject: [PATCH 095/328] drm/i915/sdvo: Grab mode_config.mutex during LVDS init to avoid WARNs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_mode_probed_add() is unhappy about being called w/o mode_config.mutex. Grab it during LVDS fixed mode setup to silence the WARNs. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7301 Fixes: aa2b88074a56 ("drm/i915/sdvo: Fix multi function encoder stuff") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221026101134.20865-4-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit a3cd4f447281c56377de2ee109327400eb00668d) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_sdvo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c index 8ee7b05ab733..774c1dc31a52 100644 --- a/drivers/gpu/drm/i915/display/intel_sdvo.c +++ b/drivers/gpu/drm/i915/display/intel_sdvo.c @@ -2900,8 +2900,12 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) intel_panel_add_vbt_sdvo_fixed_mode(intel_connector); if (!intel_panel_preferred_fixed_mode(intel_connector)) { + mutex_lock(&i915->drm.mode_config.mutex); + intel_ddc_get_modes(connector, &intel_sdvo->ddc); intel_panel_add_edid_fixed_modes(intel_connector, false); + + mutex_unlock(&i915->drm.mode_config.mutex); } intel_panel_init(intel_connector); From 943f45b9399ed8b2b5190cbc797995edaa97f58f Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Mon, 31 Oct 2022 03:12:42 +0000 Subject: [PATCH 096/328] blk-mq: Fix kmemleak in blk_mq_init_allocated_queue There is a kmemleak caused by modprobe null_blk.ko unreferenced object 0xffff8881acb1f000 (size 1024): comm "modprobe", pid 836, jiffies 4294971190 (age 27.068s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 00 53 99 9e ff ff ff ff .........S...... backtrace: [<000000004a10c249>] kmalloc_node_trace+0x22/0x60 [<00000000648f7950>] blk_mq_alloc_and_init_hctx+0x289/0x350 [<00000000af06de0e>] blk_mq_realloc_hw_ctxs+0x2fe/0x3d0 [<00000000e00c1872>] blk_mq_init_allocated_queue+0x48c/0x1440 [<00000000d16b4e68>] __blk_mq_alloc_disk+0xc8/0x1c0 [<00000000d10c98c3>] 0xffffffffc450d69d [<00000000b9299f48>] 0xffffffffc4538392 [<0000000061c39ed6>] do_one_initcall+0xd0/0x4f0 [<00000000b389383b>] do_init_module+0x1a4/0x680 [<0000000087cf3542>] load_module+0x6249/0x7110 [<00000000beba61b8>] __do_sys_finit_module+0x140/0x200 [<00000000fdcfff51>] do_syscall_64+0x35/0x80 [<000000003c0f1f71>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 That is because q->ma_ops is set to NULL before blk_release_queue is called. blk_mq_init_queue_data blk_mq_init_allocated_queue blk_mq_realloc_hw_ctxs for (i = 0; i < set->nr_hw_queues; i++) { old_hctx = xa_load(&q->hctx_table, i); if (!blk_mq_alloc_and_init_hctx(.., i, ..)) [1] if (!old_hctx) break; xa_for_each_start(&q->hctx_table, j, hctx, j) blk_mq_exit_hctx(q, set, hctx, j); [2] if (!q->nr_hw_queues) [3] goto err_hctxs; err_exit: q->mq_ops = NULL; [4] blk_put_queue blk_release_queue if (queue_is_mq(q)) [5] blk_mq_release(q); [1]: blk_mq_alloc_and_init_hctx failed at i != 0. [2]: The hctxs allocated by [1] are moved to q->unused_hctx_list and will be cleaned up in blk_mq_release. [3]: q->nr_hw_queues is 0. [4]: Set q->mq_ops to NULL. [5]: queue_is_mq returns false due to [4]. And blk_mq_release will not be called. The hctxs in q->unused_hctx_list are leaked. To fix it, call blk_release_queue in exception path. Fixes: 2f8f1336a48b ("blk-mq: always free hctx after request queue is freed") Signed-off-by: Yuan Can Signed-off-by: Chen Jun Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20221031031242.94107-1-chenjun102@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 75c8296b6feb..21cc7c2da0f9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4193,9 +4193,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, return 0; err_hctxs: - xa_destroy(&q->hctx_table); - q->nr_hw_queues = 0; - blk_mq_sysfs_deinit(q); + blk_mq_release(q); err_poll: blk_stat_free_callback(q->poll_cb); q->poll_cb = NULL; From 063b1f21cc9be07291a1f5e227436f353c6d1695 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 30 Oct 2022 08:35:28 +0100 Subject: [PATCH 097/328] btrfs: fix a memory allocation failure test in btrfs_submit_direct After allocation 'dip' is tested instead of 'dip->csums'. Fix it. Fixes: 642c5d34da53 ("btrfs: allocate the btrfs_dio_private as part of the iomap dio bio") CC: stable@vger.kernel.org # 5.19+ Reviewed-by: Nikolay Borisov Signed-off-by: Christophe JAILLET Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 45ebef8d3ea8..f0ebc17aeb67 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7980,7 +7980,7 @@ static void btrfs_submit_direct(const struct iomap_iter *iter, */ status = BLK_STS_RESOURCE; dip->csums = kcalloc(nr_sectors, fs_info->csum_size, GFP_NOFS); - if (!dip) + if (!dip->csums) goto out_err; status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums); From 47ba8cc7b4f82c927cec3ad7c7392e4c45c81c56 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 24 Oct 2022 10:11:02 -0700 Subject: [PATCH 098/328] xfs: fix incorrect return type for fsdax fault handlers The kernel robot complained about this: >> fs/xfs/xfs_file.c:1266:31: sparse: sparse: incorrect type in return expression (different base types) @@ expected int @@ got restricted vm_fault_t @@ fs/xfs/xfs_file.c:1266:31: sparse: expected int fs/xfs/xfs_file.c:1266:31: sparse: got restricted vm_fault_t fs/xfs/xfs_file.c:1314:21: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted vm_fault_t [usertype] ret @@ got int @@ fs/xfs/xfs_file.c:1314:21: sparse: expected restricted vm_fault_t [usertype] ret fs/xfs/xfs_file.c:1314:21: sparse: got int Fix the incorrect return type for these two functions. While we're at it, make the !fsdax version return VM_FAULT_SIGBUS because a zero return value will cause some callers to try to lock vmf->page, which we never set here. Fixes: ea6c49b784f0 ("xfs: support CoW in fsdax mode") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c6c80265c0b2..e462d39c840e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1261,7 +1261,7 @@ xfs_file_llseek( } #ifdef CONFIG_FS_DAX -static int +static inline vm_fault_t xfs_dax_fault( struct vm_fault *vmf, enum page_entry_size pe_size, @@ -1274,14 +1274,15 @@ xfs_dax_fault( &xfs_read_iomap_ops); } #else -static int +static inline vm_fault_t xfs_dax_fault( struct vm_fault *vmf, enum page_entry_size pe_size, bool write_fault, pfn_t *pfn) { - return 0; + ASSERT(0); + return VM_FAULT_SIGBUS; } #endif From 8184620ae21213d51eaf2e0bd4186baacb928172 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 28 Oct 2022 13:15:35 +0100 Subject: [PATCH 099/328] btrfs: fix lost file sync on direct IO write with nowait and dsync iocb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When doing a direct IO write using a iocb with nowait and dsync set, we end up not syncing the file once the write completes. This is because we tell iomap to not call generic_write_sync(), which would result in calling btrfs_sync_file(), in order to avoid a deadlock since iomap can call it while we are holding the inode's lock and btrfs_sync_file() needs to acquire the inode's lock. The deadlock happens only if the write happens synchronously, when iomap_dio_rw() calls iomap_dio_complete() before it returns. Instead we do the sync ourselves at btrfs_do_write_iter(). For a nowait write however we can end up not doing the sync ourselves at at btrfs_do_write_iter() because the write could have been queued, and therefore we get -EIOCBQUEUED returned from iomap in such case. That makes us skip the sync call at btrfs_do_write_iter(), as we don't do it for any error returned from btrfs_direct_write(). We can't simply do the call even if -EIOCBQUEUED is returned, since that would block the task waiting for IO, both for the data since there are bios still in progress as well as potentially blocking when joining a log transaction and when syncing the log (writing log trees, super blocks, etc). So let iomap do the sync call itself and in order to avoid deadlocks for the case of synchronous writes (without nowait), use __iomap_dio_rw() and have ourselves call iomap_dio_complete() after unlocking the inode. A test case will later be sent for fstests, after this is fixed in Linus' tree. Fixes: 51bd9563b678 ("btrfs: fix deadlock due to page faults during direct IO reads and writes") Reported-by: Марк Коренберг Link: https://lore.kernel.org/linux-btrfs/CAEmTpZGRKbzc16fWPvxbr6AfFsQoLmz-Lcg-7OgJOZDboJ+SGQ@mail.gmail.com/ CC: stable@vger.kernel.org # 6.0+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 ++++- fs/btrfs/file.c | 22 ++++++++++++++++------ fs/btrfs/inode.c | 14 +++++++++++--- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 727595eee973..f677b49df8ae 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3462,7 +3462,10 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter, ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, const struct btrfs_ioctl_encoded_io_args *encoded); -ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before); +ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before); +struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before); extern const struct dentry_operations btrfs_dentry_operations; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 176b432035ae..d55ad46384d1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1765,6 +1765,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) loff_t endbyte; ssize_t err; unsigned int ilock_flags = 0; + struct iomap_dio *dio; if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; @@ -1825,11 +1826,22 @@ relock: * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ -again: from->nofault = true; - err = btrfs_dio_rw(iocb, from, written); + dio = btrfs_dio_write(iocb, from, written); from->nofault = false; + /* + * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync + * iocb, and that needs to lock the inode. So unlock it before calling + * iomap_dio_complete() to avoid a deadlock. + */ + btrfs_inode_unlock(inode, ilock_flags); + + if (IS_ERR_OR_NULL(dio)) + err = PTR_ERR_OR_ZERO(dio); + else + err = iomap_dio_complete(dio); + /* No increment (+=) because iomap returns a cumulative value. */ if (err > 0) written = err; @@ -1855,12 +1867,10 @@ again: } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto again; + goto relock; } } - btrfs_inode_unlock(inode, ilock_flags); - /* * If 'err' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. @@ -4035,7 +4045,7 @@ again: */ pagefault_disable(); to->nofault = true; - ret = btrfs_dio_rw(iocb, to, read); + ret = btrfs_dio_read(iocb, to, read); to->nofault = false; pagefault_enable(); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f0ebc17aeb67..d70f85b73169 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8078,13 +8078,21 @@ static const struct iomap_dio_ops btrfs_dio_ops = { .bio_set = &btrfs_dio_bioset, }; -ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) +ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) { struct btrfs_dio_data data; return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, - IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC, - &data, done_before); + IOMAP_DIO_PARTIAL, &data, done_before); +} + +struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, + size_t done_before) +{ + struct btrfs_dio_data data; + + return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, + IOMAP_DIO_PARTIAL, &data, done_before); } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, From 59da7ff49d67a1b63b1b81c7f53dcb6a84cdad2b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:08:11 -0700 Subject: [PATCH 100/328] xfs: fix validation in attr log item recovery Before we start fixing all the complaints about memcpy'ing log items around, let's fix some inadequate validation in the xattr log item recovery code and get rid of the (now trivial) copy_format function. Signed-off-by: Darrick J. Wong Reviewed-by: Kees Cook Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/xfs_attr_item.c | 54 ++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index cf5ce607dc05..ee8f678a10a1 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -245,28 +245,6 @@ xfs_attri_init( return attrip; } -/* - * Copy an attr format buffer from the given buf, and into the destination attr - * format structure. - */ -STATIC int -xfs_attri_copy_format( - struct xfs_log_iovec *buf, - struct xfs_attri_log_format *dst_attr_fmt) -{ - struct xfs_attri_log_format *src_attr_fmt = buf->i_addr; - size_t len; - - len = sizeof(struct xfs_attri_log_format); - if (buf->i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; - } - - memcpy((char *)dst_attr_fmt, (char *)src_attr_fmt, len); - return 0; -} - static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_attrd_log_item, attrd_item); @@ -731,24 +709,44 @@ xlog_recover_attri_commit_pass2( struct xfs_attri_log_nameval *nv; const void *attr_value = NULL; const void *attr_name; - int error; + size_t len; attri_formatp = item->ri_buf[0].i_addr; attr_name = item->ri_buf[1].i_addr; /* Validate xfs_attri_log_format before the large memory allocation */ + len = sizeof(struct xfs_attri_log_format); + if (item->ri_buf[0].i_len != len) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + if (!xfs_attri_validate(mp, attri_formatp)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } + /* Validate the attr name */ + if (item->ri_buf[1].i_len != + xlog_calc_iovec_len(attri_formatp->alfi_name_len)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return -EFSCORRUPTED; } - if (attri_formatp->alfi_value_len) + /* Validate the attr value, if present */ + if (attri_formatp->alfi_value_len != 0) { + if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } + attr_value = item->ri_buf[2].i_addr; + } /* * Memory alloc failure will cause replay to abort. We attach the @@ -760,9 +758,7 @@ xlog_recover_attri_commit_pass2( attri_formatp->alfi_value_len); attrip = xfs_attri_init(mp, nv); - error = xfs_attri_copy_format(&item->ri_buf[0], &attrip->attri_format); - if (error) - goto out; + memcpy(&attrip->attri_format, attri_formatp, len); /* * The ATTRI has two references. One for the ATTRD and one for ATTRI to @@ -774,10 +770,6 @@ xlog_recover_attri_commit_pass2( xfs_attri_release(attrip); xfs_attri_log_nameval_put(nv); return 0; -out: - xfs_attri_item_free(attrip); - xfs_attri_log_nameval_put(nv); - return error; } /* From a38ebce1da271f480e47c3def4f810c6106b74a1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:17:41 -0700 Subject: [PATCH 101/328] xfs: fix memcpy fortify errors in BUI log format copying Starting in 6.1, CONFIG_FORTIFY_SOURCE checks the length parameter of memcpy. Unfortunately, it doesn't handle flex arrays correctly: ------------[ cut here ]------------ memcpy: detected field-spanning write (size 48) of single field "dst_bui_fmt" at fs/xfs/xfs_bmap_item.c:628 (size 16) Fix this by refactoring the xfs_bui_copy_format function to handle the copying of the head and the flex array members separately. While we're at it, fix a minor validation deficiency in the recovery function. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/xfs_bmap_item.c | 46 ++++++++++++++++++++---------------------- fs/xfs/xfs_ondisk.h | 5 +++++ 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 51f66e982484..a1da6205252b 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -608,28 +608,18 @@ static const struct xfs_item_ops xfs_bui_item_ops = { .iop_relog = xfs_bui_item_relog, }; -/* - * Copy an BUI format buffer from the given buf, and into the destination - * BUI format structure. The BUI/BUD items were designed not to need any - * special alignment handling. - */ -static int +static inline void xfs_bui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_bui_log_format *dst_bui_fmt) + struct xfs_bui_log_format *dst, + const struct xfs_bui_log_format *src) { - struct xfs_bui_log_format *src_bui_fmt; - uint len; + unsigned int i; - src_bui_fmt = buf->i_addr; - len = xfs_bui_log_format_sizeof(src_bui_fmt->bui_nextents); + memcpy(dst, src, offsetof(struct xfs_bui_log_format, bui_extents)); - if (buf->i_len == len) { - memcpy(dst_bui_fmt, src_bui_fmt, len); - return 0; - } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; + for (i = 0; i < src->bui_nextents; i++) + memcpy(&dst->bui_extents[i], &src->bui_extents[i], + sizeof(struct xfs_map_extent)); } /* @@ -646,23 +636,31 @@ xlog_recover_bui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_bui_log_item *buip; struct xfs_bui_log_format *bui_formatp; + size_t len; bui_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; + } + if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; } - buip = xfs_bui_init(mp); - error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); - if (error) { - xfs_bui_item_free(buip); - return error; + + len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; } + + buip = xfs_bui_init(mp); + xfs_bui_copy_format(&buip->bui_format, bui_formatp); atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); /* * Insert the intent into the AIL directly and drop one reference so diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 758702b9495f..56917e236370 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -134,6 +134,11 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40); XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); + + XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); /* * The v5 superblock format extended several v4 header structures with From a38935c03c7914a6ab22eefb750b259868ed5a4b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:23:16 -0700 Subject: [PATCH 102/328] xfs: fix memcpy fortify errors in CUI log format copying Starting in 6.1, CONFIG_FORTIFY_SOURCE checks the length parameter of memcpy. Since we're already fixing problems with BUI item copying, we should fix it everything else. Refactor the xfs_cui_copy_format function to handle the copying of the head and the flex array members separately. While we're at it, fix a minor validation deficiency in the recovery function. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/xfs_ondisk.h | 4 ++++ fs/xfs/xfs_refcount_item.c | 45 ++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 56917e236370..e20d2844b0c5 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -136,9 +136,13 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16); XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); /* * The v5 superblock format extended several v4 header structures with diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7e97bf19793d..24cf4c64ebaa 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -622,28 +622,18 @@ static const struct xfs_item_ops xfs_cui_item_ops = { .iop_relog = xfs_cui_item_relog, }; -/* - * Copy an CUI format buffer from the given buf, and into the destination - * CUI format structure. The CUI/CUD items were designed not to need any - * special alignment handling. - */ -static int +static inline void xfs_cui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_cui_log_format *dst_cui_fmt) + struct xfs_cui_log_format *dst, + const struct xfs_cui_log_format *src) { - struct xfs_cui_log_format *src_cui_fmt; - uint len; + unsigned int i; - src_cui_fmt = buf->i_addr; - len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents); + memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents)); - if (buf->i_len == len) { - memcpy(dst_cui_fmt, src_cui_fmt, len); - return 0; - } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; + for (i = 0; i < src->cui_nextents; i++) + memcpy(&dst->cui_extents[i], &src->cui_extents[i], + sizeof(struct xfs_phys_extent)); } /* @@ -660,19 +650,26 @@ xlog_recover_cui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_cui_log_item *cuip; struct xfs_cui_log_format *cui_formatp; + size_t len; cui_formatp = item->ri_buf[0].i_addr; - cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); - error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format); - if (error) { - xfs_cui_item_free(cuip); - return error; + if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; } + + len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; + } + + cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); + xfs_cui_copy_format(&cuip->cui_format, cui_formatp); atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); /* * Insert the intent into the AIL directly and drop one reference so From b45ca961e94673df83ab1900802afe82776966e6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:26:36 -0700 Subject: [PATCH 103/328] xfs: fix memcpy fortify errors in RUI log format copying Starting in 6.1, CONFIG_FORTIFY_SOURCE checks the length parameter of memcpy. Since we're already fixing problems with BUI item copying, we should fix it everything else. Refactor the xfs_rui_copy_format function to handle the copying of the head and the flex array members separately. While we're at it, fix a minor validation deficiency in the recovery function. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/xfs_ondisk.h | 3 +++ fs/xfs/xfs_rmap_item.c | 58 ++++++++++++++++++++---------------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index e20d2844b0c5..19c1df00b48e 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -138,11 +138,14 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16); XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16); /* * The v5 superblock format extended several v4 header structures with diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index fef92e02f3bb..27047e73f582 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -155,31 +155,6 @@ xfs_rui_init( return ruip; } -/* - * Copy an RUI format buffer from the given buf, and into the destination - * RUI format structure. The RUI/RUD items were designed not to need any - * special alignment handling. - */ -STATIC int -xfs_rui_copy_format( - struct xfs_log_iovec *buf, - struct xfs_rui_log_format *dst_rui_fmt) -{ - struct xfs_rui_log_format *src_rui_fmt; - uint len; - - src_rui_fmt = buf->i_addr; - len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents); - - if (buf->i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); - return -EFSCORRUPTED; - } - - memcpy(dst_rui_fmt, src_rui_fmt, len); - return 0; -} - static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip) { return container_of(lip, struct xfs_rud_log_item, rud_item); @@ -652,6 +627,20 @@ static const struct xfs_item_ops xfs_rui_item_ops = { .iop_relog = xfs_rui_item_relog, }; +static inline void +xfs_rui_copy_format( + struct xfs_rui_log_format *dst, + const struct xfs_rui_log_format *src) +{ + unsigned int i; + + memcpy(dst, src, offsetof(struct xfs_rui_log_format, rui_extents)); + + for (i = 0; i < src->rui_nextents; i++) + memcpy(&dst->rui_extents[i], &src->rui_extents[i], + sizeof(struct xfs_map_extent)); +} + /* * This routine is called to create an in-core extent rmap update * item from the rui format structure which was logged on disk. @@ -666,19 +655,26 @@ xlog_recover_rui_commit_pass2( struct xlog_recover_item *item, xfs_lsn_t lsn) { - int error; struct xfs_mount *mp = log->l_mp; struct xfs_rui_log_item *ruip; struct xfs_rui_log_format *rui_formatp; + size_t len; rui_formatp = item->ri_buf[0].i_addr; - ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); - error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format); - if (error) { - xfs_rui_item_free(ruip); - return error; + if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; } + + len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents); + if (item->ri_buf[0].i_len != len) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; + } + + ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); + xfs_rui_copy_format(&ruip->rui_format, rui_formatp); atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); /* * Insert the intent into the AIL directly and drop one reference so From f850995f60e49818093ef5e477cdb0ff2c11a0a4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 25 Oct 2022 18:18:21 -0700 Subject: [PATCH 104/328] xfs: make sure aglen never goes negative in xfs_refcount_adjust_extents Prior to calling xfs_refcount_adjust_extents, we trimmed agbno/aglen such that the end of the range would not be in the middle of a refcount record. If this is no longer the case, something is seriously wrong with the btree. Bail out with a corruption error. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 64b910caafaa..831353ba96dc 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -986,15 +986,29 @@ xfs_refcount_adjust_extents( (*agbno) += tmp.rc_blockcount; (*aglen) -= tmp.rc_blockcount; + /* Stop if there's nothing left to modify */ + if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) + break; + + /* Move the cursor to the start of ext. */ error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); if (error) goto out_error; } - /* Stop if there's nothing left to modify */ - if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) - break; + /* + * A previous step trimmed agbno/aglen such that the end of the + * range would not be in the middle of the record. If this is + * no longer the case, something is seriously wrong with the + * btree. Make sure we never feed the synthesized record into + * the processing loop below. + */ + if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) || + XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) { + error = -EFSCORRUPTED; + goto out_error; + } /* * Adjust the reference count and either update the tree From 03a7485cd701e1c08baadcf39d9592d83715e224 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2022 16:39:59 -0700 Subject: [PATCH 105/328] xfs: fix memcpy fortify errors in EFI log format copying Starting in 6.1, CONFIG_FORTIFY_SOURCE checks the length parameter of memcpy. Since we're already fixing problems with BUI item copying, we should fix it everything else. An extra difficulty here is that the ef[id]_extents arrays are declared as single-element arrays. This is not the convention for flex arrays in the modern kernel, and it causes all manner of problems with static checking tools, since they often cannot tell the difference between a single element array and a flex array. So for starters, change those array[1] declarations to array[] declarations to signal that they are proper flex arrays and adjust all the "size-1" expressions to fit the new declaration style. Next, refactor the xfs_efi_copy_format function to handle the copying of the head and the flex array members separately. While we're at it, fix a minor validation deficiency in the recovery function. Signed-off-by: Darrick J. Wong Reviewed-by: Kees Cook Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_log_format.h | 12 ++++++------ fs/xfs/xfs_extfree_item.c | 31 +++++++++++++++++++++---------- fs/xfs/xfs_ondisk.h | 11 +++++++---- fs/xfs/xfs_super.c | 4 ++-- 4 files changed, 36 insertions(+), 22 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index b351b9dc6561..2f41fa8477c9 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -613,7 +613,7 @@ typedef struct xfs_efi_log_format { uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_t efi_extents[1]; /* array of extents to free */ + xfs_extent_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_t; typedef struct xfs_efi_log_format_32 { @@ -621,7 +621,7 @@ typedef struct xfs_efi_log_format_32 { uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_32_t efi_extents[1]; /* array of extents to free */ + xfs_extent_32_t efi_extents[]; /* array of extents to free */ } __attribute__((packed)) xfs_efi_log_format_32_t; typedef struct xfs_efi_log_format_64 { @@ -629,7 +629,7 @@ typedef struct xfs_efi_log_format_64 { uint16_t efi_size; /* size of this item */ uint32_t efi_nextents; /* # extents to free */ uint64_t efi_id; /* efi identifier */ - xfs_extent_64_t efi_extents[1]; /* array of extents to free */ + xfs_extent_64_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_64_t; /* @@ -642,7 +642,7 @@ typedef struct xfs_efd_log_format { uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_t efd_extents[1]; /* array of extents freed */ + xfs_extent_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_t; typedef struct xfs_efd_log_format_32 { @@ -650,7 +650,7 @@ typedef struct xfs_efd_log_format_32 { uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_32_t efd_extents[1]; /* array of extents freed */ + xfs_extent_32_t efd_extents[]; /* array of extents freed */ } __attribute__((packed)) xfs_efd_log_format_32_t; typedef struct xfs_efd_log_format_64 { @@ -658,7 +658,7 @@ typedef struct xfs_efd_log_format_64 { uint16_t efd_size; /* size of this item */ uint32_t efd_nextents; /* # of extents freed */ uint64_t efd_efi_id; /* id of corresponding efi */ - xfs_extent_64_t efd_extents[1]; /* array of extents freed */ + xfs_extent_64_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_64_t; /* diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 27ccfcd82f04..466cc5c5cd33 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -76,7 +76,7 @@ xfs_efi_item_sizeof( struct xfs_efi_log_item *efip) { return sizeof(struct xfs_efi_log_format) + - (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t); + efip->efi_format.efi_nextents * sizeof(xfs_extent_t); } STATIC void @@ -160,7 +160,7 @@ xfs_efi_init( ASSERT(nextents > 0); if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { size = (uint)(sizeof(struct xfs_efi_log_item) + - ((nextents - 1) * sizeof(xfs_extent_t))); + (nextents * sizeof(xfs_extent_t))); efip = kmem_zalloc(size, 0); } else { efip = kmem_cache_zalloc(xfs_efi_cache, @@ -189,14 +189,19 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; uint i; uint len = sizeof(xfs_efi_log_format_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_t); + src_efi_fmt->efi_nextents * sizeof(xfs_extent_t); uint len32 = sizeof(xfs_efi_log_format_32_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_32_t); + src_efi_fmt->efi_nextents * sizeof(xfs_extent_32_t); uint len64 = sizeof(xfs_efi_log_format_64_t) + - (src_efi_fmt->efi_nextents - 1) * sizeof(xfs_extent_64_t); + src_efi_fmt->efi_nextents * sizeof(xfs_extent_64_t); if (buf->i_len == len) { - memcpy((char *)dst_efi_fmt, (char*)src_efi_fmt, len); + memcpy(dst_efi_fmt, src_efi_fmt, + offsetof(struct xfs_efi_log_format, efi_extents)); + for (i = 0; i < src_efi_fmt->efi_nextents; i++) + memcpy(&dst_efi_fmt->efi_extents[i], + &src_efi_fmt->efi_extents[i], + sizeof(struct xfs_extent)); return 0; } else if (buf->i_len == len32) { xfs_efi_log_format_32_t *src_efi_fmt_32 = buf->i_addr; @@ -256,7 +261,7 @@ xfs_efd_item_sizeof( struct xfs_efd_log_item *efdp) { return sizeof(xfs_efd_log_format_t) + - (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t); + efdp->efd_format.efd_nextents * sizeof(xfs_extent_t); } STATIC void @@ -341,7 +346,7 @@ xfs_trans_get_efd( if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + - (nextents - 1) * sizeof(struct xfs_extent), + nextents * sizeof(struct xfs_extent), 0); } else { efdp = kmem_cache_zalloc(xfs_efd_cache, @@ -733,6 +738,12 @@ xlog_recover_efi_commit_pass2( efi_formatp = item->ri_buf[0].i_addr; + if (item->ri_buf[0].i_len < + offsetof(struct xfs_efi_log_format, efi_extents)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; + } + efip = xfs_efi_init(mp, efi_formatp->efi_nextents); error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format); if (error) { @@ -772,9 +783,9 @@ xlog_recover_efd_commit_pass2( efd_formatp = item->ri_buf[0].i_addr; ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_32_t)))) || + (efd_formatp->efd_nextents * sizeof(xfs_extent_32_t)))) || (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + - ((efd_formatp->efd_nextents - 1) * sizeof(xfs_extent_64_t))))); + (efd_formatp->efd_nextents * sizeof(xfs_extent_64_t))))); xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); return 0; diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index 19c1df00b48e..9737b5a9f405 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -118,10 +118,10 @@ xfs_check_ondisk_structs(void) /* log structures */ XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24); - XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28); - XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32); - XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 28); - XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12); XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16); XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176); @@ -146,6 +146,9 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16); /* * The v5 superblock format extended several v4 header structures with diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f029c6702dda..8485e3b37ca0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2029,7 +2029,7 @@ xfs_init_caches(void) xfs_efd_cache = kmem_cache_create("xfs_efd_item", (sizeof(struct xfs_efd_log_item) + - (XFS_EFD_MAX_FAST_EXTENTS - 1) * + XFS_EFD_MAX_FAST_EXTENTS * sizeof(struct xfs_extent)), 0, 0, NULL); if (!xfs_efd_cache) @@ -2037,7 +2037,7 @@ xfs_init_caches(void) xfs_efi_cache = kmem_cache_create("xfs_efi_item", (sizeof(struct xfs_efi_log_item) + - (XFS_EFI_MAX_FAST_EXTENTS - 1) * + XFS_EFI_MAX_FAST_EXTENTS * sizeof(struct xfs_extent)), 0, 0, NULL); if (!xfs_efi_cache) From b65e08f83b119ae9345ed23d4da357a72b3cb55c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 28 Oct 2022 15:48:58 -0700 Subject: [PATCH 106/328] xfs: create a predicate to verify per-AG extents Create a predicate function to verify that a given agbno/blockcount pair fit entirely within a single allocation group and don't suffer mathematical overflows. Refactor the existng open-coded logic; we're going to add more calls to this function in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_ag.h | 15 +++++++++++++++ fs/xfs/libxfs/xfs_alloc.c | 6 +----- fs/xfs/libxfs/xfs_refcount.c | 6 +----- fs/xfs/libxfs/xfs_rmap.c | 9 ++------- fs/xfs/scrub/alloc.c | 4 +--- fs/xfs/scrub/ialloc.c | 5 ++--- fs/xfs/scrub/refcount.c | 5 ++--- 7 files changed, 24 insertions(+), 26 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 517a138faa66..191b22b9a35b 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -133,6 +133,21 @@ xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno) return true; } +static inline bool +xfs_verify_agbext( + struct xfs_perag *pag, + xfs_agblock_t agbno, + xfs_agblock_t len) +{ + if (agbno + len <= agbno) + return false; + + if (!xfs_verify_agbno(pag, agbno)) + return false; + + return xfs_verify_agbno(pag, agbno + len - 1); +} + /* * Verify that an AG inode number pointer neither points outside the AG * nor points at static metadata. diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6261599bb389..de79f5d07f65 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -263,11 +263,7 @@ xfs_alloc_get_rec( goto out_bad_rec; /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, *bno)) - goto out_bad_rec; - if (*bno > *bno + *len) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, *bno + *len - 1)) + if (!xfs_verify_agbext(pag, *bno, *len)) goto out_bad_rec; return 0; diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 831353ba96dc..1a50ca53304a 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -135,11 +135,7 @@ xfs_refcount_get_rec( } /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, realstart)) - goto out_bad_rec; - if (realstart > realstart + irec->rc_blockcount) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1)) + if (!xfs_verify_agbext(pag, realstart, irec->rc_blockcount)) goto out_bad_rec; if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 094dfc897ebc..b56aca1e7c66 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -235,13 +235,8 @@ xfs_rmap_get_rec( goto out_bad_rec; } else { /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(pag, irec->rm_startblock)) - goto out_bad_rec; - if (irec->rm_startblock > - irec->rm_startblock + irec->rm_blockcount) - goto out_bad_rec; - if (!xfs_verify_agbno(pag, - irec->rm_startblock + irec->rm_blockcount - 1)) + if (!xfs_verify_agbext(pag, irec->rm_startblock, + irec->rm_blockcount)) goto out_bad_rec; } diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index ab427b4d7fe0..3b38f4e2a537 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -100,9 +100,7 @@ xchk_allocbt_rec( bno = be32_to_cpu(rec->alloc.ar_startblock); len = be32_to_cpu(rec->alloc.ar_blockcount); - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_allocbt_xref(bs->sc, bno, len); diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index e1026e07bf94..e312be7cd375 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -108,9 +108,8 @@ xchk_iallocbt_chunk( xfs_agblock_t bno; bno = XFS_AGINO_TO_AGBNO(mp, agino); - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index c68b767dc08f..9959397f797f 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -354,9 +354,8 @@ xchk_refcountbt_rec( /* Check the extent. */ bno &= ~XFS_REFC_COW_START; - if (bno + len <= bno || - !xfs_verify_agbno(pag, bno) || - !xfs_verify_agbno(pag, bno + len - 1)) + + if (!xfs_verify_agbext(pag, bno, len)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (refcount == 0) From 3c5aaaced99912c9fb3352fc5af5b104df67d4aa Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 21 Oct 2022 09:10:05 -0700 Subject: [PATCH 107/328] xfs: refactor all the EFI/EFD log item sizeof logic Refactor all the open-coded sizeof logic for EFI/EFD log item and log format structures into common helper functions whose names reflect the struct names. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_log_format.h | 48 +++++++++++++++++++++++ fs/xfs/xfs_extfree_item.c | 69 ++++++++++------------------------ fs/xfs/xfs_extfree_item.h | 16 ++++++++ fs/xfs/xfs_super.c | 12 ++---- 4 files changed, 88 insertions(+), 57 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 2f41fa8477c9..f13e0809dc63 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -616,6 +616,14 @@ typedef struct xfs_efi_log_format { xfs_extent_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_t; +static inline size_t +xfs_efi_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format) + + nr * sizeof(struct xfs_extent); +} + typedef struct xfs_efi_log_format_32 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ @@ -624,6 +632,14 @@ typedef struct xfs_efi_log_format_32 { xfs_extent_32_t efi_extents[]; /* array of extents to free */ } __attribute__((packed)) xfs_efi_log_format_32_t; +static inline size_t +xfs_efi_log_format32_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format_32) + + nr * sizeof(struct xfs_extent_32); +} + typedef struct xfs_efi_log_format_64 { uint16_t efi_type; /* efi log item type */ uint16_t efi_size; /* size of this item */ @@ -632,6 +648,14 @@ typedef struct xfs_efi_log_format_64 { xfs_extent_64_t efi_extents[]; /* array of extents to free */ } xfs_efi_log_format_64_t; +static inline size_t +xfs_efi_log_format64_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efi_log_format_64) + + nr * sizeof(struct xfs_extent_64); +} + /* * This is the structure used to lay out an efd log item in the * log. The efd_extents array is a variable size array whose @@ -645,6 +669,14 @@ typedef struct xfs_efd_log_format { xfs_extent_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_t; +static inline size_t +xfs_efd_log_format_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format) + + nr * sizeof(struct xfs_extent); +} + typedef struct xfs_efd_log_format_32 { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ @@ -653,6 +685,14 @@ typedef struct xfs_efd_log_format_32 { xfs_extent_32_t efd_extents[]; /* array of extents freed */ } __attribute__((packed)) xfs_efd_log_format_32_t; +static inline size_t +xfs_efd_log_format32_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format_32) + + nr * sizeof(struct xfs_extent_32); +} + typedef struct xfs_efd_log_format_64 { uint16_t efd_type; /* efd log item type */ uint16_t efd_size; /* size of this item */ @@ -661,6 +701,14 @@ typedef struct xfs_efd_log_format_64 { xfs_extent_64_t efd_extents[]; /* array of extents freed */ } xfs_efd_log_format_64_t; +static inline size_t +xfs_efd_log_format64_sizeof( + unsigned int nr) +{ + return sizeof(struct xfs_efd_log_format_64) + + nr * sizeof(struct xfs_extent_64); +} + /* * RUI/RUD (reverse mapping) log format definitions */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 466cc5c5cd33..f7e52db8da66 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -66,27 +66,16 @@ xfs_efi_release( xfs_efi_item_free(efip); } -/* - * This returns the number of iovecs needed to log the given efi item. - * We only need 1 iovec for an efi item. It just logs the efi_log_format - * structure. - */ -static inline int -xfs_efi_item_sizeof( - struct xfs_efi_log_item *efip) -{ - return sizeof(struct xfs_efi_log_format) + - efip->efi_format.efi_nextents * sizeof(xfs_extent_t); -} - STATIC void xfs_efi_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { + struct xfs_efi_log_item *efip = EFI_ITEM(lip); + *nvecs += 1; - *nbytes += xfs_efi_item_sizeof(EFI_ITEM(lip)); + *nbytes += xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents); } /* @@ -112,7 +101,7 @@ xfs_efi_item_format( xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, - xfs_efi_item_sizeof(efip)); + xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents)); } @@ -155,13 +144,11 @@ xfs_efi_init( { struct xfs_efi_log_item *efip; - uint size; ASSERT(nextents > 0); if (nextents > XFS_EFI_MAX_FAST_EXTENTS) { - size = (uint)(sizeof(struct xfs_efi_log_item) + - (nextents * sizeof(xfs_extent_t))); - efip = kmem_zalloc(size, 0); + efip = kzalloc(xfs_efi_log_item_sizeof(nextents), + GFP_KERNEL | __GFP_NOFAIL); } else { efip = kmem_cache_zalloc(xfs_efi_cache, GFP_KERNEL | __GFP_NOFAIL); @@ -188,12 +175,9 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) { xfs_efi_log_format_t *src_efi_fmt = buf->i_addr; uint i; - uint len = sizeof(xfs_efi_log_format_t) + - src_efi_fmt->efi_nextents * sizeof(xfs_extent_t); - uint len32 = sizeof(xfs_efi_log_format_32_t) + - src_efi_fmt->efi_nextents * sizeof(xfs_extent_32_t); - uint len64 = sizeof(xfs_efi_log_format_64_t) + - src_efi_fmt->efi_nextents * sizeof(xfs_extent_64_t); + uint len = xfs_efi_log_format_sizeof(src_efi_fmt->efi_nextents); + uint len32 = xfs_efi_log_format32_sizeof(src_efi_fmt->efi_nextents); + uint len64 = xfs_efi_log_format64_sizeof(src_efi_fmt->efi_nextents); if (buf->i_len == len) { memcpy(dst_efi_fmt, src_efi_fmt, @@ -251,27 +235,16 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp) kmem_cache_free(xfs_efd_cache, efdp); } -/* - * This returns the number of iovecs needed to log the given efd item. - * We only need 1 iovec for an efd item. It just logs the efd_log_format - * structure. - */ -static inline int -xfs_efd_item_sizeof( - struct xfs_efd_log_item *efdp) -{ - return sizeof(xfs_efd_log_format_t) + - efdp->efd_format.efd_nextents * sizeof(xfs_extent_t); -} - STATIC void xfs_efd_item_size( struct xfs_log_item *lip, int *nvecs, int *nbytes) { + struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + *nvecs += 1; - *nbytes += xfs_efd_item_sizeof(EFD_ITEM(lip)); + *nbytes += xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents); } /* @@ -296,7 +269,7 @@ xfs_efd_item_format( xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, - xfs_efd_item_sizeof(efdp)); + xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents)); } /* @@ -345,9 +318,8 @@ xfs_trans_get_efd( ASSERT(nextents > 0); if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { - efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + - nextents * sizeof(struct xfs_extent), - 0); + efdp = kzalloc(xfs_efd_log_item_sizeof(nextents), + GFP_KERNEL | __GFP_NOFAIL); } else { efdp = kmem_cache_zalloc(xfs_efd_cache, GFP_KERNEL | __GFP_NOFAIL); @@ -738,8 +710,7 @@ xlog_recover_efi_commit_pass2( efi_formatp = item->ri_buf[0].i_addr; - if (item->ri_buf[0].i_len < - offsetof(struct xfs_efi_log_format, efi_extents)) { + if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; } @@ -782,10 +753,10 @@ xlog_recover_efd_commit_pass2( struct xfs_efd_log_format *efd_formatp; efd_formatp = item->ri_buf[0].i_addr; - ASSERT((item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_32_t) + - (efd_formatp->efd_nextents * sizeof(xfs_extent_32_t)))) || - (item->ri_buf[0].i_len == (sizeof(xfs_efd_log_format_64_t) + - (efd_formatp->efd_nextents * sizeof(xfs_extent_64_t))))); + ASSERT(item->ri_buf[0].i_len == xfs_efd_log_format32_sizeof( + efd_formatp->efd_nextents) || + item->ri_buf[0].i_len == xfs_efd_log_format64_sizeof( + efd_formatp->efd_nextents)); xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); return 0; diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 186d0f2137f1..da6a5afa607c 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -52,6 +52,14 @@ struct xfs_efi_log_item { xfs_efi_log_format_t efi_format; }; +static inline size_t +xfs_efi_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_efi_log_item, efi_format) + + xfs_efi_log_format_sizeof(nr); +} + /* * This is the "extent free done" log item. It is used to log * the fact that some extents earlier mentioned in an efi item @@ -64,6 +72,14 @@ struct xfs_efd_log_item { xfs_efd_log_format_t efd_format; }; +static inline size_t +xfs_efd_log_item_sizeof( + unsigned int nr) +{ + return offsetof(struct xfs_efd_log_item, efd_format) + + xfs_efd_log_format_sizeof(nr); +} + /* * Max number of extents in fast allocation path. */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8485e3b37ca0..ee4b429a2f2c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2028,18 +2028,14 @@ xfs_init_caches(void) goto out_destroy_trans_cache; xfs_efd_cache = kmem_cache_create("xfs_efd_item", - (sizeof(struct xfs_efd_log_item) + - XFS_EFD_MAX_FAST_EXTENTS * - sizeof(struct xfs_extent)), - 0, 0, NULL); + xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS), + 0, 0, NULL); if (!xfs_efd_cache) goto out_destroy_buf_item_cache; xfs_efi_cache = kmem_cache_create("xfs_efi_item", - (sizeof(struct xfs_efi_log_item) + - XFS_EFI_MAX_FAST_EXTENTS * - sizeof(struct xfs_extent)), - 0, 0, NULL); + xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS), + 0, 0, NULL); if (!xfs_efi_cache) goto out_destroy_efd_cache; From 8edbe0cf8b4bbe2cf47513998641797b0aca8ee2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 10 Oct 2022 11:33:47 -0700 Subject: [PATCH 108/328] xfs: check deferred refcount op continuation parameters If we're in the middle of a deferred refcount operation and decide to roll the transaction to avoid overflowing the transaction space, we need to check the new agbno/aglen parameters that we're about to record in the new intent. Specifically, we need to check that the new extent is completely within the filesystem, and that continuation does not put us into a different AG. If the keys of a node block are wrong, the lookup to resume an xfs_refcount_adjust_extents operation can put us into the wrong record block. If this happens, we might not find that we run out of aglen at an exact record boundary, which will cause the loop control to do the wrong thing. The previous patch should take care of that problem, but let's add this extra sanity check to stop corruption problems sooner than later. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 38 ++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 1a50ca53304a..542f749d0c6a 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1134,6 +1134,32 @@ xfs_refcount_finish_one_cleanup( xfs_trans_brelse(tp, agbp); } +/* + * Set up a continuation a deferred refcount operation by updating the intent. + * Checks to make sure we're not going to run off the end of the AG. + */ +static inline int +xfs_refcount_continue_op( + struct xfs_btree_cur *cur, + xfs_fsblock_t startblock, + xfs_agblock_t new_agbno, + xfs_extlen_t new_len, + xfs_fsblock_t *new_fsbno) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_perag *pag = cur->bc_ag.pag; + + if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len))) + return -EFSCORRUPTED; + + *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + + ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len)); + ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno)); + + return 0; +} + /* * Process one of the deferred refcount operations. We pass back the * btree cursor to maintain our lock on the btree between calls. @@ -1201,12 +1227,20 @@ xfs_refcount_finish_one( case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_INCREASE); - *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + if (error) + goto out_drop; + if (*new_len > 0) + error = xfs_refcount_continue_op(rcur, startblock, + new_agbno, *new_len, new_fsb); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, new_len, XFS_REFCOUNT_ADJUST_DECREASE); - *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + if (error) + goto out_drop; + if (*new_len > 0) + error = xfs_refcount_continue_op(rcur, startblock, + new_agbno, *new_len, new_fsb); break; case XFS_REFCOUNT_ALLOC_COW: *new_fsb = startblock + blockcount; From 921ed96b4f4e3bd19da7f775f39234226e6647e7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 25 Oct 2022 15:14:06 -0700 Subject: [PATCH 109/328] xfs: actually abort log recovery on corrupt intent-done log items If log recovery picks up intent-done log items that are not of the correct size it needs to abort recovery and fail the mount. Debug assertions are not good enough. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_extfree_item.c | 20 ++++++++++++++++---- fs/xfs/xfs_rmap_item.c | 6 +++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f7e52db8da66..18c224351343 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -751,12 +751,24 @@ xlog_recover_efd_commit_pass2( xfs_lsn_t lsn) { struct xfs_efd_log_format *efd_formatp; + int buflen = item->ri_buf[0].i_len; efd_formatp = item->ri_buf[0].i_addr; - ASSERT(item->ri_buf[0].i_len == xfs_efd_log_format32_sizeof( - efd_formatp->efd_nextents) || - item->ri_buf[0].i_len == xfs_efd_log_format64_sizeof( - efd_formatp->efd_nextents)); + + if (buflen < sizeof(struct xfs_efd_log_format)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + efd_formatp, buflen); + return -EFSCORRUPTED; + } + + if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof( + efd_formatp->efd_nextents) && + item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof( + efd_formatp->efd_nextents)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + efd_formatp, buflen); + return -EFSCORRUPTED; + } xlog_recover_release_intent(log, XFS_LI_EFI, efd_formatp->efd_efi_id); return 0; diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 27047e73f582..5a360c384ea5 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -707,7 +707,11 @@ xlog_recover_rud_commit_pass2( struct xfs_rud_log_format *rud_formatp; rud_formatp = item->ri_buf[0].i_addr; - ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format)); + if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + rud_formatp, item->ri_buf[0].i_len); + return -EFSCORRUPTED; + } xlog_recover_release_intent(log, XFS_LI_RUI, rud_formatp->rud_rui_id); return 0; From 9e7e2436c159490fbbadbc4b5a4ee6bc30dae02e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 10 Oct 2022 08:47:59 -0700 Subject: [PATCH 110/328] xfs: move _irec structs to xfs_types.h Structure definitions for incore objects do not belong in the ondisk format header. Move them to the incore types header where they belong. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_format.h | 20 -------------------- fs/xfs/libxfs/xfs_types.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index b55bdfa9c8a8..005dd65b71cd 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1564,20 +1564,6 @@ struct xfs_rmap_rec { #define RMAPBT_UNUSED_OFFSET_BITLEN 7 #define RMAPBT_OFFSET_BITLEN 54 -#define XFS_RMAP_ATTR_FORK (1 << 0) -#define XFS_RMAP_BMBT_BLOCK (1 << 1) -#define XFS_RMAP_UNWRITTEN (1 << 2) -#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \ - XFS_RMAP_BMBT_BLOCK) -#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN) -struct xfs_rmap_irec { - xfs_agblock_t rm_startblock; /* extent start block */ - xfs_extlen_t rm_blockcount; /* extent length */ - uint64_t rm_owner; /* extent owner */ - uint64_t rm_offset; /* offset within the owner */ - unsigned int rm_flags; /* state flags */ -}; - /* * Key structure * @@ -1640,12 +1626,6 @@ struct xfs_refcount_key { __be32 rc_startblock; /* starting block number */ }; -struct xfs_refcount_irec { - xfs_agblock_t rc_startblock; /* starting block number */ - xfs_extlen_t rc_blockcount; /* count of free blocks */ - xfs_nlink_t rc_refcount; /* number of inodes linked here */ -}; - #define MAXREFCOUNT ((xfs_nlink_t)~0U) #define MAXREFCEXTLEN ((xfs_extlen_t)~0U) diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index a6b7d98cf68f..2d9ebc7338b1 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -166,6 +166,26 @@ typedef struct xfs_bmbt_irec xfs_exntst_t br_state; /* extent state */ } xfs_bmbt_irec_t; +struct xfs_refcount_irec { + xfs_agblock_t rc_startblock; /* starting block number */ + xfs_extlen_t rc_blockcount; /* count of free blocks */ + xfs_nlink_t rc_refcount; /* number of inodes linked here */ +}; + +#define XFS_RMAP_ATTR_FORK (1 << 0) +#define XFS_RMAP_BMBT_BLOCK (1 << 1) +#define XFS_RMAP_UNWRITTEN (1 << 2) +#define XFS_RMAP_KEY_FLAGS (XFS_RMAP_ATTR_FORK | \ + XFS_RMAP_BMBT_BLOCK) +#define XFS_RMAP_REC_FLAGS (XFS_RMAP_UNWRITTEN) +struct xfs_rmap_irec { + xfs_agblock_t rm_startblock; /* extent start block */ + xfs_extlen_t rm_blockcount; /* extent length */ + uint64_t rm_owner; /* extent owner */ + uint64_t rm_offset; /* offset within the owner */ + unsigned int rm_flags; /* state flags */ +}; + /* per-AG block reservation types */ enum xfs_ag_resv_type { XFS_AG_RESV_NONE = 0, From 950f0d50ee7138d7e631aefea8528d485426eda6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 25 Oct 2022 15:07:14 -0700 Subject: [PATCH 111/328] xfs: dump corrupt recovered log intent items to dmesg consistently If log recovery decides that an intent item is corrupt and wants to abort the mount, capture a hexdump of the corrupt log item in the kernel log for further analysis. Some of the log item code already did this, so we're fixing the rest to do it consistently. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/xfs_attr_item.c | 19 +++++++++++++------ fs/xfs/xfs_bmap_item.c | 12 ++++++++---- fs/xfs/xfs_extfree_item.c | 6 ++++-- fs/xfs/xfs_refcount_item.c | 16 +++++++++++----- fs/xfs/xfs_rmap_item.c | 10 +++++++--- 5 files changed, 43 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index ee8f678a10a1..2788a6f2edcd 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -717,31 +717,37 @@ xlog_recover_attri_commit_pass2( /* Validate xfs_attri_log_format before the large memory allocation */ len = sizeof(struct xfs_attri_log_format); if (item->ri_buf[0].i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } if (!xfs_attri_validate(mp, attri_formatp)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } /* Validate the attr name */ if (item->ri_buf[1].i_len != xlog_calc_iovec_len(attri_formatp->alfi_name_len)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[1].i_addr, item->ri_buf[1].i_len); return -EFSCORRUPTED; } /* Validate the attr value, if present */ if (attri_formatp->alfi_value_len != 0) { if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, + item->ri_buf[0].i_len); return -EFSCORRUPTED; } @@ -834,7 +840,8 @@ xlog_recover_attrd_commit_pass2( attrd_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index a1da6205252b..41323da523d1 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -644,18 +644,21 @@ xlog_recover_bui_commit_pass2( bui_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len < xfs_bui_log_format_sizeof(0)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } len = xfs_bui_log_format_sizeof(bui_formatp->bui_nextents); if (item->ri_buf[0].i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } @@ -694,7 +697,8 @@ xlog_recover_bud_commit_pass2( bud_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 18c224351343..d5130d1fcfae 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -216,7 +216,8 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt) } return 0; } - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, NULL, buf->i_addr, + buf->i_len); return -EFSCORRUPTED; } @@ -711,7 +712,8 @@ xlog_recover_efi_commit_pass2( efi_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 24cf4c64ebaa..858e3e9eb4a8 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -523,7 +523,9 @@ xfs_cui_item_recover( type = refc_type; break; default: - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &cuip->cui_format, + sizeof(cuip->cui_format)); error = -EFSCORRUPTED; goto abort_error; } @@ -536,7 +538,8 @@ xfs_cui_item_recover( &new_fsb, &new_len, &rcur); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - refc, sizeof(*refc)); + &cuip->cui_format, + sizeof(cuip->cui_format)); if (error) goto abort_error; @@ -658,13 +661,15 @@ xlog_recover_cui_commit_pass2( cui_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); if (item->ri_buf[0].i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } @@ -703,7 +708,8 @@ xlog_recover_cud_commit_pass2( cud_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 5a360c384ea5..534504ede1a3 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -557,7 +557,9 @@ xfs_rui_item_recover( type = XFS_RMAP_FREE; break; default: - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &ruip->rui_format, + sizeof(ruip->rui_format)); error = -EFSCORRUPTED; goto abort_error; } @@ -663,13 +665,15 @@ xlog_recover_rui_commit_pass2( rui_formatp = item->ri_buf[0].i_addr; if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents); if (item->ri_buf[0].i_len != len) { - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + item->ri_buf[0].i_addr, item->ri_buf[0].i_len); return -EFSCORRUPTED; } From 5a8c345ca8b99a9f54b89991f2f6a20521cb05f4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 11 Oct 2022 11:22:54 -0700 Subject: [PATCH 112/328] xfs: refactor refcount record usage in xchk_refcountbt_rec Consolidate the open-coded xfs_refcount_irec fields into an actual struct and use the existing _btrec_to_irec to decode the ondisk record. This will reduce code churn in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/scrub/refcount.c | 54 ++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 9959397f797f..9e6b36ac8079 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -269,15 +269,13 @@ done: STATIC void xchk_refcountbt_xref_rmap( struct xfs_scrub *sc, - xfs_agblock_t bno, - xfs_extlen_t len, - xfs_nlink_t refcount) + const struct xfs_refcount_irec *irec) { struct xchk_refcnt_check refchk = { - .sc = sc, - .bno = bno, - .len = len, - .refcount = refcount, + .sc = sc, + .bno = irec->rc_startblock, + .len = irec->rc_blockcount, + .refcount = irec->rc_refcount, .seen = 0, }; struct xfs_rmap_irec low; @@ -291,9 +289,9 @@ xchk_refcountbt_xref_rmap( /* Cross-reference with the rmapbt to confirm the refcount. */ memset(&low, 0, sizeof(low)); - low.rm_startblock = bno; + low.rm_startblock = irec->rc_startblock; memset(&high, 0xFF, sizeof(high)); - high.rm_startblock = bno + len - 1; + high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1; INIT_LIST_HEAD(&refchk.fragments); error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, @@ -302,7 +300,7 @@ xchk_refcountbt_xref_rmap( goto out_free; xchk_refcountbt_process_rmap_fragments(&refchk); - if (refcount != refchk.seen) + if (irec->rc_refcount != refchk.seen) xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); out_free: @@ -315,17 +313,16 @@ out_free: /* Cross-reference with the other btrees. */ STATIC void xchk_refcountbt_xref( - struct xfs_scrub *sc, - xfs_agblock_t agbno, - xfs_extlen_t len, - xfs_nlink_t refcount) + struct xfs_scrub *sc, + const struct xfs_refcount_irec *irec) { if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) return; - xchk_xref_is_used_space(sc, agbno, len); - xchk_xref_is_not_inode_chunk(sc, agbno, len); - xchk_refcountbt_xref_rmap(sc, agbno, len, refcount); + xchk_xref_is_used_space(sc, irec->rc_startblock, irec->rc_blockcount); + xchk_xref_is_not_inode_chunk(sc, irec->rc_startblock, + irec->rc_blockcount); + xchk_refcountbt_xref_rmap(sc, irec); } /* Scrub a refcountbt record. */ @@ -334,34 +331,31 @@ xchk_refcountbt_rec( struct xchk_btree *bs, const union xfs_btree_rec *rec) { + struct xfs_refcount_irec irec; xfs_agblock_t *cow_blocks = bs->private; struct xfs_perag *pag = bs->cur->bc_ag.pag; - xfs_agblock_t bno; - xfs_extlen_t len; - xfs_nlink_t refcount; bool has_cowflag; - bno = be32_to_cpu(rec->refc.rc_startblock); - len = be32_to_cpu(rec->refc.rc_blockcount); - refcount = be32_to_cpu(rec->refc.rc_refcount); + xfs_refcount_btrec_to_irec(rec, &irec); /* Only CoW records can have refcount == 1. */ - has_cowflag = (bno & XFS_REFC_COW_START); - if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag)) + has_cowflag = (irec.rc_startblock & XFS_REFC_COW_START); + if ((irec.rc_refcount == 1 && !has_cowflag) || + (irec.rc_refcount != 1 && has_cowflag)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (has_cowflag) - (*cow_blocks) += len; + (*cow_blocks) += irec.rc_blockcount; /* Check the extent. */ - bno &= ~XFS_REFC_COW_START; + irec.rc_startblock &= ~XFS_REFC_COW_START; - if (!xfs_verify_agbext(pag, bno, len)) + if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (refcount == 0) + if (irec.rc_refcount == 0) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - xchk_refcountbt_xref(bs->sc, bno, len, refcount); + xchk_refcountbt_xref(bs->sc, &irec); return 0; } From 9a50ee4f8db6e4dd0d8d757b7adaf0591776860a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 10 Oct 2022 09:06:24 -0700 Subject: [PATCH 113/328] xfs: track cow/shared record domains explicitly in xfs_refcount_irec Just prior to committing the reflink code into upstream, the xfs maintainer at the time requested that I find a way to shard the refcount records into two domains -- one for records tracking shared extents, and a second for tracking CoW staging extents. The idea here was to minimize mount time CoW reclamation by pushing all the CoW records to the right edge of the keyspace, and it was accomplished by setting the upper bit in rc_startblock. We don't allow AGs to have more than 2^31 blocks, so the bit was free. Unfortunately, this was a very late addition to the codebase, so most of the refcount record processing code still treats rc_startblock as a u32 and pays no attention to whether or not the upper bit (the cow flag) is set. This is a weakness is theoretically exploitable, since we're not fully validating the incoming metadata records. Fuzzing demonstrates practical exploits of this weakness. If the cow flag of a node block key record is corrupted, a lookup operation can go to the wrong record block and start returning records from the wrong cow/shared domain. This causes the math to go all wrong (since cow domain is still implicit in the upper bit of rc_startblock) and we can crash the kernel by tricking xfs into jumping into a nonexistent AG and tripping over xfs_perag_get(mp, ) returning NULL. To fix this, start tracking the domain as an explicit part of struct xfs_refcount_irec, adjust all refcount functions to check the domain of a returned record, and alter the function definitions to accept them where necessary. Found by fuzzing keys[2].cowflag = add in xfs/464. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 146 +++++++++++++++++++---------- fs/xfs/libxfs/xfs_refcount.h | 28 +++++- fs/xfs/libxfs/xfs_refcount_btree.c | 15 ++- fs/xfs/libxfs/xfs_types.h | 6 ++ fs/xfs/scrub/refcount.c | 23 ++--- 5 files changed, 151 insertions(+), 67 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 542f749d0c6a..0f920eff34c4 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -46,13 +46,16 @@ STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, int xfs_refcount_lookup_le( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); } @@ -63,13 +66,16 @@ xfs_refcount_lookup_le( int xfs_refcount_lookup_ge( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_GE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); } @@ -80,13 +86,16 @@ xfs_refcount_lookup_ge( int xfs_refcount_lookup_eq( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat) { - trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, + trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, + xfs_refcount_encode_startblock(bno, domain), XFS_LOOKUP_LE); cur->bc_rec.rc.rc_startblock = bno; cur->bc_rec.rc.rc_blockcount = 0; + cur->bc_rec.rc.rc_domain = domain; return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); } @@ -96,7 +105,17 @@ xfs_refcount_btrec_to_irec( const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec) { - irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); + uint32_t start; + + start = be32_to_cpu(rec->refc.rc_startblock); + if (start & XFS_REFC_COW_START) { + start &= ~XFS_REFC_COW_START; + irec->rc_domain = XFS_REFC_DOMAIN_COW; + } else { + irec->rc_domain = XFS_REFC_DOMAIN_SHARED; + } + + irec->rc_startblock = start; irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); } @@ -114,7 +133,6 @@ xfs_refcount_get_rec( struct xfs_perag *pag = cur->bc_ag.pag; union xfs_btree_rec *rec; int error; - xfs_agblock_t realstart; error = xfs_btree_get_rec(cur, &rec, stat); if (error || !*stat) @@ -124,18 +142,14 @@ xfs_refcount_get_rec( if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; - /* handle special COW-staging state */ - realstart = irec->rc_startblock; - if (realstart & XFS_REFC_COW_START) { - if (irec->rc_refcount != 1) - goto out_bad_rec; - realstart &= ~XFS_REFC_COW_START; - } else if (irec->rc_refcount < 2) { + /* handle special COW-staging domain */ + if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1) + goto out_bad_rec; + if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2) goto out_bad_rec; - } /* check for valid extent range, including overflow */ - if (!xfs_verify_agbext(pag, realstart, irec->rc_blockcount)) + if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount)) goto out_bad_rec; if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) @@ -165,12 +179,17 @@ xfs_refcount_update( struct xfs_refcount_irec *irec) { union xfs_btree_rec rec; + uint32_t start; int error; trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); - rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); + + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + rec.refc.rc_startblock = cpu_to_be32(start); rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); + error = xfs_btree_update(cur, &rec); if (error) trace_xfs_refcount_update_error(cur->bc_mp, @@ -192,9 +211,12 @@ xfs_refcount_insert( int error; trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); + cur->bc_rec.rc.rc_startblock = irec->rc_startblock; cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; cur->bc_rec.rc.rc_refcount = irec->rc_refcount; + cur->bc_rec.rc.rc_domain = irec->rc_domain; + error = xfs_btree_insert(cur, i); if (error) goto out_error; @@ -240,7 +262,8 @@ xfs_refcount_delete( } if (error) goto out_error; - error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); + error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock, + &found_rec); out_error: if (error) trace_xfs_refcount_delete_error(cur->bc_mp, @@ -339,6 +362,7 @@ xfs_refc_next( STATIC int xfs_refcount_split_extent( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t agbno, bool *shape_changed) { @@ -347,7 +371,7 @@ xfs_refcount_split_extent( int error; *shape_changed = false; - error = xfs_refcount_lookup_le(cur, agbno, &found_rec); + error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec); if (error) goto out_error; if (!found_rec) @@ -419,8 +443,8 @@ xfs_refcount_merge_center_extents( * call removes the center and the second one removes the right * extent. */ - error = xfs_refcount_lookup_ge(cur, center->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_ge(cur, center->rc_domain, + center->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -447,8 +471,8 @@ xfs_refcount_merge_center_extents( } /* Enlarge the left extent. */ - error = xfs_refcount_lookup_le(cur, left->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, left->rc_domain, + left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -489,8 +513,8 @@ xfs_refcount_merge_left_extent( /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { - error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, cleft->rc_domain, + cleft->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -508,8 +532,8 @@ xfs_refcount_merge_left_extent( } /* Enlarge the left extent. */ - error = xfs_refcount_lookup_le(cur, left->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, left->rc_domain, + left->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -553,8 +577,8 @@ xfs_refcount_merge_right_extent( * remove it. */ if (cright->rc_refcount > 1) { - error = xfs_refcount_lookup_le(cur, cright->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, cright->rc_domain, + cright->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -572,8 +596,8 @@ xfs_refcount_merge_right_extent( } /* Enlarge the right extent. */ - error = xfs_refcount_lookup_le(cur, right->rc_startblock, - &found_rec); + error = xfs_refcount_lookup_le(cur, right->rc_domain, + right->rc_startblock, &found_rec); if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { @@ -612,11 +636,17 @@ xfs_refcount_find_left_extents( int flags) { struct xfs_refcount_irec tmp; + enum xfs_refc_domain domain; int error; int found_rec; + if (flags & XFS_FIND_RCEXT_SHARED) + domain = XFS_REFC_DOMAIN_SHARED; + else + domain = XFS_REFC_DOMAIN_COW; + left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; - error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); + error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec); if (error) goto out_error; if (!found_rec) @@ -667,6 +697,7 @@ xfs_refcount_find_left_extents( cleft->rc_blockcount = min(aglen, tmp.rc_startblock - agbno); cleft->rc_refcount = 1; + cleft->rc_domain = domain; } } else { /* @@ -676,6 +707,7 @@ xfs_refcount_find_left_extents( cleft->rc_startblock = agbno; cleft->rc_blockcount = aglen; cleft->rc_refcount = 1; + cleft->rc_domain = domain; } trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft, agbno); @@ -701,11 +733,17 @@ xfs_refcount_find_right_extents( int flags) { struct xfs_refcount_irec tmp; + enum xfs_refc_domain domain; int error; int found_rec; + if (flags & XFS_FIND_RCEXT_SHARED) + domain = XFS_REFC_DOMAIN_SHARED; + else + domain = XFS_REFC_DOMAIN_COW; + right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; - error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); + error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec); if (error) goto out_error; if (!found_rec) @@ -756,6 +794,7 @@ xfs_refcount_find_right_extents( cright->rc_blockcount = right->rc_startblock - cright->rc_startblock; cright->rc_refcount = 1; + cright->rc_domain = domain; } } else { /* @@ -765,6 +804,7 @@ xfs_refcount_find_right_extents( cright->rc_startblock = agbno; cright->rc_blockcount = aglen; cright->rc_refcount = 1; + cright->rc_domain = domain; } trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right, agbno + aglen); @@ -929,7 +969,8 @@ xfs_refcount_adjust_extents( if (*aglen == 0) return 0; - error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); + error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno, + &found_rec); if (error) goto out_error; @@ -941,6 +982,7 @@ xfs_refcount_adjust_extents( ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; + ext.rc_domain = XFS_REFC_DOMAIN_SHARED; } /* @@ -953,6 +995,8 @@ xfs_refcount_adjust_extents( tmp.rc_blockcount = min(*aglen, ext.rc_startblock - *agbno); tmp.rc_refcount = 1 + adj; + tmp.rc_domain = XFS_REFC_DOMAIN_SHARED; + trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &tmp); @@ -987,7 +1031,8 @@ xfs_refcount_adjust_extents( break; /* Move the cursor to the start of ext. */ - error = xfs_refcount_lookup_ge(cur, *agbno, + error = xfs_refcount_lookup_ge(cur, + XFS_REFC_DOMAIN_SHARED, *agbno, &found_rec); if (error) goto out_error; @@ -1080,13 +1125,15 @@ xfs_refcount_adjust( /* * Ensure that no rcextents cross the boundary of the adjustment range. */ - error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, + agbno, &shape_changed); if (error) goto out_error; if (shape_changed) shape_changes++; - error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, + agbno + aglen, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1351,7 +1398,8 @@ xfs_refcount_find_shared( *flen = 0; /* Try to find a refcount extent that crosses the start */ - error = xfs_refcount_lookup_le(cur, agbno, &have); + error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno, + &have); if (error) goto out_error; if (!have) { @@ -1499,17 +1547,18 @@ xfs_refcount_adjust_cow_extents( return 0; /* Find any overlapping refcount records */ - error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); + error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno, + &found_rec); if (error) goto out_error; error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; if (!found_rec) { - ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + - XFS_REFC_COW_START; + ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; + ext.rc_domain = XFS_REFC_DOMAIN_COW; } switch (adj) { @@ -1524,6 +1573,8 @@ xfs_refcount_adjust_cow_extents( tmp.rc_startblock = agbno; tmp.rc_blockcount = aglen; tmp.rc_refcount = 1; + tmp.rc_domain = XFS_REFC_DOMAIN_COW; + trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &tmp); @@ -1586,16 +1637,16 @@ xfs_refcount_adjust_cow( bool shape_changed; int error; - agbno += XFS_REFC_COW_START; - /* * Ensure that no rcextents cross the boundary of the adjustment range. */ - error = xfs_refcount_split_extent(cur, agbno, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, + agbno, &shape_changed); if (error) goto out_error; - error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW, + agbno + aglen, &shape_changed); if (error) goto out_error; @@ -1731,7 +1782,6 @@ xfs_refcount_recover_cow_leftovers( union xfs_btree_irec low; union xfs_btree_irec high; xfs_fsblock_t fsb; - xfs_agblock_t agbno; int error; if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) @@ -1761,7 +1811,7 @@ xfs_refcount_recover_cow_leftovers( /* Find all the leftover CoW staging extents. */ memset(&low, 0, sizeof(low)); memset(&high, 0, sizeof(high)); - low.rc.rc_startblock = XFS_REFC_COW_START; + low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW; high.rc.rc_startblock = -1U; error = xfs_btree_query_range(cur, &low, &high, xfs_refcount_recover_extent, &debris); @@ -1782,8 +1832,8 @@ xfs_refcount_recover_cow_leftovers( &rr->rr_rrec); /* Free the orphan record */ - agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; - fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno); + fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, + rr->rr_rrec.rc_startblock); xfs_refcount_free_cow_extent(tp, fsb, rr->rr_rrec.rc_blockcount); @@ -1814,6 +1864,7 @@ out_free: int xfs_refcount_has_record( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t bno, xfs_extlen_t len, bool *exists) @@ -1825,6 +1876,7 @@ xfs_refcount_has_record( low.rc.rc_startblock = bno; memset(&high, 0xFF, sizeof(high)); high.rc.rc_startblock = bno + len - 1; + low.rc.rc_domain = high.rc.rc_domain = domain; return xfs_btree_has_record(cur, &low, &high, exists); } diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index e8b322de7f3d..3beb5a30a9c9 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -14,14 +14,33 @@ struct xfs_bmbt_irec; struct xfs_refcount_irec; extern int xfs_refcount_lookup_le(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur, - xfs_agblock_t bno, int *stat); + enum xfs_refc_domain domain, xfs_agblock_t bno, int *stat); extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); +static inline uint32_t +xfs_refcount_encode_startblock( + xfs_agblock_t startblock, + enum xfs_refc_domain domain) +{ + uint32_t start; + + /* + * low level btree operations need to handle the generic btree range + * query functions (which set rc_domain == -1U), so we check that the + * domain is /not/ shared. + */ + start = startblock & ~XFS_REFC_COW_START; + if (domain != XFS_REFC_DOMAIN_SHARED) + start |= XFS_REFC_COW_START; + + return start; +} + enum xfs_refcount_intent_type { XFS_REFCOUNT_INCREASE = 1, XFS_REFCOUNT_DECREASE, @@ -79,7 +98,8 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp, #define XFS_REFCOUNT_ITEM_OVERHEAD 32 extern int xfs_refcount_has_record(struct xfs_btree_cur *cur, - xfs_agblock_t bno, xfs_extlen_t len, bool *exists); + enum xfs_refc_domain domain, xfs_agblock_t bno, + xfs_extlen_t len, bool *exists); union xfs_btree_rec; extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 316c1ec0c3c2..e1f789866683 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -13,6 +13,7 @@ #include "xfs_btree.h" #include "xfs_btree_staging.h" #include "xfs_refcount_btree.h" +#include "xfs_refcount.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -160,7 +161,12 @@ xfs_refcountbt_init_rec_from_cur( struct xfs_btree_cur *cur, union xfs_btree_rec *rec) { - rec->refc.rc_startblock = cpu_to_be32(cur->bc_rec.rc.rc_startblock); + const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; + uint32_t start; + + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + rec->refc.rc_startblock = cpu_to_be32(start); rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount); rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount); } @@ -182,10 +188,13 @@ xfs_refcountbt_key_diff( struct xfs_btree_cur *cur, const union xfs_btree_key *key) { - struct xfs_refcount_irec *rec = &cur->bc_rec.rc; const struct xfs_refcount_key *kp = &key->refc; + const struct xfs_refcount_irec *irec = &cur->bc_rec.rc; + uint32_t start; - return (int64_t)be32_to_cpu(kp->rc_startblock) - rec->rc_startblock; + start = xfs_refcount_encode_startblock(irec->rc_startblock, + irec->rc_domain); + return (int64_t)be32_to_cpu(kp->rc_startblock) - start; } STATIC int64_t diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 2d9ebc7338b1..eb9a98338bb9 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -166,10 +166,16 @@ typedef struct xfs_bmbt_irec xfs_exntst_t br_state; /* extent state */ } xfs_bmbt_irec_t; +enum xfs_refc_domain { + XFS_REFC_DOMAIN_SHARED = 0, + XFS_REFC_DOMAIN_COW, +}; + struct xfs_refcount_irec { xfs_agblock_t rc_startblock; /* starting block number */ xfs_extlen_t rc_blockcount; /* count of free blocks */ xfs_nlink_t rc_refcount; /* number of inodes linked here */ + enum xfs_refc_domain rc_domain; /* shared or cow staging extent? */ }; #define XFS_RMAP_ATTR_FORK (1 << 0) diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 9e6b36ac8079..af5b796ec9ec 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -334,21 +334,19 @@ xchk_refcountbt_rec( struct xfs_refcount_irec irec; xfs_agblock_t *cow_blocks = bs->private; struct xfs_perag *pag = bs->cur->bc_ag.pag; - bool has_cowflag; xfs_refcount_btrec_to_irec(rec, &irec); /* Only CoW records can have refcount == 1. */ - has_cowflag = (irec.rc_startblock & XFS_REFC_COW_START); - if ((irec.rc_refcount == 1 && !has_cowflag) || - (irec.rc_refcount != 1 && has_cowflag)) + if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED && irec.rc_refcount == 1) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (has_cowflag) + if (irec.rc_domain == XFS_REFC_DOMAIN_COW) { + if (irec.rc_refcount != 1) + xchk_btree_set_corrupt(bs->sc, bs->cur, 0); (*cow_blocks) += irec.rc_blockcount; + } /* Check the extent. */ - irec.rc_startblock &= ~XFS_REFC_COW_START; - if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); @@ -419,7 +417,6 @@ xchk_xref_is_cow_staging( xfs_extlen_t len) { struct xfs_refcount_irec rc; - bool has_cowflag; int has_refcount; int error; @@ -427,8 +424,8 @@ xchk_xref_is_cow_staging( return; /* Find the CoW staging extent. */ - error = xfs_refcount_lookup_le(sc->sa.refc_cur, - agbno + XFS_REFC_COW_START, &has_refcount); + error = xfs_refcount_lookup_le(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW, + agbno, &has_refcount); if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (!has_refcount) { @@ -445,8 +442,7 @@ xchk_xref_is_cow_staging( } /* CoW flag must be set, refcount must be 1. */ - has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START); - if (!has_cowflag || rc.rc_refcount != 1) + if (rc.rc_domain != XFS_REFC_DOMAIN_COW || rc.rc_refcount != 1) xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); /* Must be at least as long as what was passed in */ @@ -470,7 +466,8 @@ xchk_xref_is_not_shared( if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) return; - error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared); + error = xfs_refcount_has_record(sc->sa.refc_cur, XFS_REFC_DOMAIN_SHARED, + agbno, len, &shared); if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) return; if (shared) From 571423a162cd86acb1b010a01c6203369586daa6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:23:58 -0700 Subject: [PATCH 114/328] xfs: report refcount domain in tracepoints Now that we've broken out the startblock and shared/cow domain in the incore refcount extent record structure, update the tracepoints to report the domain. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_types.h | 4 ++++ fs/xfs/xfs_trace.h | 48 +++++++++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index eb9a98338bb9..5ebdda7e1078 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -171,6 +171,10 @@ enum xfs_refc_domain { XFS_REFC_DOMAIN_COW, }; +#define XFS_REFC_DOMAIN_STRINGS \ + { XFS_REFC_DOMAIN_SHARED, "shared" }, \ + { XFS_REFC_DOMAIN_COW, "cow" } + struct xfs_refcount_irec { xfs_agblock_t rc_startblock; /* starting block number */ xfs_extlen_t rc_blockcount; /* count of free blocks */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index cb7c81ba7fa3..372d871bccc5 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -799,6 +799,9 @@ TRACE_DEFINE_ENUM(PE_SIZE_PTE); TRACE_DEFINE_ENUM(PE_SIZE_PMD); TRACE_DEFINE_ENUM(PE_SIZE_PUD); +TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED); +TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW); + TRACE_EVENT(xfs_filemap_fault, TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size, bool write_fault), @@ -2925,6 +2928,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) @@ -2932,13 +2936,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->domain = irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount) @@ -2958,6 +2964,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, domain) __field(xfs_agblock_t, startblock) __field(xfs_extlen_t, blockcount) __field(xfs_nlink_t, refcount) @@ -2966,14 +2973,16 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->domain = irec->rc_domain; __entry->startblock = irec->rc_startblock; __entry->blockcount = irec->rc_blockcount; __entry->refcount = irec->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->domain, XFS_REFC_DOMAIN_STRINGS), __entry->startblock, __entry->blockcount, __entry->refcount, @@ -2994,9 +3003,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) @@ -3004,20 +3015,24 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount) @@ -3038,9 +3053,11 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) @@ -3049,21 +3066,25 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; __entry->agbno = agbno; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u @ agbno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, @@ -3086,12 +3107,15 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) + __field(enum xfs_refc_domain, i1_domain) __field(xfs_agblock_t, i1_startblock) __field(xfs_extlen_t, i1_blockcount) __field(xfs_nlink_t, i1_refcount) + __field(enum xfs_refc_domain, i2_domain) __field(xfs_agblock_t, i2_startblock) __field(xfs_extlen_t, i2_blockcount) __field(xfs_nlink_t, i2_refcount) + __field(enum xfs_refc_domain, i3_domain) __field(xfs_agblock_t, i3_startblock) __field(xfs_extlen_t, i3_blockcount) __field(xfs_nlink_t, i3_refcount) @@ -3099,27 +3123,33 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class, TP_fast_assign( __entry->dev = mp->m_super->s_dev; __entry->agno = agno; + __entry->i1_domain = i1->rc_domain; __entry->i1_startblock = i1->rc_startblock; __entry->i1_blockcount = i1->rc_blockcount; __entry->i1_refcount = i1->rc_refcount; + __entry->i2_domain = i2->rc_domain; __entry->i2_startblock = i2->rc_startblock; __entry->i2_blockcount = i2->rc_blockcount; __entry->i2_refcount = i2->rc_refcount; + __entry->i3_domain = i3->rc_domain; __entry->i3_startblock = i3->rc_startblock; __entry->i3_blockcount = i3->rc_blockcount; __entry->i3_refcount = i3->rc_refcount; ), - TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u -- " - "agbno 0x%x fsbcount 0x%x refcount %u", + TP_printk("dev %d:%d agno 0x%x dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u -- " + "dom %s agbno 0x%x fsbcount 0x%x refcount %u", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __print_symbolic(__entry->i1_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i1_startblock, __entry->i1_blockcount, __entry->i1_refcount, + __print_symbolic(__entry->i2_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i2_startblock, __entry->i2_blockcount, __entry->i2_refcount, + __print_symbolic(__entry->i3_domain, XFS_REFC_DOMAIN_STRINGS), __entry->i3_startblock, __entry->i3_blockcount, __entry->i3_refcount) From f492135df0aa0417337f9b8b1cc6d6a994d61d25 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:31:27 -0700 Subject: [PATCH 115/328] xfs: refactor domain and refcount checking Create a helper function to ensure that CoW staging extent records have a single refcount and that shared extent records have more than 1 refcount. We'll put this to more use in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 5 +---- fs/xfs/libxfs/xfs_refcount.h | 12 ++++++++++++ fs/xfs/scrub/refcount.c | 10 ++++------ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 0f920eff34c4..8eaa11470f46 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -142,10 +142,7 @@ xfs_refcount_get_rec( if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; - /* handle special COW-staging domain */ - if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1) - goto out_bad_rec; - if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2) + if (!xfs_refcount_check_domain(irec)) goto out_bad_rec; /* check for valid extent range, including overflow */ diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 3beb5a30a9c9..ee32e8eb5a99 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -55,6 +55,18 @@ struct xfs_refcount_intent { xfs_fsblock_t ri_startblock; }; +/* Check that the refcount is appropriate for the record domain. */ +static inline bool +xfs_refcount_check_domain( + const struct xfs_refcount_irec *irec) +{ + if (irec->rc_domain == XFS_REFC_DOMAIN_COW && irec->rc_refcount != 1) + return false; + if (irec->rc_domain == XFS_REFC_DOMAIN_SHARED && irec->rc_refcount < 2) + return false; + return true; +} + void xfs_refcount_increase_extent(struct xfs_trans *tp, struct xfs_bmbt_irec *irec); void xfs_refcount_decrease_extent(struct xfs_trans *tp, diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index af5b796ec9ec..fe5ffe4f478d 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -337,14 +337,12 @@ xchk_refcountbt_rec( xfs_refcount_btrec_to_irec(rec, &irec); - /* Only CoW records can have refcount == 1. */ - if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED && irec.rc_refcount == 1) + /* Check the domain and refcount are not incompatible. */ + if (!xfs_refcount_check_domain(&irec)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); - if (irec.rc_domain == XFS_REFC_DOMAIN_COW) { - if (irec.rc_refcount != 1) - xchk_btree_set_corrupt(bs->sc, bs->cur, 0); + + if (irec.rc_domain == XFS_REFC_DOMAIN_COW) (*cow_blocks) += irec.rc_blockcount; - } /* Check the extent. */ if (!xfs_verify_agbext(pag, irec.rc_startblock, irec.rc_blockcount)) From 68d0f389179a52555cfd8fa3254e4adcd7576904 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:42:48 -0700 Subject: [PATCH 116/328] xfs: remove XFS_FIND_RCEXT_SHARED and _COW Now that we have an explicit enum for shared and CoW staging extents, we can get rid of the old FIND_RCEXT flags. Omit a couple of conversions that disappear in the next patches. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 48 +++++++++++++----------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 8eaa11470f46..ba2ddf177a49 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -617,8 +617,6 @@ out_error: return error; } -#define XFS_FIND_RCEXT_SHARED 1 -#define XFS_FIND_RCEXT_COW 2 /* * Find the left extent and the one after it (cleft). This function assumes * that we've already split any extent crossing agbno. @@ -628,20 +626,14 @@ xfs_refcount_find_left_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *left, struct xfs_refcount_irec *cleft, + enum xfs_refc_domain domain, xfs_agblock_t agbno, - xfs_extlen_t aglen, - int flags) + xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; - enum xfs_refc_domain domain; int error; int found_rec; - if (flags & XFS_FIND_RCEXT_SHARED) - domain = XFS_REFC_DOMAIN_SHARED; - else - domain = XFS_REFC_DOMAIN_COW; - left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec); if (error) @@ -659,9 +651,9 @@ xfs_refcount_find_left_extents( if (xfs_refc_next(&tmp) != agbno) return 0; - if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) + if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) return 0; - if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) return 0; /* We have a left extent; retrieve (or invent) the next right one */ *left = tmp; @@ -725,20 +717,14 @@ xfs_refcount_find_right_extents( struct xfs_btree_cur *cur, struct xfs_refcount_irec *right, struct xfs_refcount_irec *cright, + enum xfs_refc_domain domain, xfs_agblock_t agbno, - xfs_extlen_t aglen, - int flags) + xfs_extlen_t aglen) { struct xfs_refcount_irec tmp; - enum xfs_refc_domain domain; int error; int found_rec; - if (flags & XFS_FIND_RCEXT_SHARED) - domain = XFS_REFC_DOMAIN_SHARED; - else - domain = XFS_REFC_DOMAIN_COW; - right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec); if (error) @@ -756,9 +742,9 @@ xfs_refcount_find_right_extents( if (tmp.rc_startblock != agbno + aglen) return 0; - if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) + if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) return 0; - if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) + if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) return 0; /* We have a right extent; retrieve (or invent) the next left one */ *right = tmp; @@ -827,10 +813,10 @@ xfs_refc_valid( STATIC int xfs_refcount_merge_extents( struct xfs_btree_cur *cur, + enum xfs_refc_domain domain, xfs_agblock_t *agbno, xfs_extlen_t *aglen, enum xfs_refc_adjust_op adjust, - int flags, bool *shape_changed) { struct xfs_refcount_irec left = {0}, cleft = {0}; @@ -845,12 +831,12 @@ xfs_refcount_merge_extents( * just below (agbno + aglen) [cright], and just above (agbno + aglen) * [right]. */ - error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, - *aglen, flags); + error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain, + *agbno, *aglen); if (error) return error; - error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, - *aglen, flags); + error = xfs_refcount_find_right_extents(cur, &right, &cright, domain, + *agbno, *aglen); if (error) return error; @@ -1139,8 +1125,8 @@ xfs_refcount_adjust( /* * Try to merge with the left or right extents of the range. */ - error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, - XFS_FIND_RCEXT_SHARED, &shape_changed); + error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED, + new_agbno, new_aglen, adj, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1650,8 +1636,8 @@ xfs_refcount_adjust_cow( /* * Try to merge with the left or right extents of the range. */ - error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, - XFS_FIND_RCEXT_COW, &shape_changed); + error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno, + &aglen, adj, &shape_changed); if (error) goto out_error; From f62ac3e0ac33d366fe81e194fee81de9be2cd886 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:16:36 -0700 Subject: [PATCH 117/328] xfs: check record domain when accessing refcount records Now that we've separated the startblock and CoW/shared extent domain in the incore refcount record structure, check the domain whenever we retrieve a record to ensure that it's still in the domain that we want. Depending on the circumstances, a change in domain either means we're done processing or that we've found a corruption and need to fail out. The refcount check in xchk_xref_is_cow_staging is redundant since _get_rec has done that for a long time now, so we can get rid of it. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 53 ++++++++++++++++++++++++++++-------- fs/xfs/scrub/refcount.c | 4 +-- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index ba2ddf177a49..27ed4c10d0d0 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -381,6 +381,8 @@ xfs_refcount_split_extent( error = -EFSCORRUPTED; goto out_error; } + if (rcext.rc_domain != domain) + return 0; if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) return 0; @@ -432,6 +434,9 @@ xfs_refcount_merge_center_extents( trace_xfs_refcount_merge_center_extents(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, center, right); + ASSERT(left->rc_domain == center->rc_domain); + ASSERT(right->rc_domain == center->rc_domain); + /* * Make sure the center and right extents are not in the btree. * If the center extent was synthesized, the first delete call @@ -508,6 +513,8 @@ xfs_refcount_merge_left_extent( trace_xfs_refcount_merge_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, left, cleft); + ASSERT(left->rc_domain == cleft->rc_domain); + /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ if (cleft->rc_refcount > 1) { error = xfs_refcount_lookup_le(cur, cleft->rc_domain, @@ -569,6 +576,8 @@ xfs_refcount_merge_right_extent( trace_xfs_refcount_merge_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, cright, right); + ASSERT(right->rc_domain == cright->rc_domain); + /* * If the extent ending at agbno+aglen (cright) wasn't synthesized, * remove it. @@ -649,12 +658,10 @@ xfs_refcount_find_left_extents( goto out_error; } + if (tmp.rc_domain != domain) + return 0; if (xfs_refc_next(&tmp) != agbno) return 0; - if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) - return 0; - if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) - return 0; /* We have a left extent; retrieve (or invent) the next right one */ *left = tmp; @@ -670,6 +677,9 @@ xfs_refcount_find_left_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp starts at the end of our range, just use that */ if (tmp.rc_startblock == agbno) *cleft = tmp; @@ -689,6 +699,7 @@ xfs_refcount_find_left_extents( cleft->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. @@ -740,12 +751,10 @@ xfs_refcount_find_right_extents( goto out_error; } + if (tmp.rc_domain != domain) + return 0; if (tmp.rc_startblock != agbno + aglen) return 0; - if (domain == XFS_REFC_DOMAIN_SHARED && tmp.rc_refcount < 2) - return 0; - if (domain == XFS_REFC_DOMAIN_COW && tmp.rc_refcount > 1) - return 0; /* We have a right extent; retrieve (or invent) the next left one */ *right = tmp; @@ -761,6 +770,9 @@ xfs_refcount_find_right_extents( goto out_error; } + if (tmp.rc_domain != domain) + goto not_found; + /* if tmp ends at the end of our range, just use that */ if (xfs_refc_next(&tmp) == agbno + aglen) *cright = tmp; @@ -780,6 +792,7 @@ xfs_refcount_find_right_extents( cright->rc_domain = domain; } } else { +not_found: /* * No extents, so pretend that there's one covering the whole * range. @@ -889,7 +902,7 @@ xfs_refcount_merge_extents( aglen); } - return error; + return 0; } /* @@ -961,7 +974,7 @@ xfs_refcount_adjust_extents( error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; - if (!found_rec) { + if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) { ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; ext.rc_refcount = 0; @@ -1400,6 +1413,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; /* If the extent ends before the start, look at the next one */ if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { @@ -1415,6 +1430,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED) + goto done; } /* If the extent starts after the range we want, bail out */ @@ -1446,7 +1463,8 @@ xfs_refcount_find_shared( error = -EFSCORRUPTED; goto out_error; } - if (tmp.rc_startblock >= agbno + aglen || + if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED || + tmp.rc_startblock >= agbno + aglen || tmp.rc_startblock != *fbno + *flen) break; *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); @@ -1537,6 +1555,11 @@ xfs_refcount_adjust_cow_extents( error = xfs_refcount_get_rec(cur, &ext, &found_rec); if (error) goto out_error; + if (XFS_IS_CORRUPT(cur->bc_mp, found_rec && + ext.rc_domain != XFS_REFC_DOMAIN_COW)) { + error = -EFSCORRUPTED; + goto out_error; + } if (!found_rec) { ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; ext.rc_blockcount = 0; @@ -1746,8 +1769,14 @@ xfs_refcount_recover_extent( rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); - list_add_tail(&rr->rr_list, debris); + if (XFS_IS_CORRUPT(cur->bc_mp, + rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { + kmem_free(rr); + return -EFSCORRUPTED; + } + + list_add_tail(&rr->rr_list, debris); return 0; } diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index fe5ffe4f478d..a26ee0f24ef2 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -439,8 +439,8 @@ xchk_xref_is_cow_staging( return; } - /* CoW flag must be set, refcount must be 1. */ - if (rc.rc_domain != XFS_REFC_DOMAIN_COW || rc.rc_refcount != 1) + /* CoW lookup returned a shared extent record? */ + if (rc.rc_domain != XFS_REFC_DOMAIN_COW) xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); /* Must be at least as long as what was passed in */ From f1fdc8207840672a46f26414f2c989ec078a153b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 27 Oct 2022 09:48:59 -0700 Subject: [PATCH 118/328] xfs: fix agblocks check in the cow leftover recovery function As we've seen, refcount records use the upper bit of the rc_startblock field to ensure that all the refcount records are at the right side of the refcount btree. This works because an AG is never allowed to have more than (1U << 31) blocks in it. If we ever encounter a filesystem claiming to have that many blocks, we absolutely do not want reflink touching it at all. However, this test at the start of xfs_refcount_recover_cow_leftovers is slightly incorrect -- it /should/ be checking that agblocks isn't larger than the XFS_MAX_CRC_AG_BLOCKS constant, and it should check that the constant is never large enough to conflict with that CoW flag. Note that the V5 superblock verifier has not historically rejected filesystems where agblocks >= XFS_MAX_CRC_AG_BLOCKS, which is why this ended up in the COW recovery routine. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 27ed4c10d0d0..ad0fb6a7177b 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1796,7 +1796,9 @@ xfs_refcount_recover_cow_leftovers( xfs_fsblock_t fsb; int error; - if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) + /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */ + BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COW_START); + if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS) return -EOPNOTSUPP; INIT_LIST_HEAD(&debris); From c1ccf967bf962b998f0c096e06a658ece27d10a0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 26 Oct 2022 14:55:04 -0700 Subject: [PATCH 119/328] xfs: fix uninitialized list head in struct xfs_refcount_recovery We're supposed to initialize the list head of an object before adding it to another list. Fix that, and stop using the kmem_{alloc,free} calls from the Irix days. Fixes: 174edb0e46e5 ("xfs: store in-progress CoW allocations in the refcount btree") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_refcount.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index ad0fb6a7177b..44d4667d4301 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1767,12 +1767,14 @@ xfs_refcount_recover_extent( be32_to_cpu(rec->refc.rc_refcount) != 1)) return -EFSCORRUPTED; - rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); + rr = kmalloc(sizeof(struct xfs_refcount_recovery), + GFP_KERNEL | __GFP_NOFAIL); + INIT_LIST_HEAD(&rr->rr_list); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); if (XFS_IS_CORRUPT(cur->bc_mp, rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { - kmem_free(rr); + kfree(rr); return -EFSCORRUPTED; } @@ -1859,7 +1861,7 @@ xfs_refcount_recover_cow_leftovers( goto out_free; list_del(&rr->rr_list); - kmem_free(rr); + kfree(rr); } return error; @@ -1869,7 +1871,7 @@ out_free: /* Free the leftover list */ list_for_each_entry_safe(rr, n, &debris, rr_list) { list_del(&rr->rr_list); - kmem_free(rr); + kfree(rr); } return error; } From 8b972158afcaa66c538c3ee1d394f096fcd238a8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 10 Oct 2022 11:13:20 -0700 Subject: [PATCH 120/328] xfs: rename XFS_REFC_COW_START to _COWFLAG We've been (ab)using XFS_REFC_COW_START as both an integer quantity and a bit flag, even though it's *only* a bit flag. Rename the variable to reflect its nature and update the cast target since we're not supposed to be comparing it to xfs_agblock_t now. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- fs/xfs/libxfs/xfs_format.h | 2 +- fs/xfs/libxfs/xfs_refcount.c | 6 +++--- fs/xfs/libxfs/xfs_refcount.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 005dd65b71cd..371dc07233e0 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1612,7 +1612,7 @@ unsigned int xfs_refc_block(struct xfs_mount *mp); * on the startblock. This speeds up mount time deletion of stale * staging extents because they're all at the right side of the tree. */ -#define XFS_REFC_COW_START ((xfs_agblock_t)(1U << 31)) +#define XFS_REFC_COWFLAG (1U << 31) #define REFCNTBT_COWFLAG_BITLEN 1 #define REFCNTBT_AGBLOCK_BITLEN 31 diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 44d4667d4301..3f34bafe18dd 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -108,8 +108,8 @@ xfs_refcount_btrec_to_irec( uint32_t start; start = be32_to_cpu(rec->refc.rc_startblock); - if (start & XFS_REFC_COW_START) { - start &= ~XFS_REFC_COW_START; + if (start & XFS_REFC_COWFLAG) { + start &= ~XFS_REFC_COWFLAG; irec->rc_domain = XFS_REFC_DOMAIN_COW; } else { irec->rc_domain = XFS_REFC_DOMAIN_SHARED; @@ -1799,7 +1799,7 @@ xfs_refcount_recover_cow_leftovers( int error; /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */ - BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COW_START); + BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG); if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS) return -EOPNOTSUPP; diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index ee32e8eb5a99..452f30556f5a 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -34,9 +34,9 @@ xfs_refcount_encode_startblock( * query functions (which set rc_domain == -1U), so we check that the * domain is /not/ shared. */ - start = startblock & ~XFS_REFC_COW_START; + start = startblock & ~XFS_REFC_COWFLAG; if (domain != XFS_REFC_DOMAIN_SHARED) - start |= XFS_REFC_COW_START; + start |= XFS_REFC_COWFLAG; return start; } From 7a2726ec3290c52f52ce8d5f5af73ab8c7681bc1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 31 Oct 2022 08:36:15 +0800 Subject: [PATCH 121/328] KVM: Check KVM_CAP_DIRTY_LOG_{RING, RING_ACQ_REL} prior to enabling them There are two capabilities related to ring-based dirty page tracking: KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL. Both are supported by x86. However, arm64 supports KVM_CAP_DIRTY_LOG_RING_ACQ_REL only when the feature is supported on arm64. The userspace doesn't have to enable the advertised capability, meaning KVM_CAP_DIRTY_LOG_RING can be enabled on arm64 by userspace and it's wrong. Fix it by double checking if the capability has been advertised prior to enabling it. It's rejected to enable the capability if it hasn't been advertised. Fixes: 17601bfed909 ("KVM: Add KVM_CAP_DIRTY_LOG_RING_ACQ_REL capability and config option") Reported-by: Sean Christopherson Suggested-by: Sean Christopherson Signed-off-by: Gavin Shan Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221031003621.164306-4-gshan@redhat.com --- virt/kvm/kvm_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1376a47fedee..46e8ed1ae647 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4585,6 +4585,9 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, } case KVM_CAP_DIRTY_LOG_RING: case KVM_CAP_DIRTY_LOG_RING_ACQ_REL: + if (!kvm_vm_ioctl_check_extension_generic(kvm, cap->cap)) + return -EINVAL; + return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); default: return kvm_vm_ioctl_enable_cap(kvm, cap); From 878eb6e48f240d02ed1c9298020a0b6370695f24 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 1 Nov 2022 00:54:13 +0000 Subject: [PATCH 122/328] block: blk_add_rq_to_plug(): clear stale 'last' after flush blk_mq_flush_plug_list() empties ->mq_list and request we'd peeked there before that call is gone; in any case, we are not dealing with a mix of requests for different queues now - there's no requests left in the plug. Signed-off-by: Al Viro Signed-off-by: Jens Axboe --- block/blk-mq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 21cc7c2da0f9..6a789cda68a5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1262,6 +1262,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) (!blk_queue_nomerges(rq->q) && blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { blk_mq_flush_plug_list(plug, false); + last = NULL; trace_block_plug(rq->q); } From e6cb8769452e8236b52134e5cb4a18b8f5986932 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 24 Oct 2022 18:28:43 +0200 Subject: [PATCH 123/328] wifi: airo: do not assign -1 to unsigned char With char becoming unsigned by default, and with `char` alone being ambiguous and based on architecture, we get a warning when assigning the unchecked output of hex_to_bin() to that unsigned char. Mark `key` as a `u8`, which matches the struct's type, and then check each call to hex_to_bin() before casting. Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Jason A. Donenfeld Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221024162843.535921-1-Jason@zx2c4.com --- drivers/net/wireless/cisco/airo.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 10daef81c355..fb2c35bd73bb 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -5232,7 +5232,7 @@ static int get_wep_tx_idx(struct airo_info *ai) return -1; } -static int set_wep_key(struct airo_info *ai, u16 index, const char *key, +static int set_wep_key(struct airo_info *ai, u16 index, const u8 *key, u16 keylen, int perm, int lock) { static const unsigned char macaddr[ETH_ALEN] = { 0x01, 0, 0, 0, 0, 0 }; @@ -5283,7 +5283,7 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file) struct net_device *dev = pde_data(inode); struct airo_info *ai = dev->ml_priv; int i, rc; - char key[16]; + u8 key[16]; u16 index = 0; int j = 0; @@ -5311,12 +5311,22 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file) } for (i = 0; i < 16*3 && data->wbuffer[i+j]; i++) { + int val; + + if (i % 3 == 2) + continue; + + val = hex_to_bin(data->wbuffer[i+j]); + if (val < 0) { + airo_print_err(ai->dev->name, "WebKey passed invalid key hex"); + return; + } switch(i%3) { case 0: - key[i/3] = hex_to_bin(data->wbuffer[i+j])<<4; + key[i/3] = (u8)val << 4; break; case 1: - key[i/3] |= hex_to_bin(data->wbuffer[i+j]); + key[i/3] |= (u8)val; break; } } From 6788ba8aed4e28e90f72d68a9d794e34eac17295 Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Fri, 21 Oct 2022 15:13:59 +0900 Subject: [PATCH 124/328] wifi: brcmfmac: Fix potential buffer overflow in brcmf_fweh_event_worker() This patch fixes an intra-object buffer overflow in brcmfmac that occurs when the device provides a 'bsscfgidx' equal to or greater than the buffer size. The patch adds a check that leads to a safe failure if that is the case. This fixes CVE-2022-3628. UBSAN: array-index-out-of-bounds in drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 52 is out of range for type 'brcmf_if *[16]' CPU: 0 PID: 1898 Comm: kworker/0:2 Tainted: G O 5.14.0+ #132 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: events brcmf_fweh_event_worker Call Trace: dump_stack_lvl+0x57/0x7d ubsan_epilogue+0x5/0x40 __ubsan_handle_out_of_bounds+0x69/0x80 ? memcpy+0x39/0x60 brcmf_fweh_event_worker+0xae1/0xc00 ? brcmf_fweh_call_event_handler.isra.0+0x100/0x100 ? rcu_read_lock_sched_held+0xa1/0xd0 ? rcu_read_lock_bh_held+0xb0/0xb0 ? lockdep_hardirqs_on_prepare+0x273/0x3e0 process_one_work+0x873/0x13e0 ? lock_release+0x640/0x640 ? pwq_dec_nr_in_flight+0x320/0x320 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x8b/0xd10 ? __kthread_parkme+0xd9/0x1d0 ? process_one_work+0x13e0/0x13e0 kthread+0x379/0x450 ? _raw_spin_unlock_irq+0x24/0x30 ? set_kthread_struct+0x100/0x100 ret_from_fork+0x1f/0x30 ================================================================================ general protection fault, probably for non-canonical address 0xe5601c0020023fff: 0000 [#1] SMP KASAN KASAN: maybe wild-memory-access in range [0x2b0100010011fff8-0x2b0100010011ffff] CPU: 0 PID: 1898 Comm: kworker/0:2 Tainted: G O 5.14.0+ #132 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: events brcmf_fweh_event_worker RIP: 0010:brcmf_fweh_call_event_handler.isra.0+0x42/0x100 Code: 89 f5 53 48 89 fb 48 83 ec 08 e8 79 0b 38 fe 48 85 ed 74 7e e8 6f 0b 38 fe 48 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 8b 00 00 00 4c 8b 7d 00 44 89 e0 48 ba 00 00 00 RSP: 0018:ffffc9000259fbd8 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffff888115d8cd50 RCX: 0000000000000000 RDX: 0560200020023fff RSI: ffffffff8304bc91 RDI: ffff888115d8cd50 RBP: 2b0100010011ffff R08: ffff888112340050 R09: ffffed1023549809 R10: ffff88811aa4c047 R11: ffffed1023549808 R12: 0000000000000045 R13: ffffc9000259fca0 R14: ffff888112340050 R15: ffff888112340000 FS: 0000000000000000(0000) GS:ffff88811aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000004053ccc0 CR3: 0000000112740000 CR4: 0000000000750ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: brcmf_fweh_event_worker+0x117/0xc00 ? brcmf_fweh_call_event_handler.isra.0+0x100/0x100 ? rcu_read_lock_sched_held+0xa1/0xd0 ? rcu_read_lock_bh_held+0xb0/0xb0 ? lockdep_hardirqs_on_prepare+0x273/0x3e0 process_one_work+0x873/0x13e0 ? lock_release+0x640/0x640 ? pwq_dec_nr_in_flight+0x320/0x320 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x8b/0xd10 ? __kthread_parkme+0xd9/0x1d0 ? process_one_work+0x13e0/0x13e0 kthread+0x379/0x450 ? _raw_spin_unlock_irq+0x24/0x30 ? set_kthread_struct+0x100/0x100 ret_from_fork+0x1f/0x30 Modules linked in: 88XXau(O) 88x2bu(O) ---[ end trace 41d302138f3ff55a ]--- RIP: 0010:brcmf_fweh_call_event_handler.isra.0+0x42/0x100 Code: 89 f5 53 48 89 fb 48 83 ec 08 e8 79 0b 38 fe 48 85 ed 74 7e e8 6f 0b 38 fe 48 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 8b 00 00 00 4c 8b 7d 00 44 89 e0 48 ba 00 00 00 RSP: 0018:ffffc9000259fbd8 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffff888115d8cd50 RCX: 0000000000000000 RDX: 0560200020023fff RSI: ffffffff8304bc91 RDI: ffff888115d8cd50 RBP: 2b0100010011ffff R08: ffff888112340050 R09: ffffed1023549809 R10: ffff88811aa4c047 R11: ffffed1023549808 R12: 0000000000000045 R13: ffffc9000259fca0 R14: ffff888112340050 R15: ffff888112340000 FS: 0000000000000000(0000) GS:ffff88811aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000004053ccc0 CR3: 0000000112740000 CR4: 0000000000750ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Reported-by: Dokyung Song Reported-by: Jisoo Jang Reported-by: Minsuk Kang Reviewed-by: Arend van Spriel Cc: Signed-off-by: Dokyung Song Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221021061359.GA550858@laguna --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index bc3f4e4edcdf..dac7eb77799b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -228,6 +228,10 @@ static void brcmf_fweh_event_worker(struct work_struct *work) brcmf_fweh_event_name(event->code), event->code, event->emsg.ifidx, event->emsg.bsscfgidx, event->emsg.addr); + if (event->emsg.bsscfgidx >= BRCMF_MAX_IFS) { + bphy_err(drvr, "invalid bsscfg index: %u\n", event->emsg.bsscfgidx); + goto event_free; + } /* convert event message */ emsg_be = &event->emsg; From cd73adcdbad3d9e9923b045d3643409e9c148d17 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 30 Oct 2022 22:08:54 +0100 Subject: [PATCH 125/328] ARM: dts: ux500: Add trips to battery thermal zones Recent changes to the thermal framework has made the trip points (trips) for thermal zones compulsory, which made the Ux500 DTS files break validation and also stopped probing because of similar changes to the code. Fix this by adding an "outer bounding box": battery thermal zones should not get warmer than 70 degress, then we will shut down. Fixes: 8c596324232d ("dt-bindings: thermal: Fix missing required property") Fixes: 3fd6d6e2b4e8 ("thermal/of: Rework the thermal device tree initialization") Signed-off-by: Linus Walleij Cc: Daniel Lezcano Cc: linux-pm@vger.kernel.org Link: https://lore.kernel.org/r/20221030210854.346662-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ste-href.dtsi | 8 ++++++++ arch/arm/boot/dts/ste-snowball.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-codina.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-gavini.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-golden.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-janice.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-kyle.dts | 8 ++++++++ arch/arm/boot/dts/ste-ux500-samsung-skomer.dts | 8 ++++++++ 9 files changed, 72 insertions(+) diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi index fbaa0ce46427..8f1bb78fc1e4 100644 --- a/arch/arm/boot/dts/ste-href.dtsi +++ b/arch/arm/boot/dts/ste-href.dtsi @@ -24,6 +24,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index 1c9094f24893..e2f0cdacba7d 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -28,6 +28,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts b/arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts index d6940e0afa86..27a3ab7e25e1 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-codina-tmo.dts @@ -44,6 +44,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts b/arch/arm/boot/dts/ste-ux500-samsung-codina.dts index 5f41256d7f4b..b88f0c07873d 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-codina.dts @@ -57,6 +57,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts b/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts index 806da3fc33cd..7231bc745200 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts @@ -30,6 +30,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts index b0dce91aff4b..9604695edf53 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts @@ -35,6 +35,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-janice.dts b/arch/arm/boot/dts/ste-ux500-samsung-janice.dts index ed5c79c3d04b..69387e8754a9 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-janice.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-janice.dts @@ -30,6 +30,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-kyle.dts b/arch/arm/boot/dts/ste-ux500-samsung-kyle.dts index c57676faf181..167846df3104 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-kyle.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-kyle.dts @@ -34,6 +34,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts index 81b341a5ae45..93e5f5ed888d 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts @@ -30,6 +30,14 @@ polling-delay = <0>; polling-delay-passive = <0>; thermal-sensors = <&bat_therm>; + + trips { + battery-crit-hi { + temperature = <70000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; }; From a0e215088ef4955f442af58822da5d2bcc3fbbe7 Mon Sep 17 00:00:00 2001 From: Jay Fang Date: Fri, 28 Oct 2022 18:54:34 +0800 Subject: [PATCH 126/328] MAINTAINERS: Update HiSilicon LPC BUS Driver maintainer Add Jay Fang as the maintainer of the HiSilicon LPC BUS Driver, replacing John Garry. Signed-off-by: Jay Fang Link: https://lore.kernel.org/r/20221028105434.1661264-1-f.fangjian@huawei.com' Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8ae6d1a81824..d3b729b3cbfc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9214,7 +9214,7 @@ W: https://www.hisilicon.com F: drivers/i2c/busses/i2c-hisi.c HISILICON LPC BUS DRIVER -M: john.garry@huawei.com +M: Jay Fang S: Maintained W: http://www.hisilicon.com F: Documentation/devicetree/bindings/arm/hisilicon/low-pin-count.yaml From 3f4071cbd2063b917486d1047a4da47718215fee Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:26 +0100 Subject: [PATCH 127/328] firmware: arm_scmi: Cleanup the core driver removal callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Platform drivers .remove callbacks are not supposed to fail and report errors. Such errors are indeed ignored by the core platform drivers and the driver unbind process is anyway completed. The SCMI core platform driver as it is now, instead, bails out reporting an error in case of an explicit unbind request. Fix the removal path by adding proper device links between the core SCMI device and the SCMI protocol devices so that a full SCMI stack unbind is triggered when the core driver is removed. The remove process does not bail out anymore on the anomalous conditions triggered by an explicit unbind but the user is still warned. Reported-by: Uwe Kleine-König Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-1-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/bus.c | 11 +++++++++++ drivers/firmware/arm_scmi/common.h | 1 + drivers/firmware/arm_scmi/driver.c | 31 ++++++++++++++++++------------ 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index d4e23101448a..35bb70724d44 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -216,9 +216,20 @@ void scmi_device_destroy(struct scmi_device *scmi_dev) device_unregister(&scmi_dev->dev); } +void scmi_device_link_add(struct device *consumer, struct device *supplier) +{ + struct device_link *link; + + link = device_link_add(consumer, supplier, DL_FLAG_AUTOREMOVE_CONSUMER); + + WARN_ON(!link); +} + void scmi_set_handle(struct scmi_device *scmi_dev) { scmi_dev->handle = scmi_handle_get(&scmi_dev->dev); + if (scmi_dev->handle) + scmi_device_link_add(&scmi_dev->dev, scmi_dev->handle->dev); } int scmi_protocol_register(const struct scmi_protocol *proto) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 61aba7447c32..9b87b5b69535 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -97,6 +97,7 @@ static inline void unpack_scmi_header(u32 msg_hdr, struct scmi_msg_hdr *hdr) struct scmi_revision_info * scmi_revision_area_get(const struct scmi_protocol_handle *ph); int scmi_handle_put(const struct scmi_handle *handle); +void scmi_device_link_add(struct device *consumer, struct device *supplier); struct scmi_handle *scmi_handle_get(struct device *dev); void scmi_set_handle(struct scmi_device *scmi_dev); void scmi_setup_protocol_implemented(const struct scmi_protocol_handle *ph, diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 609ebedee9cb..7e19b6055d75 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2273,10 +2273,16 @@ int scmi_protocol_device_request(const struct scmi_device_id *id_table) sdev = scmi_get_protocol_device(child, info, id_table->protocol_id, id_table->name); - /* Set handle if not already set: device existed */ - if (sdev && !sdev->handle) - sdev->handle = - scmi_handle_get_from_info_unlocked(info); + if (sdev) { + /* Set handle if not already set: device existed */ + if (!sdev->handle) + sdev->handle = + scmi_handle_get_from_info_unlocked(info); + /* Relink consumer and suppliers */ + if (sdev->handle) + scmi_device_link_add(&sdev->dev, + sdev->handle->dev); + } } else { dev_err(info->dev, "Failed. SCMI protocol %d not active.\n", @@ -2475,20 +2481,17 @@ void scmi_free_channel(struct scmi_chan_info *cinfo, struct idr *idr, int id) static int scmi_remove(struct platform_device *pdev) { - int ret = 0, id; + int ret, id; struct scmi_info *info = platform_get_drvdata(pdev); struct device_node *child; mutex_lock(&scmi_list_mutex); if (info->users) - ret = -EBUSY; - else - list_del(&info->node); + dev_warn(&pdev->dev, + "Still active SCMI users will be forcibly unbound.\n"); + list_del(&info->node); mutex_unlock(&scmi_list_mutex); - if (ret) - return ret; - scmi_notification_exit(&info->handle); mutex_lock(&info->protocols_mtx); @@ -2500,7 +2503,11 @@ static int scmi_remove(struct platform_device *pdev) idr_destroy(&info->active_protocols); /* Safe to free channels since no more users */ - return scmi_cleanup_txrx_channels(info); + ret = scmi_cleanup_txrx_channels(info); + if (ret) + dev_warn(&pdev->dev, "Failed to cleanup SCMI channels.\n"); + + return 0; } static ssize_t protocol_version_show(struct device *dev, From fd96fbc8fad35d6b1872c90df8a2f5d721f14d91 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:27 +0100 Subject: [PATCH 128/328] firmware: arm_scmi: Suppress the driver's bind attributes Suppress the capability to unbind the core SCMI driver since all the SCMI stack protocol drivers depend on it. Fixes: aa4f886f3893 ("firmware: arm_scmi: add basic driver infrastructure for SCMI") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-2-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 7e19b6055d75..94be633b55a0 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2578,6 +2578,7 @@ MODULE_DEVICE_TABLE(of, scmi_of_match); static struct platform_driver scmi_driver = { .driver = { .name = "arm-scmi", + .suppress_bind_attrs = true, .of_match_table = scmi_of_match, .dev_groups = versions_groups, }, From 59172b212ec0dbb97ceb5671d912e6e61fa802d5 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:28 +0100 Subject: [PATCH 129/328] firmware: arm_scmi: Make tx_prepare time out eventually SCMI transports based on shared memory, at start of transmissions, have to wait for the shared Tx channel area to be eventually freed by the SCMI platform before accessing the channel. In fact the channel is owned by the SCMI platform until marked as free by the platform itself and, as such, cannot be used by the agent until relinquished. As a consequence a badly misbehaving SCMI platform firmware could lock the channel indefinitely and make the kernel side SCMI stack loop forever waiting for such channel to be freed, possibly hanging the whole boot sequence. Add a timeout to the existent Tx waiting spin-loop so that, when the system ends up in this situation, the SCMI stack can at least bail-out, nosily warn the user, and abort the transmission. Reported-by: YaxiongTian Suggested-by: YaxiongTian Cc: Vincent Guittot Cc: Etienne Carriere Cc: Florian Fainelli Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-3-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/common.h | 4 +++- drivers/firmware/arm_scmi/driver.c | 1 + drivers/firmware/arm_scmi/mailbox.c | 2 +- drivers/firmware/arm_scmi/optee.c | 2 +- drivers/firmware/arm_scmi/shmem.c | 31 +++++++++++++++++++++++++---- drivers/firmware/arm_scmi/smc.c | 2 +- 6 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 9b87b5b69535..a1c0154c31c6 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -118,6 +118,7 @@ void scmi_protocol_release(const struct scmi_handle *handle, u8 protocol_id); * * @dev: Reference to device in the SCMI hierarchy corresponding to this * channel + * @rx_timeout_ms: The configured RX timeout in milliseconds. * @handle: Pointer to SCMI entity handle * @no_completion_irq: Flag to indicate that this channel has no completion * interrupt mechanism for synchronous commands. @@ -127,6 +128,7 @@ void scmi_protocol_release(const struct scmi_handle *handle, u8 protocol_id); */ struct scmi_chan_info { struct device *dev; + unsigned int rx_timeout_ms; struct scmi_handle *handle; bool no_completion_irq; void *transport_info; @@ -233,7 +235,7 @@ void scmi_free_channel(struct scmi_chan_info *cinfo, struct idr *idr, int id); struct scmi_shared_mem; void shmem_tx_prepare(struct scmi_shared_mem __iomem *shmem, - struct scmi_xfer *xfer); + struct scmi_xfer *xfer, struct scmi_chan_info *cinfo); u32 shmem_read_header(struct scmi_shared_mem __iomem *shmem); void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem, struct scmi_xfer *xfer); diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 94be633b55a0..985775f210f9 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2013,6 +2013,7 @@ static int scmi_chan_setup(struct scmi_info *info, struct device *dev, return -ENOMEM; cinfo->dev = dev; + cinfo->rx_timeout_ms = info->desc->max_rx_timeout_ms; ret = info->desc->ops->chan_setup(cinfo, info->dev, tx); if (ret) diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c index 08ff4d110beb..1e40cb035044 100644 --- a/drivers/firmware/arm_scmi/mailbox.c +++ b/drivers/firmware/arm_scmi/mailbox.c @@ -36,7 +36,7 @@ static void tx_prepare(struct mbox_client *cl, void *m) { struct scmi_mailbox *smbox = client_to_scmi_mailbox(cl); - shmem_tx_prepare(smbox->shmem, m); + shmem_tx_prepare(smbox->shmem, m, smbox->cinfo); } static void rx_callback(struct mbox_client *cl, void *m) diff --git a/drivers/firmware/arm_scmi/optee.c b/drivers/firmware/arm_scmi/optee.c index f42dad997ac9..2a7aeab40e54 100644 --- a/drivers/firmware/arm_scmi/optee.c +++ b/drivers/firmware/arm_scmi/optee.c @@ -498,7 +498,7 @@ static int scmi_optee_send_message(struct scmi_chan_info *cinfo, msg_tx_prepare(channel->req.msg, xfer); ret = invoke_process_msg_channel(channel, msg_command_size(xfer)); } else { - shmem_tx_prepare(channel->req.shmem, xfer); + shmem_tx_prepare(channel->req.shmem, xfer, cinfo); ret = invoke_process_smt_channel(channel); } diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 0e3eaea5d852..1dfe534b8518 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -5,10 +5,13 @@ * Copyright (C) 2019 ARM Ltd. */ +#include #include #include #include +#include + #include "common.h" /* @@ -30,16 +33,36 @@ struct scmi_shared_mem { }; void shmem_tx_prepare(struct scmi_shared_mem __iomem *shmem, - struct scmi_xfer *xfer) + struct scmi_xfer *xfer, struct scmi_chan_info *cinfo) { + ktime_t stop; + /* * Ideally channel must be free by now unless OS timeout last * request and platform continued to process the same, wait * until it releases the shared memory, otherwise we may endup - * overwriting its response with new message payload or vice-versa + * overwriting its response with new message payload or vice-versa. + * Giving up anyway after twice the expected channel timeout so as + * not to bail-out on intermittent issues where the platform is + * occasionally a bit slower to answer. + * + * Note that after a timeout is detected we bail-out and carry on but + * the transport functionality is probably permanently compromised: + * this is just to ease debugging and avoid complete hangs on boot + * due to a misbehaving SCMI firmware. */ - spin_until_cond(ioread32(&shmem->channel_status) & - SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE); + stop = ktime_add_ms(ktime_get(), 2 * cinfo->rx_timeout_ms); + spin_until_cond((ioread32(&shmem->channel_status) & + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE) || + ktime_after(ktime_get(), stop)); + if (!(ioread32(&shmem->channel_status) & + SCMI_SHMEM_CHAN_STAT_CHANNEL_FREE)) { + WARN_ON_ONCE(1); + dev_err(cinfo->dev, + "Timeout waiting for a free TX channel !\n"); + return; + } + /* Mark channel busy + clear error */ iowrite32(0x0, &shmem->channel_status); iowrite32(xfer->hdr.poll_completion ? 0 : SCMI_SHMEM_FLAG_INTR_ENABLED, diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c index 745acfdd0b3d..87a7b13cf868 100644 --- a/drivers/firmware/arm_scmi/smc.c +++ b/drivers/firmware/arm_scmi/smc.c @@ -188,7 +188,7 @@ static int smc_send_message(struct scmi_chan_info *cinfo, */ smc_channel_lock_acquire(scmi_info, xfer); - shmem_tx_prepare(scmi_info->shmem, xfer); + shmem_tx_prepare(scmi_info->shmem, xfer, cinfo); arm_smccc_1_1_invoke(scmi_info->func_id, 0, 0, 0, 0, 0, 0, 0, &res); From be9ba1f7f9e0b565b19f4294f5871da9d654bc6d Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:29 +0100 Subject: [PATCH 130/328] firmware: arm_scmi: Make Rx chan_setup fail on memory errors SCMI Rx channels are optional and they can fail to be setup when not present but anyway channels setup routines must bail-out on memory errors. Make channels setup, and related probing, fail when memory errors are reported on Rx channels. Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-4-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 985775f210f9..f818d00bb2c6 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -2045,8 +2045,12 @@ scmi_txrx_setup(struct scmi_info *info, struct device *dev, int prot_id) { int ret = scmi_chan_setup(info, dev, prot_id, true); - if (!ret) /* Rx is optional, hence no error check */ - scmi_chan_setup(info, dev, prot_id, false); + if (!ret) { + /* Rx is optional, report only memory errors */ + ret = scmi_chan_setup(info, dev, prot_id, false); + if (ret && ret != -ENOMEM) + ret = 0; + } return ret; } From 5ffc1c4cb896f8d2cf10309422da3633a616d60f Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:30 +0100 Subject: [PATCH 131/328] firmware: arm_scmi: Fix devres allocation device in virtio transport SCMI virtio transport device managed allocations must use the main platform device in devres operations instead of the channel devices. Cc: Peter Hilber Fixes: 46abe13b5e3d ("firmware: arm_scmi: Add virtio transport") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-5-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/virtio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c index 14709dbc96a1..36b7686843a4 100644 --- a/drivers/firmware/arm_scmi/virtio.c +++ b/drivers/firmware/arm_scmi/virtio.c @@ -444,12 +444,12 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, for (i = 0; i < vioch->max_msg; i++) { struct scmi_vio_msg *msg; - msg = devm_kzalloc(cinfo->dev, sizeof(*msg), GFP_KERNEL); + msg = devm_kzalloc(dev, sizeof(*msg), GFP_KERNEL); if (!msg) return -ENOMEM; if (tx) { - msg->request = devm_kzalloc(cinfo->dev, + msg->request = devm_kzalloc(dev, VIRTIO_SCMI_MAX_PDU_SIZE, GFP_KERNEL); if (!msg->request) @@ -458,7 +458,7 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, refcount_set(&msg->users, 1); } - msg->input = devm_kzalloc(cinfo->dev, VIRTIO_SCMI_MAX_PDU_SIZE, + msg->input = devm_kzalloc(dev, VIRTIO_SCMI_MAX_PDU_SIZE, GFP_KERNEL); if (!msg->input) return -ENOMEM; From 1eff6929aff594fba3182660f7b6213ec0ceda0c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:31 +0100 Subject: [PATCH 132/328] firmware: arm_scmi: Fix deferred_tx_wq release on error paths Use devres to allocate the dedicated deferred_tx_wq polling workqueue so as to automatically trigger the proper resource release on error path. Reported-by: Dan Carpenter Fixes: 5a3b7185c47c ("firmware: arm_scmi: Add atomic mode support to virtio transport") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221028140833.280091-6-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/virtio.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/arm_scmi/virtio.c b/drivers/firmware/arm_scmi/virtio.c index 36b7686843a4..33c9b81a55cd 100644 --- a/drivers/firmware/arm_scmi/virtio.c +++ b/drivers/firmware/arm_scmi/virtio.c @@ -148,7 +148,6 @@ static void scmi_vio_channel_cleanup_sync(struct scmi_vio_channel *vioch) { unsigned long flags; DECLARE_COMPLETION_ONSTACK(vioch_shutdown_done); - void *deferred_wq = NULL; /* * Prepare to wait for the last release if not already released @@ -162,16 +161,11 @@ static void scmi_vio_channel_cleanup_sync(struct scmi_vio_channel *vioch) vioch->shutdown_done = &vioch_shutdown_done; virtio_break_device(vioch->vqueue->vdev); - if (!vioch->is_rx && vioch->deferred_tx_wq) { - deferred_wq = vioch->deferred_tx_wq; + if (!vioch->is_rx && vioch->deferred_tx_wq) /* Cannot be kicked anymore after this...*/ vioch->deferred_tx_wq = NULL; - } spin_unlock_irqrestore(&vioch->lock, flags); - if (deferred_wq) - destroy_workqueue(deferred_wq); - scmi_vio_channel_release(vioch); /* Let any possibly concurrent RX path release the channel */ @@ -416,6 +410,11 @@ static bool virtio_chan_available(struct device *dev, int idx) return vioch && !vioch->cinfo; } +static void scmi_destroy_tx_workqueue(void *deferred_tx_wq) +{ + destroy_workqueue(deferred_tx_wq); +} + static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, bool tx) { @@ -430,6 +429,8 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, /* Setup a deferred worker for polling. */ if (tx && !vioch->deferred_tx_wq) { + int ret; + vioch->deferred_tx_wq = alloc_workqueue(dev_name(&scmi_vdev->dev), WQ_UNBOUND | WQ_FREEZABLE | WQ_SYSFS, @@ -437,6 +438,11 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev, if (!vioch->deferred_tx_wq) return -ENOMEM; + ret = devm_add_action_or_reset(dev, scmi_destroy_tx_workqueue, + vioch->deferred_tx_wq); + if (ret) + return ret; + INIT_WORK(&vioch->deferred_tx_work, scmi_vio_deferred_tx_worker); } From c4a7b9b587ca1bb4678d48d8be7132492b23a81c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 28 Oct 2022 15:08:33 +0100 Subject: [PATCH 133/328] arm64: dts: juno: Add thermal critical trip points When thermnal zones are defined, trip points definitions are mandatory. Define a couple of critical trip points for monitoring of existing PMIC and SOC thermal zones. This was lost between txt to yaml conversion and was re-enforced recently via the commit 8c596324232d ("dt-bindings: thermal: Fix missing required property") Cc: Rob Herring Cc: Krzysztof Kozlowski Cc: devicetree@vger.kernel.org Signed-off-by: Cristian Marussi Fixes: f7b636a8d83c ("arm64: dts: juno: add thermal zones for scpi sensors") Link: https://lore.kernel.org/r/20221028140833.280091-8-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- arch/arm64/boot/dts/arm/juno-base.dtsi | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 2f27619d8abd..8b4d280b1e7e 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -751,12 +751,26 @@ polling-delay = <1000>; polling-delay-passive = <100>; thermal-sensors = <&scpi_sensors0 0>; + trips { + pmic_crit0: trip0 { + temperature = <90000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; soc { polling-delay = <1000>; polling-delay-passive = <100>; thermal-sensors = <&scpi_sensors0 3>; + trips { + soc_crit0: trip0 { + temperature = <80000>; + hysteresis = <2000>; + type = "critical"; + }; + }; }; big_cluster_thermal_zone: big-cluster { From 569bea74c94d37785682b11bab76f557520477cd Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Thu, 27 Oct 2022 20:13:53 +0800 Subject: [PATCH 134/328] i2c: piix4: Fix adapter not be removed in piix4_remove() In piix4_probe(), the piix4 adapter will be registered in: piix4_probe() piix4_add_adapters_sb800() / piix4_add_adapter() i2c_add_adapter() Based on the probed device type, piix4_add_adapters_sb800() or single piix4_add_adapter() will be called. For the former case, piix4_adapter_count is set as the number of adapters, while for antoher case it is not set and kept default *zero*. When piix4 is removed, piix4_remove() removes the adapters added in piix4_probe(), basing on the piix4_adapter_count value. Because the count is zero for the single adapter case, the adapter won't be removed and makes the sources allocated for adapter leaked, such as the i2c client and device. These sources can still be accessed by i2c or bus and cause problems. An easily reproduced case is that if a new adapter is registered, i2c will get the leaked adapter and try to call smbus_algorithm, which was already freed: Triggered by: rmmod i2c_piix4 && modprobe max31730 BUG: unable to handle page fault for address: ffffffffc053d860 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 3752 Comm: modprobe Tainted: G Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:i2c_default_probe (drivers/i2c/i2c-core-base.c:2259) i2c_core RSP: 0018:ffff888107477710 EFLAGS: 00000246 ... i2c_detect (drivers/i2c/i2c-core-base.c:2302) i2c_core __process_new_driver (drivers/i2c/i2c-core-base.c:1336) i2c_core bus_for_each_dev (drivers/base/bus.c:301) i2c_for_each_dev (drivers/i2c/i2c-core-base.c:1823) i2c_core i2c_register_driver (drivers/i2c/i2c-core-base.c:1861) i2c_core do_one_initcall (init/main.c:1296) do_init_module (kernel/module/main.c:2455) ... ---[ end trace 0000000000000000 ]--- Fix this problem by correctly set piix4_adapter_count as 1 for the single adapter so it can be normally removed. Fixes: 528d53a1592b ("i2c: piix4: Fix probing of reserved ports on AMD Family 16h Model 30h") Signed-off-by: Chen Zhongjin Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-piix4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 39cb1b7bb865..809fbd014cd6 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -1080,6 +1080,7 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id) "", &piix4_main_adapters[0]); if (retval < 0) return retval; + piix4_adapter_count = 1; } /* Check for auxiliary SMBus on some AMD chipsets */ From cdbf26251d3b35c4ccaea0c3a6de4318f727d3d2 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 20 Oct 2022 16:39:33 +0200 Subject: [PATCH 135/328] i2c: tegra: Allocate DMA memory for DMA engine When the I2C controllers are running in DMA mode, it is the DMA engine that performs the memory accesses rather than the I2C controller. Pass the DMA engine's struct device pointer to the DMA API to make sure the correct DMA operations are used. This fixes an issue where the DMA engine's SMMU stream ID needs to be misleadingly set for the I2C controllers in device tree. Suggested-by: Robin Murphy Signed-off-by: Thierry Reding Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 954022c04cc4..3869c258a529 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -284,6 +284,7 @@ struct tegra_i2c_dev { struct dma_chan *tx_dma_chan; struct dma_chan *rx_dma_chan; unsigned int dma_buf_size; + struct device *dma_dev; dma_addr_t dma_phys; void *dma_buf; @@ -420,7 +421,7 @@ static int tegra_i2c_dma_submit(struct tegra_i2c_dev *i2c_dev, size_t len) static void tegra_i2c_release_dma(struct tegra_i2c_dev *i2c_dev) { if (i2c_dev->dma_buf) { - dma_free_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, + dma_free_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size, i2c_dev->dma_buf, i2c_dev->dma_phys); i2c_dev->dma_buf = NULL; } @@ -472,10 +473,13 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev) i2c_dev->tx_dma_chan = chan; + WARN_ON(i2c_dev->tx_dma_chan->device != i2c_dev->rx_dma_chan->device); + i2c_dev->dma_dev = chan->device->dev; + i2c_dev->dma_buf_size = i2c_dev->hw->quirks->max_write_len + I2C_PACKET_HEADER_SIZE; - dma_buf = dma_alloc_coherent(i2c_dev->dev, i2c_dev->dma_buf_size, + dma_buf = dma_alloc_coherent(i2c_dev->dma_dev, i2c_dev->dma_buf_size, &dma_phys, GFP_KERNEL | __GFP_NOWARN); if (!dma_buf) { dev_err(i2c_dev->dev, "failed to allocate DMA buffer\n"); @@ -1272,7 +1276,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (i2c_dev->dma_mode) { if (i2c_dev->msg_read) { - dma_sync_single_for_device(i2c_dev->dev, + dma_sync_single_for_device(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_FROM_DEVICE); @@ -1280,7 +1284,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (err) return err; } else { - dma_sync_single_for_cpu(i2c_dev->dev, + dma_sync_single_for_cpu(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_TO_DEVICE); } @@ -1293,7 +1297,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, memcpy(i2c_dev->dma_buf + I2C_PACKET_HEADER_SIZE, msg->buf, msg->len); - dma_sync_single_for_device(i2c_dev->dev, + dma_sync_single_for_device(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_TO_DEVICE); @@ -1344,7 +1348,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, } if (i2c_dev->msg_read && i2c_dev->msg_err == I2C_ERR_NONE) { - dma_sync_single_for_cpu(i2c_dev->dev, + dma_sync_single_for_cpu(i2c_dev->dma_dev, i2c_dev->dma_phys, xfer_size, DMA_FROM_DEVICE); From d6643d7207c572c1b0305ed505101f15502c6c87 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 6 Oct 2022 16:54:40 +0200 Subject: [PATCH 136/328] i2c: i801: add lis3lv02d's I2C address for Vostro 5568 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dell Vostro 5568 laptop has lis3lv02d, but its i2c address is not known to the kernel. Add this address. Output of "cat /sys/devices/platform/lis3lv02d/position" on Dell Vostro 5568 laptop: - Horizontal: (-18,0,1044) - Front elevated: (522,-18,1080) - Left elevated: (-18,-360,1080) - Upside down: (36,108,-1134) Signed-off-by: Nam Cao Reviewed-by: Jean Delvare Reviewed-by: Pali Rohár Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e06509edc5f3..1fda1eaa6d6a 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1243,6 +1243,7 @@ static const struct { */ { "Latitude 5480", 0x29 }, { "Vostro V131", 0x1d }, + { "Vostro 5568", 0x29 }, }; static void register_dell_lis3lv02d_i2c_device(struct i801_priv *priv) From 42378a9ca55347102bbf86708776061d8fe3ece2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 28 Oct 2022 19:54:30 -0700 Subject: [PATCH 137/328] bpf, verifier: Fix memory leak in array reallocation for stack state If an error (NULL) is returned by krealloc(), callers of realloc_array() were setting their allocation pointers to NULL, but on error krealloc() does not touch the original allocation. This would result in a memory resource leak. Instead, free the old allocation on the error handling path. The memory leak information is as follows as also reported by Zhengchao: unreferenced object 0xffff888019801800 (size 256): comm "bpf_repo", pid 6490, jiffies 4294959200 (age 17.170s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000b211474b>] __kmalloc_node_track_caller+0x45/0xc0 [<0000000086712a0b>] krealloc+0x83/0xd0 [<00000000139aab02>] realloc_array+0x82/0xe2 [<00000000b1ca41d1>] grow_stack_state+0xfb/0x186 [<00000000cd6f36d2>] check_mem_access.cold+0x141/0x1341 [<0000000081780455>] do_check_common+0x5358/0xb350 [<0000000015f6b091>] bpf_check.cold+0xc3/0x29d [<000000002973c690>] bpf_prog_load+0x13db/0x2240 [<00000000028d1644>] __sys_bpf+0x1605/0x4ce0 [<00000000053f29bd>] __x64_sys_bpf+0x75/0xb0 [<0000000056fedaf5>] do_syscall_64+0x35/0x80 [<000000002bd58261>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: c69431aab67a ("bpf: verifier: Improve function state reallocation") Reported-by: Zhengchao Shao Reported-by: Kees Cook Signed-off-by: Kees Cook Signed-off-by: Daniel Borkmann Reviewed-by: Bill Wendling Cc: Lorenz Bauer Link: https://lore.kernel.org/bpf/20221029025433.2533810-1-keescook@chromium.org --- kernel/bpf/verifier.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7f0a9f6cb889..dd9019c8b0db 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1027,12 +1027,17 @@ out: */ static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) { + void *new_arr; + if (!new_n || old_n == new_n) goto out; - arr = krealloc_array(arr, new_n, size, GFP_KERNEL); - if (!arr) + new_arr = krealloc_array(arr, new_n, size, GFP_KERNEL); + if (!new_arr) { + kfree(arr); return NULL; + } + arr = new_arr; if (new_n > old_n) memset(arr + old_n * size, 0, (new_n - old_n) * size); From ff52fe006fdeacee49745dabed26154db52a6343 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Sun, 9 Oct 2022 10:35:27 +0800 Subject: [PATCH 138/328] drm/imx: Kconfig: Remove duplicated 'select DRM_KMS_HELPER' line A duplicated line 'select DRM_KMS_HELPER' was introduced in Kconfig file by commit 09717af7d13d ("drm: Remove CONFIG_DRM_KMS_CMA_HELPER option"), so remove it. Fixes: 09717af7d13d ("drm: Remove CONFIG_DRM_KMS_CMA_HELPER option") Signed-off-by: Liu Ying Reviewed-by: Philipp Zabel Signed-off-by: Philipp Zabel Link: https://patchwork.freedesktop.org/patch/msgid/20221009023527.3669647-1-victor.liu@nxp.com --- drivers/gpu/drm/imx/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/imx/Kconfig b/drivers/gpu/drm/imx/Kconfig index 975de4ff7313..fd5b2471fdf0 100644 --- a/drivers/gpu/drm/imx/Kconfig +++ b/drivers/gpu/drm/imx/Kconfig @@ -4,7 +4,6 @@ config DRM_IMX select DRM_KMS_HELPER select VIDEOMODE_HELPERS select DRM_GEM_DMA_HELPER - select DRM_KMS_HELPER depends on DRM && (ARCH_MXC || ARCH_MULTIPLATFORM || COMPILE_TEST) depends on IMX_IPUV3_CORE help From fc007fb815ab5395c3962c09b79a1630b0fbed9c Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Tue, 13 Sep 2022 13:55:44 -0700 Subject: [PATCH 139/328] drm/imx: imx-tve: Fix return type of imx_tve_connector_mode_valid The mode_valid field in drm_connector_helper_funcs is expected to be of type: enum drm_mode_status (* mode_valid) (struct drm_connector *connector, struct drm_display_mode *mode); The mismatched return type breaks forward edge kCFI since the underlying function definition does not match the function hook definition. The return type of imx_tve_connector_mode_valid should be changed from int to enum drm_mode_status. Reported-by: Dan Carpenter Link: https://github.com/ClangBuiltLinux/linux/issues/1703 Cc: llvm@lists.linux.dev Signed-off-by: Nathan Huckleberry Reviewed-by: Nathan Chancellor Reviewed-by: Fabio Estevam Reviewed-by: Philipp Zabel Signed-off-by: Philipp Zabel Link: https://patchwork.freedesktop.org/patch/msgid/20220913205544.155106-1-nhuck@google.com --- drivers/gpu/drm/imx/imx-tve.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c index 6b34fac3f73a..ab4d1c878fda 100644 --- a/drivers/gpu/drm/imx/imx-tve.c +++ b/drivers/gpu/drm/imx/imx-tve.c @@ -218,8 +218,9 @@ static int imx_tve_connector_get_modes(struct drm_connector *connector) return ret; } -static int imx_tve_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +imx_tve_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { struct imx_tve *tve = con_to_tve(connector); unsigned long rate; From 4151bb636acf32bb2e6126cec8216b023117c0e9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Nov 2022 12:19:51 +0000 Subject: [PATCH 140/328] KVM: arm64: Fix SMPRI_EL1/TPIDR2_EL0 trapping on VHE The trapping of SMPRI_EL1 and TPIDR2_EL0 currently only really work on nVHE, as only this mode uses the fine-grained trapping that controls these two registers. Move the trapping enable/disable code into __{de,}activate_traps_common(), allowing it to be called when it actually matters on VHE, and remove the flipping of EL2 control for TPIDR2_EL0, which only affects the host access of this register. Fixes: 861262ab8627 ("KVM: arm64: Handle SME host state when running guests") Reported-by: Mark Brown Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/86bkpqer4z.wl-maz@kernel.org --- arch/arm64/kvm/hyp/include/hyp/switch.h | 20 +++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/switch.c | 26 ------------------------- arch/arm64/kvm/hyp/vhe/switch.c | 8 -------- 3 files changed, 20 insertions(+), 34 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 6cbbb6c02f66..3330d1b76bdd 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -87,6 +87,17 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + + if (cpus_have_final_cap(ARM64_SME)) { + sysreg_clear_set_s(SYS_HFGRTR_EL2, + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK, + 0); + sysreg_clear_set_s(SYS_HFGWTR_EL2, + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK, + 0); + } } static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) @@ -96,6 +107,15 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(0, hstr_el2); if (kvm_arm_support_pmu_v3()) write_sysreg(0, pmuserenr_el0); + + if (cpus_have_final_cap(ARM64_SME)) { + sysreg_clear_set_s(SYS_HFGRTR_EL2, 0, + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK); + sysreg_clear_set_s(SYS_HFGWTR_EL2, 0, + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK); + } } static inline void ___activate_traps(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8e9d49a964be..c2cb46ca4fb6 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -55,18 +55,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(val, cptr_el2); write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); - if (cpus_have_final_cap(ARM64_SME)) { - val = read_sysreg_s(SYS_HFGRTR_EL2); - val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK | - HFGxTR_EL2_nSMPRI_EL1_MASK); - write_sysreg_s(val, SYS_HFGRTR_EL2); - - val = read_sysreg_s(SYS_HFGWTR_EL2); - val &= ~(HFGxTR_EL2_nTPIDR2_EL0_MASK | - HFGxTR_EL2_nSMPRI_EL1_MASK); - write_sysreg_s(val, SYS_HFGWTR_EL2); - } - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; @@ -110,20 +98,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); - if (cpus_have_final_cap(ARM64_SME)) { - u64 val; - - val = read_sysreg_s(SYS_HFGRTR_EL2); - val |= HFGxTR_EL2_nTPIDR2_EL0_MASK | - HFGxTR_EL2_nSMPRI_EL1_MASK; - write_sysreg_s(val, SYS_HFGRTR_EL2); - - val = read_sysreg_s(SYS_HFGWTR_EL2); - val |= HFGxTR_EL2_nTPIDR2_EL0_MASK | - HFGxTR_EL2_nSMPRI_EL1_MASK; - write_sysreg_s(val, SYS_HFGWTR_EL2); - } - cptr = CPTR_EL2_DEFAULT; if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)) cptr |= CPTR_EL2_TZ; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 7acb87eaa092..1a97391fedd2 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -63,10 +63,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); } - if (cpus_have_final_cap(ARM64_SME)) - write_sysreg(read_sysreg(sctlr_el2) & ~SCTLR_ELx_ENTP2, - sctlr_el2); - write_sysreg(val, cpacr_el1); write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el1); @@ -88,10 +84,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) */ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); - if (cpus_have_final_cap(ARM64_SME)) - write_sysreg(read_sysreg(sctlr_el2) | SCTLR_ELx_ENTP2, - sctlr_el2); - write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); if (!arm64_kernel_unmapped_at_el0()) From a6dd6f39008bb3ef7c73ef0a2acc2a4209555bd8 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 28 Oct 2022 17:12:19 +0300 Subject: [PATCH 141/328] x86/tdx: Prepare for using "INFO" call for a second purpose The TDG.VP.INFO TDCALL provides the guest with various details about the TDX system that the guest needs to run. Only one field is currently used: 'gpa_width' which tells the guest which PTE bits mark pages shared or private. A second field is now needed: the guest "TD attributes" to tell if virtualization exceptions are configured in a way that can harm the guest. Make the naming and calling convention more generic and discrete from the mask-centric one. Thanks to Sathya for the inspiration here, but there's no code, comments or changelogs left from where he started. Signed-off-by: Dave Hansen Acked-by: Kirill A. Shutemov Tested-by: Kirill A. Shutemov Cc: stable@vger.kernel.org --- arch/x86/coco/tdx/tdx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 928dcf7a20d9..3fee96931ff5 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -98,7 +98,7 @@ static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, panic("TDCALL %lld failed (Buggy TDX module!)\n", fn); } -static u64 get_cc_mask(void) +static void tdx_parse_tdinfo(u64 *cc_mask) { struct tdx_module_output out; unsigned int gpa_width; @@ -121,7 +121,7 @@ static u64 get_cc_mask(void) * The highest bit of a guest physical address is the "sharing" bit. * Set it for shared pages and clear it for private pages. */ - return BIT_ULL(gpa_width - 1); + *cc_mask = BIT_ULL(gpa_width - 1); } /* @@ -758,7 +758,7 @@ void __init tdx_early_init(void) setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); cc_set_vendor(CC_VENDOR_INTEL); - cc_mask = get_cc_mask(); + tdx_parse_tdinfo(&cc_mask); cc_set_mask(cc_mask); /* From 024f4b2e1f874934943eb2d3d288ebc52c79f55c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Oct 2022 10:01:57 +0100 Subject: [PATCH 142/328] arm64: entry: avoid kprobe recursion The cortex_a76_erratum_1463225_debug_handler() function is called when handling debug exceptions (and synchronous exceptions from BRK instructions), and so is called when a probed function executes. If the compiler does not inline cortex_a76_erratum_1463225_debug_handler(), it can be probed. If cortex_a76_erratum_1463225_debug_handler() is probed, any debug exception or software breakpoint exception will result in recursive exceptions leading to a stack overflow. This can be triggered with the ftrace multiple_probes selftest, and as per the example splat below. This is a regression caused by commit: 6459b8469753e9fe ("arm64: entry: consolidate Cortex-A76 erratum 1463225 workaround") ... which removed the NOKPROBE_SYMBOL() annotation associated with the function. My intent was that cortex_a76_erratum_1463225_debug_handler() would be inlined into its caller, el1_dbg(), which is marked noinstr and cannot be probed. Mark cortex_a76_erratum_1463225_debug_handler() as __always_inline to ensure this. Example splat prior to this patch (with recursive entries elided): | # echo p cortex_a76_erratum_1463225_debug_handler > /sys/kernel/debug/tracing/kprobe_events | # echo p do_el0_svc >> /sys/kernel/debug/tracing/kprobe_events | # echo 1 > /sys/kernel/debug/tracing/events/kprobes/enable | Insufficient stack space to handle exception! | ESR: 0x0000000096000047 -- DABT (current EL) | FAR: 0xffff800009cefff0 | Task stack: [0xffff800009cf0000..0xffff800009cf4000] | IRQ stack: [0xffff800008000000..0xffff800008004000] | Overflow stack: [0xffff00007fbc00f0..0xffff00007fbc10f0] | CPU: 0 PID: 145 Comm: sh Not tainted 6.0.0 #2 | Hardware name: linux,dummy-virt (DT) | pstate: 604003c5 (nZCv DAIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : arm64_enter_el1_dbg+0x4/0x20 | lr : el1_dbg+0x24/0x5c | sp : ffff800009cf0000 | x29: ffff800009cf0000 x28: ffff000002c74740 x27: 0000000000000000 | x26: 0000000000000000 x25: 0000000000000000 x24: 0000000000000000 | x23: 00000000604003c5 x22: ffff80000801745c x21: 0000aaaac95ac068 | x20: 00000000f2000004 x19: ffff800009cf0040 x18: 0000000000000000 | x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 | x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 | x11: 0000000000000010 x10: ffff800008c87190 x9 : ffff800008ca00d0 | x8 : 000000000000003c x7 : 0000000000000000 x6 : 0000000000000000 | x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000000043a4 | x2 : 00000000f2000004 x1 : 00000000f2000004 x0 : ffff800009cf0040 | Kernel panic - not syncing: kernel stack overflow | CPU: 0 PID: 145 Comm: sh Not tainted 6.0.0 #2 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0xe4/0x104 | show_stack+0x18/0x4c | dump_stack_lvl+0x64/0x7c | dump_stack+0x18/0x38 | panic+0x14c/0x338 | test_taint+0x0/0x2c | panic_bad_stack+0x104/0x118 | handle_bad_stack+0x34/0x48 | __bad_stack+0x78/0x7c | arm64_enter_el1_dbg+0x4/0x20 | el1h_64_sync_handler+0x40/0x98 | el1h_64_sync+0x64/0x68 | cortex_a76_erratum_1463225_debug_handler+0x0/0x34 ... | el1h_64_sync_handler+0x40/0x98 | el1h_64_sync+0x64/0x68 | cortex_a76_erratum_1463225_debug_handler+0x0/0x34 ... | el1h_64_sync_handler+0x40/0x98 | el1h_64_sync+0x64/0x68 | cortex_a76_erratum_1463225_debug_handler+0x0/0x34 | el1h_64_sync_handler+0x40/0x98 | el1h_64_sync+0x64/0x68 | do_el0_svc+0x0/0x28 | el0t_64_sync_handler+0x84/0xf0 | el0t_64_sync+0x18c/0x190 | Kernel Offset: disabled | CPU features: 0x0080,00005021,19001080 | Memory Limit: none | ---[ end Kernel panic - not syncing: kernel stack overflow ]--- With this patch, cortex_a76_erratum_1463225_debug_handler() is inlined into el1_dbg(), and el1_dbg() cannot be probed: | # echo p cortex_a76_erratum_1463225_debug_handler > /sys/kernel/debug/tracing/kprobe_events | sh: write error: No such file or directory | # grep -w cortex_a76_erratum_1463225_debug_handler /proc/kallsyms | wc -l | 0 | # echo p el1_dbg > /sys/kernel/debug/tracing/kprobe_events | sh: write error: Invalid argument | # grep -w el1_dbg /proc/kallsyms | wc -l | 1 Fixes: 6459b8469753 ("arm64: entry: consolidate Cortex-A76 erratum 1463225 workaround") Cc: # 5.12.x Signed-off-by: Mark Rutland Cc: Will Deacon Link: https://lore.kernel.org/r/20221017090157.2881408-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 9173fad279af..27369fa1c032 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -329,7 +329,8 @@ static void cortex_a76_erratum_1463225_svc_handler(void) __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0); } -static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static __always_inline bool +cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa)) return false; From be0ddf5293a7895a8c9096e1a8560930c6a0ab3f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 1 Nov 2022 11:27:14 +0000 Subject: [PATCH 143/328] arm64: booting: Document our requirements for fine grained traps with SME With SME we require that fine grained traps on access to TPIDR2_EL0 and SMPRI_EL1 are disabled but did not document that fact. Add the relevant register bits. Signed-off-by: Mark Brown Reviewed-by: Oliver Upton Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221101112716.52035-2-broonie@kernel.org --- Documentation/arm64/booting.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 8aefa1001ae5..8c324ad638de 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -340,6 +340,14 @@ Before jumping into the kernel, the following conditions must be met: - SMCR_EL2.LEN must be initialised to the same value for all CPUs the kernel will execute on. + - HWFGRTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01. + + - HWFGWTR_EL2.nTPIDR2_EL0 (bit 55) must be initialised to 0b01. + + - HWFGRTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01. + + - HWFGWTR_EL2.nSMPRI_EL1 (bit 54) must be initialised to 0b01. + For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64) - If EL3 is present: From 8ec95b94716a1e4d126edc3fb2bc426a717e2dba Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 1 Nov 2022 09:31:36 +0800 Subject: [PATCH 144/328] bpf, sockmap: Fix the sk->sk_forward_alloc warning of sk_stream_kill_queues When running `test_sockmap` selftests, the following warning appears: WARNING: CPU: 2 PID: 197 at net/core/stream.c:205 sk_stream_kill_queues+0xd3/0xf0 Call Trace: inet_csk_destroy_sock+0x55/0x110 tcp_rcv_state_process+0xd28/0x1380 ? tcp_v4_do_rcv+0x77/0x2c0 tcp_v4_do_rcv+0x77/0x2c0 __release_sock+0x106/0x130 __tcp_close+0x1a7/0x4e0 tcp_close+0x20/0x70 inet_release+0x3c/0x80 __sock_release+0x3a/0xb0 sock_close+0x14/0x20 __fput+0xa3/0x260 task_work_run+0x59/0xb0 exit_to_user_mode_prepare+0x1b3/0x1c0 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x48/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae The root case is in commit 84472b436e76 ("bpf, sockmap: Fix more uncharged while msg has more_data"), where I used msg->sg.size to replace the tosend, causing breakage: if (msg->apply_bytes && msg->apply_bytes < tosend) tosend = psock->apply_bytes; Fixes: 84472b436e76 ("bpf, sockmap: Fix more uncharged while msg has more_data") Reported-by: Jakub Sitnicki Signed-off-by: Wang Yufen Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/1667266296-8794-1-git-send-email-wangyufen@huawei.com --- net/ipv4/tcp_bpf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index c501c329b1db..cf9c3e8f7ccb 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -278,7 +278,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, { bool cork = false, enospc = sk_msg_full(msg); struct sock *sk_redir; - u32 tosend, delta = 0; + u32 tosend, origsize, sent, delta = 0; u32 eval = __SK_NONE; int ret; @@ -333,10 +333,12 @@ more_data: cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, msg->sg.size); + sk_msg_return(sk, msg, tosend); release_sock(sk); + origsize = msg->sg.size; ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); + sent = origsize - msg->sg.size; if (eval == __SK_REDIRECT) sock_put(sk_redir); @@ -375,7 +377,7 @@ more_data: msg->sg.data[msg->sg.start].page_link && msg->sg.data[msg->sg.start].length) { if (eval == __SK_REDIRECT) - sk_mem_charge(sk, msg->sg.size); + sk_mem_charge(sk, tosend - sent); goto more_data; } } From 71ee71d7adcba648077997a29a91158d20c40b09 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Tue, 1 Nov 2022 01:41:00 -0600 Subject: [PATCH 145/328] cxl/region: Fix decoder allocation crash When an intermediate port's decoders have been exhausted by existing regions, and creating a new region with the port in question in it's hierarchical path is attempted, cxl_port_attach_region() fails to find a port decoder (as would be expected), and drops into the failure / cleanup path. However, during cleanup of the region reference, a sanity check attempts to dereference the decoder, which in the above case didn't exist. This causes a NULL pointer dereference BUG. To fix this, refactor the decoder allocation and de-allocation into helper routines, and in this 'free' routine, check that the decoder, @cxld, is valid before attempting any operations on it. Cc: Suggested-by: Dan Williams Signed-off-by: Vishal Verma Reviewed-by: Dave Jiang Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders") Link: https://lore.kernel.org/r/20221101074100.1732003-1-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 67 ++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c49d9a5f1091..bb6f4fc84a3f 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -687,18 +687,27 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, return cxl_rr; } -static void free_region_ref(struct cxl_region_ref *cxl_rr) +static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr) { - struct cxl_port *port = cxl_rr->port; struct cxl_region *cxlr = cxl_rr->region; struct cxl_decoder *cxld = cxl_rr->decoder; + if (!cxld) + return; + dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n"); if (cxld->region == cxlr) { cxld->region = NULL; put_device(&cxlr->dev); } +} +static void free_region_ref(struct cxl_region_ref *cxl_rr) +{ + struct cxl_port *port = cxl_rr->port; + struct cxl_region *cxlr = cxl_rr->region; + + cxl_rr_free_decoder(cxl_rr); xa_erase(&port->regions, (unsigned long)cxlr); xa_destroy(&cxl_rr->endpoints); kfree(cxl_rr); @@ -729,6 +738,33 @@ static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr, return 0; } +static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, + struct cxl_region_ref *cxl_rr) +{ + struct cxl_decoder *cxld; + + if (port == cxled_to_port(cxled)) + cxld = &cxled->cxld; + else + cxld = cxl_region_find_decoder(port, cxlr); + if (!cxld) { + dev_dbg(&cxlr->dev, "%s: no decoder available\n", + dev_name(&port->dev)); + return -EBUSY; + } + + if (cxld->region) { + dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", + dev_name(&port->dev), dev_name(&cxld->dev), + dev_name(&cxld->region->dev)); + return -EBUSY; + } + + cxl_rr->decoder = cxld; + return 0; +} + /** * cxl_port_attach_region() - track a region's interest in a port by endpoint * @port: port to add a new region reference 'struct cxl_region_ref' @@ -795,12 +831,6 @@ static int cxl_port_attach_region(struct cxl_port *port, cxl_rr->nr_targets++; nr_targets_inc = true; } - - /* - * The decoder for @cxlr was allocated when the region was first - * attached to @port. - */ - cxld = cxl_rr->decoder; } else { cxl_rr = alloc_region_ref(port, cxlr); if (IS_ERR(cxl_rr)) { @@ -811,26 +841,11 @@ static int cxl_port_attach_region(struct cxl_port *port, } nr_targets_inc = true; - if (port == cxled_to_port(cxled)) - cxld = &cxled->cxld; - else - cxld = cxl_region_find_decoder(port, cxlr); - if (!cxld) { - dev_dbg(&cxlr->dev, "%s: no decoder available\n", - dev_name(&port->dev)); + rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr); + if (rc) goto out_erase; - } - - if (cxld->region) { - dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n", - dev_name(&port->dev), dev_name(&cxld->dev), - dev_name(&cxld->region->dev)); - rc = -EBUSY; - goto out_erase; - } - - cxl_rr->decoder = cxld; } + cxld = cxl_rr->decoder; rc = cxl_rr_ep_add(cxl_rr, cxled); if (rc) { From 373e715e31bf4e0f129befe87613a278fac228d3 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 28 Oct 2022 17:12:20 +0300 Subject: [PATCH 146/328] x86/tdx: Panic on bad configs that #VE on "private" memory access All normal kernel memory is "TDX private memory". This includes everything from kernel stacks to kernel text. Handling exceptions on arbitrary accesses to kernel memory is essentially impossible because they can happen in horribly nasty places like kernel entry/exit. But, TDX hardware can theoretically _deliver_ a virtualization exception (#VE) on any access to private memory. But, it's not as bad as it sounds. TDX can be configured to never deliver these exceptions on private memory with a "TD attribute" called ATTR_SEPT_VE_DISABLE. The guest has no way to *set* this attribute, but it can check it. Ensure ATTR_SEPT_VE_DISABLE is set in early boot. panic() if it is unset. There is no sane way for Linux to run with this attribute clear so a panic() is appropriate. There's small window during boot before the check where kernel has an early #VE handler. But the handler is only for port I/O and will also panic() as soon as it sees any other #VE, such as a one generated by a private memory access. [ dhansen: Rewrite changelog and rebase on new tdx_parse_tdinfo(). Add Kirill's tested-by because I made changes since he wrote this. ] Fixes: 9a22bf6debbf ("x86/traps: Add #VE support for TDX guest") Reported-by: ruogui.ygr@alibaba-inc.com Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Tested-by: Kirill A. Shutemov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20221028141220.29217-3-kirill.shutemov%40linux.intel.com --- arch/x86/coco/tdx/tdx.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 3fee96931ff5..b8998cf0508a 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -34,6 +34,8 @@ #define VE_GET_PORT_NUM(e) ((e) >> 16) #define VE_IS_IO_STRING(e) ((e) & BIT(4)) +#define ATTR_SEPT_VE_DISABLE BIT(28) + /* * Wrapper for standard use of __tdx_hypercall with no output aside from * return code. @@ -102,6 +104,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask) { struct tdx_module_output out; unsigned int gpa_width; + u64 td_attr; /* * TDINFO TDX module call is used to get the TD execution environment @@ -109,19 +112,27 @@ static void tdx_parse_tdinfo(u64 *cc_mask) * information, etc. More details about the ABI can be found in TDX * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL * [TDG.VP.INFO]. - * - * The GPA width that comes out of this call is critical. TDX guests - * can not meaningfully run without it. */ tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out); - gpa_width = out.rcx & GENMASK(5, 0); - /* * The highest bit of a guest physical address is the "sharing" bit. * Set it for shared pages and clear it for private pages. + * + * The GPA width that comes out of this call is critical. TDX guests + * can not meaningfully run without it. */ + gpa_width = out.rcx & GENMASK(5, 0); *cc_mask = BIT_ULL(gpa_width - 1); + + /* + * The kernel can not handle #VE's when accessing normal kernel + * memory. Ensure that no #VE will be delivered for accesses to + * TD-private memory. Only VMM-shared memory (MMIO) will #VE. + */ + td_attr = out.rdx; + if (!(td_attr & ATTR_SEPT_VE_DISABLE)) + panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n"); } /* From 7a263a0402561035199cd9049baadb908a92b6b4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 24 Oct 2022 04:10:55 +0900 Subject: [PATCH 147/328] kconfig: fix segmentation fault in menuconfig search Since commit d05377e184fc ("kconfig: Create links to main menu items in search"), menuconfig shows a jump key next to "Main menu" if the nearest visible parent is the rootmenu. If you press that jump key, menuconfig crashes with a segmentation fault. For example, do this: $ make ARCH=arm64 allnoconfig menuconfig Press '/' to search for the string "ACPI". Press '1' to choose "(1) Main menu". Then, menuconfig crashed with a segmentation fault. The following code in search_conf() conf(targets[i]->parent, targets[i]); results in NULL pointer dereference because targets[i] is the rootmenu, which does not have a parent. Commit d05377e184fc tried to fix the issue of top-level items not having a jump key, but adding the "Main menu" was not the right fix. The correct fix is to show the searched item itself. This fixes another weird behavior described in the comment block. Fixes: d05377e184fc ("kconfig: Create links to main menu items in search") Reported-by: Johannes Zink Signed-off-by: Masahiro Yamada Tested-by: Bagas Sanjaya Tested-by: Johannes Zink --- scripts/kconfig/menu.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c index 62b6313f51c8..109325f31bef 100644 --- a/scripts/kconfig/menu.c +++ b/scripts/kconfig/menu.c @@ -722,8 +722,8 @@ static void get_prompt_str(struct gstr *r, struct property *prop, if (!expr_eq(prop->menu->dep, prop->visible.expr)) get_dep_str(r, prop->visible.expr, " Visible if: "); - menu = prop->menu->parent; - for (i = 0; menu && i < 8; menu = menu->parent) { + menu = prop->menu; + for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) { bool accessible = menu_is_visible(menu); submenu[i++] = menu; @@ -733,16 +733,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, if (head && location) { jump = xmalloc(sizeof(struct jump_key)); - if (menu_is_visible(prop->menu)) { - /* - * There is not enough room to put the hint at the - * beginning of the "Prompt" line. Put the hint on the - * last "Location" line even when it would belong on - * the former. - */ - jump->target = prop->menu; - } else - jump->target = location; + jump->target = location; if (list_empty(head)) jump->index = 0; @@ -758,13 +749,7 @@ static void get_prompt_str(struct gstr *r, struct property *prop, menu = submenu[i]; if (jump && menu == location) jump->offset = strlen(r->s); - - if (menu == &rootmenu) - /* The real rootmenu prompt is ugly */ - str_printf(r, "%*cMain menu", j, ' '); - else - str_printf(r, "%*c-> %s", j, ' ', menu_get_prompt(menu)); - + str_printf(r, "%*c-> %s", j, ' ', menu_get_prompt(menu)); if (menu->sym) { str_printf(r, " (%s [=%s])", menu->sym->name ? menu->sym->name : "", From 780854186946e0de2be192ee7fa5125666533b3a Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 26 Oct 2022 14:39:59 +0800 Subject: [PATCH 148/328] wifi: mac80211: fix general-protection-fault in ieee80211_subif_start_xmit() When device is running and the interface status is changed, the gpf issue is triggered. The problem triggering process is as follows: Thread A: Thread B ieee80211_runtime_change_iftype() process_one_work() ... ... ieee80211_do_stop() ... ... ... sdata->bss = NULL ... ... ieee80211_subif_start_xmit() ieee80211_multicast_to_unicast //!sdata->bss->multicast_to_unicast cause gpf issue When the interface status is changed, the sending queue continues to send packets. After the bss is set to NULL, the bss is accessed. As a result, this causes a general-protection-fault issue. The following is the stack information: general protection fault, probably for non-canonical address 0xdffffc000000002f: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000178-0x000000000000017f] Workqueue: mld mld_ifc_work RIP: 0010:ieee80211_subif_start_xmit+0x25b/0x1310 Call Trace: dev_hard_start_xmit+0x1be/0x990 __dev_queue_xmit+0x2c9a/0x3b60 ip6_finish_output2+0xf92/0x1520 ip6_finish_output+0x6af/0x11e0 ip6_output+0x1ed/0x540 mld_sendpack+0xa09/0xe70 mld_ifc_work+0x71c/0xdb0 process_one_work+0x9bf/0x1710 worker_thread+0x665/0x1080 kthread+0x2e4/0x3a0 ret_from_fork+0x1f/0x30 Fixes: f856373e2f31 ("wifi: mac80211: do not wake queues on a vif that is being stopped") Reported-by: syzbot+c6e8fca81c294fd5620a@syzkaller.appspotmail.com Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221026063959.177813-1-shaozhengchao@huawei.com Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a364148149f9..874f2a4d831d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4418,6 +4418,11 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, if (likely(!is_multicast_ether_addr(eth->h_dest))) goto normal; + if (unlikely(!ieee80211_sdata_running(sdata))) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + if (unlikely(ieee80211_multicast_to_unicast(skb, dev))) { struct sk_buff_head queue; From 39e7b5de9853bd92ddbfa4b14165babacd7da0ba Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Thu, 27 Oct 2022 16:01:33 +0200 Subject: [PATCH 149/328] wifi: mac80211: Fix ack frame idr leak when mesh has no route When trying to transmit an data frame with tx_status to a destination that have no route in the mesh, then it is dropped without recrediting the ack_status_frames idr. Once it is exhausted, wpa_supplicant starts failing to do SAE with NL80211_CMD_FRAME and logs "nl80211: Frame command failed". Use ieee80211_free_txskb() instead of kfree_skb() to fix it. Signed-off-by: Nicolas Cavallari Link: https://lore.kernel.org/r/20221027140133.1504-1-nicolas.cavallari@green-communications.fr Signed-off-by: Johannes Berg --- net/mac80211/mesh_pathtbl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index acc1c299f1ae..69d5e1ec6ede 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -710,7 +710,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath) void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - kfree_skb(skb); + ieee80211_free_txskb(&sdata->local->hw, skb); sdata->u.mesh.mshstats.dropped_frames_no_route++; } From 30ac96f7cc973bb850c718c9bbe1fdcedfbe826b Mon Sep 17 00:00:00 2001 From: Howard Hsu Date: Thu, 27 Oct 2022 09:56:53 +0800 Subject: [PATCH 150/328] wifi: mac80211: Set TWT Information Frame Disabled bit as 1 The TWT Information Frame Disabled bit of control field of TWT Setup frame shall be set to 1 since handling TWT Information frame is not supported by current mac80211 implementation. Fixes: f5a4c24e689f ("mac80211: introduce individual TWT support in AP mode") Signed-off-by: Howard Hsu Link: https://lore.kernel.org/r/20221027015653.1448-1-howard-yh.hsu@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/s1g.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index 8ca7d45d6daa..c1f964e9991c 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -112,6 +112,9 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata, goto out; } + /* TWT Information not supported yet */ + twt->control |= IEEE80211_TWT_CONTROL_RX_DISABLED; + drv_add_twt_setup(sdata->local, sdata, &sta->sta, twt); out: ieee80211_s1g_send_twt_setup(sdata, mgmt->sa, sdata->vif.addr, twt); From 89c1017aac67ca81973b7c8eac5d021315811a93 Mon Sep 17 00:00:00 2001 From: Zhao Gongyi Date: Tue, 1 Nov 2022 11:56:02 +0800 Subject: [PATCH 151/328] selftests/pidfd_test: Remove the erroneous ',' Remove the erroneous ',', otherwise it might result in wrong output and report: ... Bail out! (errno %d) test: Unexpected epoll_wait result (c=4208480, events=2) ... Fixes: 740378dc7834 ("pidfd: add polling selftests") Signed-off-by: Zhao Gongyi Signed-off-by: Shuah Khan --- tools/testing/selftests/pidfd/pidfd_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index d36654265b7a..e2dd4ed84984 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -413,7 +413,7 @@ static void poll_pidfd(const char *test_name, int pidfd) c = epoll_wait(epoll_fd, events, MAX_EVENTS, 5000); if (c != 1 || !(events[0].events & EPOLLIN)) - ksft_exit_fail_msg("%s test: Unexpected epoll_wait result (c=%d, events=%x) ", + ksft_exit_fail_msg("%s test: Unexpected epoll_wait result (c=%d, events=%x) " "(errno %d)\n", test_name, c, events[0].events, errno); From 34de8e6e0e1f66e431abf4123934a2581cb5f133 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Wed, 2 Nov 2022 16:40:34 +0800 Subject: [PATCH 152/328] bpftool: Fix NULL pointer dereference when pin {PROG, MAP, LINK} without FILE When using bpftool to pin {PROG, MAP, LINK} without FILE, segmentation fault will occur. The reson is that the lack of FILE will cause strlen to trigger NULL pointer dereference. The corresponding stacktrace is shown below: do_pin do_pin_any do_pin_fd mount_bpffs_for_pin strlen(name) <- NULL pointer dereference Fix it by adding validation to the common process. Fixes: 75a1e792c335 ("tools: bpftool: Allow all prog/map handles for pinning objects") Signed-off-by: Pu Lehui Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20221102084034.3342995-1-pulehui@huaweicloud.com --- tools/bpf/bpftool/common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 8727765add88..0cdb4f711510 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -300,6 +300,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) int err; int fd; + if (!REQ_ARGS(3)) + return -EINVAL; + fd = get_fd(&argc, &argv); if (fd < 0) return fd; From 4b18cb3f74dcfc183c2434e17bfce09ce6302e37 Mon Sep 17 00:00:00 2001 From: David Gow Date: Wed, 26 Oct 2022 22:10:40 +0800 Subject: [PATCH 153/328] perf/hw_breakpoint: test: Skip the test if dependencies unmet Running the test currently fails on non-SMP systems, despite being enabled by default. This means that running the test with: ./tools/testing/kunit/kunit.py run --arch x86_64 hw_breakpoint results in every hw_breakpoint test failing with: # test_one_cpu: failed to initialize: -22 not ok 1 - test_one_cpu Instead, use kunit_skip(), which will mark the test as skipped, and give a more comprehensible message: ok 1 - test_one_cpu # SKIP not enough cpus This makes it more obvious that the test is not suited to the test environment, and so wasn't run, rather than having run and failed. Signed-off-by: David Gow Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Latypov Acked-by: Marco Elver Link: https://lore.kernel.org/r/20221026141040.1609203-1-davidgow@google.com --- kernel/events/hw_breakpoint_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/hw_breakpoint_test.c b/kernel/events/hw_breakpoint_test.c index 5ced822df788..c57610f52bb4 100644 --- a/kernel/events/hw_breakpoint_test.c +++ b/kernel/events/hw_breakpoint_test.c @@ -295,11 +295,11 @@ static int test_init(struct kunit *test) { /* Most test cases want 2 distinct CPUs. */ if (num_online_cpus() < 2) - return -EINVAL; + kunit_skip(test, "not enough cpus"); /* Want the system to not use breakpoints elsewhere. */ if (hw_breakpoint_is_used()) - return -EBUSY; + kunit_skip(test, "hw breakpoint already in use"); return 0; } From 80275ca9e525c198c7efe045c4a6cdb68a2ea763 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 24 Sep 2022 13:47:37 +0800 Subject: [PATCH 154/328] perf/x86/rapl: Use standard Energy Unit for SPR Dram RAPL domain Intel Xeon servers used to use a fixed energy resolution (15.3uj) for Dram RAPL domain. But on SPR, Dram RAPL domain follows the standard energy resolution as described in MSR_RAPL_POWER_UNIT. Remove the SPR Dram energy unit quirk. Fixes: bcfd218b6679 ("perf/x86/rapl: Add support for Intel SPR platform") Signed-off-by: Zhang Rui Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kan Liang Tested-by: Wang Wendy Link: https://lkml.kernel.org/r/20220924054738.12076-3-rui.zhang@intel.com --- arch/x86/events/rapl.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index fea544e5842a..a829492bca4c 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -619,12 +619,8 @@ static int rapl_check_hw_unit(struct rapl_model *rm) case RAPL_UNIT_QUIRK_INTEL_HSW: rapl_hw_unit[PERF_RAPL_RAM] = 16; break; - /* - * SPR shares the same DRAM domain energy unit as HSW, plus it - * also has a fixed energy unit for Psys domain. - */ + /* SPR uses a fixed energy unit for Psys domain. */ case RAPL_UNIT_QUIRK_INTEL_SPR: - rapl_hw_unit[PERF_RAPL_RAM] = 16; rapl_hw_unit[PERF_RAPL_PSYS] = 0; break; default: From acc5568b90c19ac6375508a93b9676cd18a92a35 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 31 Oct 2022 08:41:18 -0700 Subject: [PATCH 155/328] perf/x86/intel: Fix pebs event constraints for ICL According to the latest event list, update the MEM_INST_RETIRED events which support the DataLA facility. Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support") Reported-by: Jannis Klinkenberg Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221031154119.571386-1-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7839507b3844..41e8d6591777 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -982,8 +982,13 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */ INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */ - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */ From 0916886bb978e7eae1ca3955ba07f51c020da20c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 31 Oct 2022 08:41:19 -0700 Subject: [PATCH 156/328] perf/x86/intel: Fix pebs event constraints for SPR According to the latest event list, update the MEM_INST_RETIRED events which support the DataLA facility for SPR. Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221031154119.571386-2-kan.liang@linux.intel.com --- arch/x86/events/intel/ds.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 41e8d6591777..446d2833efa7 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1009,8 +1009,13 @@ struct event_constraint intel_spr_pebs_event_constraints[] = { INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe), INTEL_PLD_CONSTRAINT(0x1cd, 0xfe), INTEL_PSD_CONSTRAINT(0x2cd, 0x1), - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), - INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), From 6f8faf471446844bb9c318e0340221049d5c19f4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 31 Oct 2022 08:45:50 -0700 Subject: [PATCH 157/328] perf/x86/intel: Add Cooper Lake stepping to isolation_ucodes[] The intel_pebs_isolation quirk checks both model number and stepping. Cooper Lake has a different stepping (11) than the other Skylake Xeon. It cannot benefit from the optimization in commit 9b545c04abd4f ("perf/x86/kvm: Avoid unnecessary work in guest filtering"). Add the stepping of Cooper Lake into the isolation_ucodes[] table. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221031154550.571663-1-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a646a5f9a235..1b92bf05fd65 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4911,6 +4911,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = { INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), From 5614dc3a47e3310fbc77ea3b67eaadd1c6417bf1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:37 +0000 Subject: [PATCH 158/328] btrfs: fix inode list leak during backref walking at resolve_indirect_refs() During backref walking, at resolve_indirect_refs(), if we get an error we jump to the 'out' label and call ulist_free() on the 'parents' ulist, which frees all the elements in the ulist - however that does not free any inode lists that may be attached to elements, through the 'aux' field of a ulist node, so we end up leaking lists if we have any attached to the unodes. Fix this by calling free_leaf_list() instead of ulist_free() when we exit from resolve_indirect_refs(). The static function free_leaf_list() is moved up for this to be possible and it's slightly simplified by removing unnecessary code. Fixes: 3301958b7c1d ("Btrfs: add inodes before dropping the extent lock in find_all_leafs") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 4ec18ceb2f21..40afae0af4e6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -648,6 +648,18 @@ unode_aux_to_inode_list(struct ulist_node *node) return (struct extent_inode_elem *)(uintptr_t)node->aux; } +static void free_leaf_list(struct ulist *ulist) +{ + struct ulist_node *node; + struct ulist_iterator uiter; + + ULIST_ITER_INIT(&uiter); + while ((node = ulist_next(ulist, &uiter))) + free_inode_elem_list(unode_aux_to_inode_list(node)); + + ulist_free(ulist); +} + /* * We maintain three separate rbtrees: one for direct refs, one for * indirect refs which have a key, and one for indirect refs which do not @@ -762,7 +774,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, cond_resched(); } out: - ulist_free(parents); + /* + * We may have inode lists attached to refs in the parents ulist, so we + * must free them before freeing the ulist and its refs. + */ + free_leaf_list(parents); return ret; } @@ -1409,24 +1425,6 @@ out: return ret; } -static void free_leaf_list(struct ulist *blocks) -{ - struct ulist_node *node = NULL; - struct extent_inode_elem *eie; - struct ulist_iterator uiter; - - ULIST_ITER_INIT(&uiter); - while ((node = ulist_next(blocks, &uiter))) { - if (!node->aux) - continue; - eie = unode_aux_to_inode_list(node); - free_inode_elem_list(eie); - node->aux = 0; - } - - ulist_free(blocks); -} - /* * Finds all leafs with a reference to the specified combination of bytenr and * offset. key_list_head will point to a list of corresponding keys (caller must From 92876eec382a0f19f33d09d2c939e9ca49038ae5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:38 +0000 Subject: [PATCH 159/328] btrfs: fix inode list leak during backref walking at find_parent_nodes() During backref walking, at find_parent_nodes(), if we are dealing with a data extent and we get an error while resolving the indirect backrefs, at resolve_indirect_refs(), or in the while loop that iterates over the refs in the direct refs rbtree, we end up leaking the inode lists attached to the direct refs we have in the direct refs rbtree that were not yet added to the refs ulist passed as argument to find_parent_nodes(). Since they were not yet added to the refs ulist and prelim_release() does not free the lists, on error the caller can only free the lists attached to the refs that were added to the refs ulist, all the remaining refs get their inode lists never freed, therefore leaking their memory. Fix this by having prelim_release() always free any attached inode list to each ref found in the rbtree, and have find_parent_nodes() set the ref's inode list to NULL once it transfers ownership of the inode list to a ref added to the refs ulist passed to find_parent_nodes(). Fixes: 86d5f9944252 ("btrfs: convert prelimary reference tracking to use rbtrees") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 40afae0af4e6..18374a6d05bd 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -289,8 +289,10 @@ static void prelim_release(struct preftree *preftree) struct prelim_ref *ref, *next_ref; rbtree_postorder_for_each_entry_safe(ref, next_ref, - &preftree->root.rb_root, rbnode) + &preftree->root.rb_root, rbnode) { + free_inode_elem_list(ref->inode_list); free_pref(ref); + } preftree->root = RB_ROOT_CACHED; preftree->count = 0; @@ -1384,6 +1386,12 @@ again: if (ret < 0) goto out; ref->inode_list = eie; + /* + * We transferred the list ownership to the ref, + * so set to NULL to avoid a double free in case + * an error happens after this. + */ + eie = NULL; } ret = ulist_add_merge_ptr(refs, ref->parent, ref->inode_list, @@ -1409,6 +1417,14 @@ again: eie->next = ref->inode_list; } eie = NULL; + /* + * We have transferred the inode list ownership from + * this ref to the ref we added to the 'refs' ulist. + * So set this ref's inode list to NULL to avoid + * use-after-free when our caller uses it or double + * frees in case an error happens before we return. + */ + ref->inode_list = NULL; } cond_resched(); } From d37de92b38932d40e4a251e876cc388f9aee5f42 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:39 +0000 Subject: [PATCH 160/328] btrfs: fix ulist leaks in error paths of qgroup self tests In the test_no_shared_qgroup() and test_multiple_refs() qgroup self tests, if we fail to add the tree ref, remove the extent item or remove the extent ref, we are returning from the test function without freeing the "old_roots" ulist that was allocated by the previous calls to btrfs_find_all_roots(). Fix that by calling ulist_free() before returning. Fixes: 442244c96332 ("btrfs: qgroup: Switch self test to extent-oriented qgroup mechanism.") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tests/qgroup-tests.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index eee1e4459541..843dd3d3adbe 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -232,8 +232,10 @@ static int test_no_shared_qgroup(struct btrfs_root *root, ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { @@ -266,8 +268,10 @@ static int test_no_shared_qgroup(struct btrfs_root *root, } ret = remove_extent_item(root, nodesize, nodesize); - if (ret) + if (ret) { + ulist_free(old_roots); return -EINVAL; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { @@ -329,8 +333,10 @@ static int test_multiple_refs(struct btrfs_root *root, ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { @@ -362,8 +368,10 @@ static int test_multiple_refs(struct btrfs_root *root, ret = add_tree_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { @@ -401,8 +409,10 @@ static int test_multiple_refs(struct btrfs_root *root, ret = remove_extent_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); - if (ret) + if (ret) { + ulist_free(old_roots); return ret; + } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { From d0ea17aec12ea0f7b9d2ed727d8ef8169d1e7699 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:40 +0000 Subject: [PATCH 161/328] btrfs: remove pointless and double ulist frees in error paths of qgroup tests Several places in the qgroup self tests follow the pattern of freeing the ulist pointer they passed to btrfs_find_all_roots() if the call to that function returned an error. That is pointless because that function always frees the ulist in case it returns an error. Also In some places like at test_multiple_refs(), after a call to btrfs_qgroup_account_extent() we also leave "old_roots" and "new_roots" pointing to ulists that were freed, because btrfs_qgroup_account_extent() has freed those ulists, and if after that the next call to btrfs_find_all_roots() fails, we call ulist_free() on the "old_roots" ulist again, resulting in a double free. So remove those calls to reduce the code size and avoid double ulist free in case of an error. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tests/qgroup-tests.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 843dd3d3adbe..63676ea19f29 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -225,7 +225,6 @@ static int test_no_shared_qgroup(struct btrfs_root *root, */ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -240,7 +239,6 @@ static int test_no_shared_qgroup(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -252,17 +250,18 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } + /* btrfs_qgroup_account_extent() always frees the ulists passed to it. */ + old_roots = NULL; + new_roots = NULL; + if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, nodesize, nodesize)) { test_err("qgroup counts didn't match expected values"); return -EINVAL; } - old_roots = NULL; - new_roots = NULL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -276,7 +275,6 @@ static int test_no_shared_qgroup(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -326,7 +324,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -341,7 +338,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -361,7 +357,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -376,7 +371,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -402,7 +396,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); if (ret) { - ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } @@ -417,7 +410,6 @@ static int test_multiple_refs(struct btrfs_root *root, ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); if (ret) { ulist_free(old_roots); - ulist_free(new_roots); test_err("couldn't find old roots: %d", ret); return ret; } From a348c8d4f6cf23ef04b0edaccdfe9d94c2d335db Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 2 Nov 2022 12:46:35 +0000 Subject: [PATCH 162/328] btrfs: fix nowait buffered write returning -ENOSPC If we are doing a buffered write in NOWAIT context and we can't reserve metadata space due to -ENOSPC, then we should return -EAGAIN so that we retry the write in a context allowed to block and do metadata reservation with flushing, which might succeed this time due to the allowed flushing. Returning -ENOSPC while in NOWAIT context simply makes some writes fail with -ENOSPC when they would likely succeed after switching from NOWAIT context to blocking context. That is unexpected behaviour and even fio complains about it with a warning like this: fio: io_u error on file /mnt/sdi/task_0.0.0: No space left on device: write offset=1535705088, buflen=65536 fio: pid=592630, err=28/file:io_u.c:1846, func=io_u error, error=No space left on device The fio's job config is this: [global] bs=64K ioengine=io_uring iodepth=1 size=2236962133 nr_files=1 filesize=2236962133 direct=0 runtime=10 fallocate=posix io_size=2236962133 group_reporting time_based [task_0] rw=randwrite directory=/mnt/sdi numjobs=4 So fix this by returning -EAGAIN if we are in NOWAIT context and the metadata reservation failed with -ENOSPC. Fixes: 304e45acdb8f ("btrfs: plumb NOWAIT through the write path") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d55ad46384d1..5ce2ae9d4f72 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1598,6 +1598,9 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, write_bytes); else btrfs_check_nocow_unlock(BTRFS_I(inode)); + + if (nowait && ret == -ENOSPC) + ret = -EAGAIN; break; } From eb81b682b131642405a05c627ab08cf0967b3dd8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 2 Nov 2022 12:46:36 +0000 Subject: [PATCH 163/328] btrfs: fix inode reserve space leak due to nowait buffered write During a nowait buffered write, if we fail to balance dirty pages we exit btrfs_buffered_write() without releasing the delalloc space reserved for an extent, resulting in leaking space from the inode's block reserve. So fix that by releasing the delalloc space for the extent when balancing dirty pages fails. Reported-by: kernel test robot Link: https://lore.kernel.org/all/202210111304.d369bc32-yujie.liu@intel.com Fixes: 965f47aeb5de ("btrfs: make btrfs_buffered_write nowait compatible") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5ce2ae9d4f72..d01631d47806 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1607,8 +1607,10 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, release_bytes = reserve_bytes; again: ret = balance_dirty_pages_ratelimited_flags(inode->i_mapping, bdp_flags); - if (ret) + if (ret) { + btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); break; + } /* * This is going to setup the pages array with the number of From 3a89b6dec9920026eaa90fe8457f4348d3388a98 Mon Sep 17 00:00:00 2001 From: "Tyler J. Stachecki" Date: Wed, 2 Nov 2022 18:56:39 +0200 Subject: [PATCH 164/328] wifi: ath11k: Fix QCN9074 firmware boot on x86 The 2.7.0 series of QCN9074's firmware requests 5 segments of memory instead of 3 (as in the 2.5.0 series). The first segment (11M) is too large to be kalloc'd in one go on x86 and requires piecemeal 1MB allocations, as was the case with the prior public firmware (2.5.0, 15M). Since f6f92968e1e5, ath11k will break the memory requests, but only if there were fewer than 3 segments requested by the firmware. It seems that 5 segments works fine and allows QCN9074 to boot on x86 with firmware 2.7.0, so change things accordingly. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1 Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.5.0.1-01208-QCAHKSWPL_SILICONZ-1 Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.16 Signed-off-by: Tyler J. Stachecki Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221022042728.43015-1-stachecki.tyler@gmail.com --- drivers/net/wireless/ath/ath11k/qmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h index 2ec56a34fa81..0909d53cefeb 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.h +++ b/drivers/net/wireless/ath/ath11k/qmi.h @@ -27,7 +27,7 @@ #define ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01 52 #define ATH11K_QMI_CALDB_SIZE 0x480000 #define ATH11K_QMI_BDF_EXT_STR_LENGTH 0x20 -#define ATH11K_QMI_FW_MEM_REQ_SEGMENT_CNT 3 +#define ATH11K_QMI_FW_MEM_REQ_SEGMENT_CNT 5 #define QMI_WLFW_REQUEST_MEM_IND_V01 0x0035 #define QMI_WLFW_FW_MEM_READY_IND_V01 0x0037 From f45cb6b29cd36514e13f7519770873d8c0457008 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Wed, 2 Nov 2022 13:48:03 +0200 Subject: [PATCH 165/328] wifi: ath11k: avoid deadlock during regulatory update in ath11k_regd_update() (cherry picked from commit d99884ad9e3673a12879bc2830f6e5a66cccbd78 in ath-next as users are seeing this bug more now, also cc stable) Running this test in a loop it is easy to reproduce an rtnl deadlock: iw reg set FI ifconfig wlan0 down What happens is that thread A (workqueue) tries to update the regulatory: try to acquire the rtnl_lock of ar->regd_update_work rtnl_lock+0x17/0x20 ath11k_regd_update+0x15a/0x260 [ath11k] ath11k_regd_update_work+0x15/0x20 [ath11k] process_one_work+0x228/0x670 worker_thread+0x4d/0x440 kthread+0x16d/0x1b0 ret_from_fork+0x22/0x30 And thread B (ifconfig) tries to stop the interface: try to cancel_work_sync(&ar->regd_update_work) in ath11k_mac_op_stop(). ifconfig 3109 [003] 2414.232506: probe: ath11k_mac_op_stop: (ffffffffc14187a0) drv_stop+0x30 ([mac80211]) ieee80211_do_stop+0x5d2 ([mac80211]) ieee80211_stop+0x3e ([mac80211]) __dev_close_many+0x9e ([kernel.kallsyms]) __dev_change_flags+0xbe ([kernel.kallsyms]) dev_change_flags+0x23 ([kernel.kallsyms]) devinet_ioctl+0x5e3 ([kernel.kallsyms]) inet_ioctl+0x197 ([kernel.kallsyms]) sock_do_ioctl+0x4d ([kernel.kallsyms]) sock_ioctl+0x264 ([kernel.kallsyms]) __x64_sys_ioctl+0x92 ([kernel.kallsyms]) do_syscall_64+0x3a ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe+0x63 ([kernel.kallsyms]) __GI___ioctl+0x7 (/lib/x86_64-linux-gnu/libc-2.23.so) The sequence of deadlock is: 1. Thread B calls rtnl_lock(). 2. Thread A starts to run and calls rtnl_lock() from within ath11k_regd_update_work(), then enters wait state because the lock is owned by thread B. 3. Thread B continues to run and tries to call cancel_work_sync(&ar->regd_update_work), but thread A is in ath11k_regd_update_work() waiting for rtnl_lock(). So cancel_work_sync() forever waits for ath11k_regd_update_work() to finish and we have a deadlock. Fix this by switching from using regulatory_set_wiphy_regd_sync() to regulatory_set_wiphy_regd(). Now cfg80211 will schedule another workqueue which handles the locking on it's own. So the ath11k workqueue can simply exit without taking any locks, avoiding the deadlock. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3 Cc: Signed-off-by: Wen Gong [kvalo: improve commit log] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath11k/reg.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index 7ee3ff69dfc8..6fae4e61ede7 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -287,11 +287,7 @@ int ath11k_regd_update(struct ath11k *ar) goto err; } - rtnl_lock(); - wiphy_lock(ar->hw->wiphy); - ret = regulatory_set_wiphy_regd_sync(ar->hw->wiphy, regd_copy); - wiphy_unlock(ar->hw->wiphy); - rtnl_unlock(); + ret = regulatory_set_wiphy_regd(ar->hw->wiphy, regd_copy); kfree(regd_copy); From 145dfad998eac74abc59219d936e905766ba2d98 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:03:08 +0000 Subject: [PATCH 166/328] KVM: VMX: Advertise PMU LBRs if and only if perf supports LBRs Advertise LBR support to userspace via MSR_IA32_PERF_CAPABILITIES if and only if perf fully supports LBRs. Perf may disable LBRs (by zeroing the number of LBRs) even on platforms the allegedly support LBRs, e.g. if probing any LBR MSRs during setup fails. Fixes: be635e34c284 ("KVM: vmx/pmu: Expose LBR_FMT in the MSR_IA32_PERF_CAPABILITIES") Reported-by: Like Xu Signed-off-by: Sean Christopherson Message-Id: <20221006000314.73240-3-seanjc@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/capabilities.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 87c4e46daf37..3bd7a8970618 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -400,6 +400,7 @@ static inline bool vmx_pebs_supported(void) static inline u64 vmx_get_perf_capabilities(void) { u64 perf_cap = PMU_CAP_FW_WRITES; + struct x86_pmu_lbr lbr; u64 host_perf_cap = 0; if (!enable_pmu) @@ -408,7 +409,8 @@ static inline u64 vmx_get_perf_capabilities(void) if (boot_cpu_has(X86_FEATURE_PDCM)) rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); - perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; + if (x86_perf_get_lbr(&lbr) >= 0 && lbr.nr) + perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; From 18e897d213cb152c786abab14919196bd9dc3a9f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:03:09 +0000 Subject: [PATCH 167/328] KVM: VMX: Fold vmx_supported_debugctl() into vcpu_supported_debugctl() Fold vmx_supported_debugctl() into vcpu_supported_debugctl(), its only caller. Setting bits only to clear them a few instructions later is rather silly, and splitting the logic makes things seem more complicated than they actually are. Opportunistically drop DEBUGCTLMSR_LBR_MASK now that there's a single reference to the pair of bits. The extra layer of indirection provides no meaningful value and makes it unnecessarily tedious to understand what KVM is doing. No functional change. Signed-off-by: Sean Christopherson Message-Id: <20221006000314.73240-4-seanjc@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/capabilities.h | 15 --------------- arch/x86/kvm/vmx/vmx.c | 12 +++++++----- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 3bd7a8970618..07254314f3dd 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -24,8 +24,6 @@ extern int __read_mostly pt_mode; #define PMU_CAP_FW_WRITES (1ULL << 13) #define PMU_CAP_LBR_FMT 0x3f -#define DEBUGCTLMSR_LBR_MASK (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) - struct nested_vmx_msrs { /* * We only store the "true" versions of the VMX capability MSRs. We @@ -421,19 +419,6 @@ static inline u64 vmx_get_perf_capabilities(void) return perf_cap; } -static inline u64 vmx_supported_debugctl(void) -{ - u64 debugctl = 0; - - if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) - debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; - - if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) - debugctl |= DEBUGCTLMSR_LBR_MASK; - - return debugctl; -} - static inline bool cpu_has_notify_vmexit(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 65f092e4a81b..981b38355066 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2023,13 +2023,15 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) { - u64 debugctl = vmx_supported_debugctl(); + u64 debugctl = 0; - if (!intel_pmu_lbr_is_enabled(vcpu)) - debugctl &= ~DEBUGCTLMSR_LBR_MASK; + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && + guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; - if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) - debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) && + intel_pmu_lbr_is_enabled(vcpu)) + debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; return debugctl; } From b333b8ebb85d62469f32b52fa03fd7d1522afc03 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 Oct 2022 00:03:10 +0000 Subject: [PATCH 168/328] KVM: VMX: Ignore guest CPUID for host userspace writes to DEBUGCTL Ignore guest CPUID for host userspace writes to the DEBUGCTL MSR, KVM's ABI is that setting CPUID vs. state can be done in any order, i.e. KVM allows userspace to stuff MSRs prior to setting the guest's CPUID that makes the new MSR "legal". Keep the vmx_get_perf_capabilities() check for guest writes, even though it's technically unnecessary since the vCPU's PERF_CAPABILITIES is consulted when refreshing LBR support. A future patch will clean up vmx_get_perf_capabilities() to avoid the RDMSR on every call, at which point the paranoia will incur no meaningful overhead. Note, prior to vmx_get_perf_capabilities() checking that the host fully supports LBRs via x86_perf_get_lbr(), KVM effectively relied on intel_pmu_lbr_is_enabled() to guard against host userspace enabling LBRs on platforms without full support. Fixes: c646236344e9 ("KVM: vmx/pmu: Add PMU_CAP_LBR_FMT check when guest LBR is enabled") Signed-off-by: Sean Christopherson Message-Id: <20221006000314.73240-5-seanjc@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 981b38355066..63247c57c72c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2021,16 +2021,16 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, return (unsigned long)data; } -static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) +static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated) { u64 debugctl = 0; if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && - guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))) debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) && - intel_pmu_lbr_is_enabled(vcpu)) + (host_initiated || intel_pmu_lbr_is_enabled(vcpu))) debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; return debugctl; @@ -2105,7 +2105,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_DEBUGCTLMSR: { - u64 invalid = data & ~vcpu_supported_debugctl(vcpu); + u64 invalid; + + invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated); if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) { if (report_ignored_msrs) vcpu_unimpl(vcpu, "%s: BTF|LBR in IA32_DEBUGCTLMSR 0x%llx, nop\n", From 8e987f1f4da92d9f1dd020418bfab9fe04b1c54c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 2 Nov 2022 21:45:59 +0800 Subject: [PATCH 169/328] Documentation: devres: add missing I2C helper Add missing devm_i2c_add_adapter() to devres.rst. It's introduced by commit 07740c92ae57 ("i2c: core: add managed function for adding i2c adapters"). Fixes: 07740c92ae57 ("i2c: core: add managed function for adding i2c adapters") Signed-off-by: Yang Yingliang Acked-by: Yicong Yang Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- Documentation/driver-api/driver-model/devres.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 687adb58048e..56082265e8e5 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -279,6 +279,7 @@ GPIO devm_gpio_request_one() I2C + devm_i2c_add_adapter() devm_i2c_new_dummy_device() IIO From 341421084d705475817f7f0d68e130370d10b20d Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 20 Oct 2022 11:46:40 -0400 Subject: [PATCH 170/328] drm/amd/display: Update DSC capabilitie for DCN314 dcn314 has 4 DSC - conflicted hardware document updated and confirmed. Tested-by: Mark Broadworth Reviewed-by: Charlene Liu Acked-by: Rodrigo Siqueira Signed-off-by: Leo Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index d0ad72caead2..9066c511a052 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -847,7 +847,7 @@ static const struct resource_caps res_cap_dcn314 = { .num_ddc = 5, .num_vmid = 16, .num_mpc_3dlut = 2, - .num_dsc = 3, + .num_dsc = 4, }; static const struct dc_plane_cap plane_cap = { From 14aed119942f6c2f1286022323139f7404db5d2b Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Thu, 20 Oct 2022 11:46:41 -0400 Subject: [PATCH 171/328] drm/amd/display: Ignore Cable ID Feature Ignore cable ID for DP2 receivers that does not support the feature. Tested-by: Mark Broadworth Reviewed-by: Roman Li Acked-by: Rodrigo Siqueira Signed-off-by: Fangzhi Zuo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c053cb79cd06..589bee9acf16 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1549,6 +1549,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm; + /* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */ + adev->dm.dc->debug.ignore_cable_id = true; + r = dm_dmub_hw_init(adev); if (r) { DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); From e59843c4cdd68a369591630088171eeacce9859f Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Thu, 20 Oct 2022 11:46:44 -0400 Subject: [PATCH 172/328] drm/amd/display: Limit dcn32 to 1950Mhz display clock [why] Hardware team recommends we limit dispclock to 1950Mhz for all DCN3.2.x [how] Limit to 1950 when initializing clocks. Tested-by: Mark Broadworth Reviewed-by: Alvin Lee Acked-by: Rodrigo Siqueira Signed-off-by: Jun Lei Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- .../gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 1c612ccf1944..fd0313468fdb 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -157,6 +157,7 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); unsigned int num_levels; struct clk_limit_num_entries *num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk; + unsigned int i; memset(&(clk_mgr_base->clks), 0, sizeof(struct dc_clocks)); clk_mgr_base->clks.p_state_change_support = true; @@ -205,18 +206,17 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) clk_mgr->dpm_present = true; if (clk_mgr_base->ctx->dc->debug.min_disp_clk_khz) { - unsigned int i; - for (i = 0; i < num_levels; i++) if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz < khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz)) clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz = khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz); } + for (i = 0; i < num_levels; i++) + if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz > 1950) + clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz = 1950; if (clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz) { - unsigned int i; - for (i = 0; i < num_levels; i++) if (clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz < khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz)) From c580d758ba1b79de9ea7a475d95a6278736ae462 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 20 Oct 2022 11:46:47 -0400 Subject: [PATCH 173/328] drm/amd/display: Update latencies on DCN321 Update DF related latencies based on new measurements. Tested-by: Mark Broadworth Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Dillon Varone Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index dd90f241e906..7352f7514410 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -119,15 +119,15 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { }, }, .num_states = 1, - .sr_exit_time_us = 12.36, - .sr_enter_plus_exit_time_us = 16.72, + .sr_exit_time_us = 19.95, + .sr_enter_plus_exit_time_us = 24.36, .sr_exit_z8_time_us = 285.0, .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, + .urgent_latency_pixel_data_only_us = 9.35, + .urgent_latency_pixel_mixed_with_vm_data_us = 9.35, + .urgent_latency_vm_data_only_us = 9.35, .fclk_change_latency_us = 20, .usr_retraining_latency_us = 2, .smn_latency_us = 2, From 6cb5cec16c380be4cf9776a8c23b72e9fe742fd1 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 20 Oct 2022 11:46:48 -0400 Subject: [PATCH 174/328] drm/amd/display: Set memclk levels to be at least 1 for dcn32 [Why] Cannot report 0 memclk levels even when SMU does not provide any. [How] When memclk levels reported by SMU is 0, set levels to 1. Tested-by: Mark Broadworth Reviewed-by: Martin Leung Acked-by: Rodrigo Siqueira Signed-off-by: Dillon Varone Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index fd0313468fdb..6f77d8e538ab 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -669,6 +669,9 @@ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) &clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz, &num_entries_per_clk->num_memclk_levels); + /* memclk must have at least one level */ + num_entries_per_clk->num_memclk_levels = num_entries_per_clk->num_memclk_levels ? num_entries_per_clk->num_memclk_levels : 1; + dcn32_init_single_clock(clk_mgr, PPCLK_FCLK, &clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz, &num_entries_per_clk->num_fclk_levels); From c3d3f35b725bf9c93bec6d3c056f6bb7cfd27403 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 20 Oct 2022 11:46:51 -0400 Subject: [PATCH 175/328] drm/amd/display: Enable timing sync on DCN32 Missed enabling timing sync on DCN32 because DCN32 has a different DML param. Tested-by: Mark Broadworth Reviewed-by: Martin Leung Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index d680f1c5b69f..45db40c41882 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -1228,6 +1228,7 @@ int dcn20_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.dcc = false; pipes[pipe_cnt].pipe.src.dcc_rate = 1; pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank; + pipes[pipe_cnt].pipe.dest.synchronize_timings = synchronized_vblank; pipes[pipe_cnt].pipe.dest.hblank_start = timing->h_total - timing->h_front_porch; pipes[pipe_cnt].pipe.dest.hblank_end = pipes[pipe_cnt].pipe.dest.hblank_start - timing->h_addressable From 9cb0dc6ccb7df9abe1407574ed4ad84895822d11 Mon Sep 17 00:00:00 2001 From: Max Tseng Date: Thu, 20 Oct 2022 11:46:52 -0400 Subject: [PATCH 176/328] drm/amd/display: cursor update command incomplete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missing send cursor_rect width & Height into DMUB. PSR-SU would use these information. But missing these assignment in last refactor commit Reported-by: Timur Kristóf Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2227 Fixes: b73353f7f3d4 ("drm/amd/display: Use the same cursor info across features") Tested-by: Mark Broadworth Reviewed-by: Anthony Koo Acked-by: Rodrigo Siqueira Signed-off-by: Max Tseng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 4996d2810edb..938dba5249d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -623,6 +623,10 @@ void hubp2_cursor_set_attributes( hubp->att.size.bits.width = attr->width; hubp->att.size.bits.height = attr->height; hubp->att.cur_ctl.bits.mode = attr->color_format; + + hubp->cur_rect.w = attr->width; + hubp->cur_rect.h = attr->height; + hubp->att.cur_ctl.bits.pitch = hw_pitch; hubp->att.cur_ctl.bits.line_per_chunk = lpc; hubp->att.cur_ctl.bits.cur_2x_magnify = attr->attribute_flags.bits.ENABLE_MAGNIFICATION; From 89b3554782e6b65894f0551e9e0a82ad02dac94d Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Mon, 24 Oct 2022 12:47:47 +0800 Subject: [PATCH 177/328] drm/amdgpu: set fb_modifiers_not_supported in vkms This patch to fix the gdm3 start failure with virual display: /usr/libexec/gdm-x-session[1711]: (II) AMDGPU(0): Setting screen physical size to 270 x 203 /usr/libexec/gdm-x-session[1711]: (EE) AMDGPU(0): Failed to make import prime FD as pixmap: 22 /usr/libexec/gdm-x-session[1711]: (EE) AMDGPU(0): failed to set mode: Invalid argument /usr/libexec/gdm-x-session[1711]: (WW) AMDGPU(0): Failed to set mode on CRTC 0 /usr/libexec/gdm-x-session[1711]: (EE) AMDGPU(0): Failed to enable any CRTC gnome-shell[1840]: Running GNOME Shell (using mutter 42.2) as a X11 window and compositing manager /usr/libexec/gdm-x-session[1711]: (EE) AMDGPU(0): failed to set mode: Invalid argument vkms doesn't have modifiers support, set fb_modifiers_not_supported to bring the gdm back. Signed-off-by: Yifan Zhang Acked-by: Guchun Chen Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index f4b5301ea2a0..500a1dc4fe02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -500,6 +500,8 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; + adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; + r = amdgpu_display_modeset_create_props(adev); if (r) return r; From e542ca6e3e554bad53b2ea5741873b67f4585ea9 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Tue, 25 Oct 2022 14:42:13 -0400 Subject: [PATCH 178/328] drm/amdgpu: correct MES debugfs versions Use mes.sched_version, mes.kiq_version for debugfs as mes.ucode_fw_version does not contain correct versioning information. Signed-off-by: Graham Sider Reviewed-by: Jack Xiao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bf1ff8f0e712..4e42dcb1950f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -337,12 +337,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = adev->psp.cap_feature_version; break; case AMDGPU_INFO_FW_MES_KIQ: - fw_info->ver = adev->mes.ucode_fw_version[0]; - fw_info->feature = 0; + fw_info->ver = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK; + fw_info->feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) + >> AMDGPU_MES_FEAT_VERSION_SHIFT; break; case AMDGPU_INFO_FW_MES: - fw_info->ver = adev->mes.ucode_fw_version[1]; - fw_info->feature = 0; + fw_info->ver = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + fw_info->feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) + >> AMDGPU_MES_FEAT_VERSION_SHIFT; break; case AMDGPU_INFO_FW_IMU: fw_info->ver = adev->gfx.imu_fw_version; From 5b994354af3cab770bf13386469c5725713679af Mon Sep 17 00:00:00 2001 From: Yang Li Date: Wed, 26 Oct 2022 10:00:54 +0800 Subject: [PATCH 179/328] drm/amdkfd: Fix NULL pointer dereference in svm_migrate_to_ram() ./drivers/gpu/drm/amd/amdkfd/kfd_migrate.c:985:58-62: ERROR: p is NULL but dereferenced. Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2549 Reported-by: Abaci Robot Signed-off-by: Yang Li Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 2797029bd500..22b077ac9a19 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -973,12 +973,10 @@ out_unlock_prange: out_unlock_svms: mutex_unlock(&p->svms.lock); out_unref_process: + pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); kfd_unref_process(p); out_mmput: mmput(mm); - - pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); - return r ? VM_FAULT_SIGBUS : 0; } From 8d4de331f1b24a22d18e3c6116aa25228cf54854 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 26 Oct 2022 14:03:55 -0500 Subject: [PATCH 180/328] drm/amd: Fail the suspend if resources can't be evicted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a system does not have swap and memory is under 100% usage, amdgpu will fail to evict resources. Currently the suspend carries on proceeding to reset the GPU: ``` [drm] evicting device resources failed [drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block failed -12 [drm] free PSP TMR buffer [TTM] Failed allocating page table [drm] evicting device resources failed amdgpu 0000:03:00.0: amdgpu: MODE1 reset amdgpu 0000:03:00.0: amdgpu: GPU mode1 reset amdgpu 0000:03:00.0: amdgpu: GPU smu mode1 reset ``` At this point if the suspend actually succeeded I think that amdgpu would have recovered because the GPU would have power cut off and restored. However the kernel fails to continue the suspend from the memory pressure and amdgpu fails to run the "resume" from the aborted suspend. ``` ACPI: PM: Preparing to enter system sleep state S3 SLUB: Unable to allocate memory on node -1, gfp=0xdc0(GFP_KERNEL|__GFP_ZERO) cache: Acpi-State, object size: 80, buffer size: 80, default order: 0, min order: 0 node 0: slabs: 22, objs: 1122, free: 0 ACPI Error: AE_NO_MEMORY, Could not update object reference count (20210730/utdelete-651) [drm:psp_hw_start [amdgpu]] *ERROR* PSP load kdb failed! [drm:psp_resume [amdgpu]] *ERROR* PSP resume failed [drm:amdgpu_device_fw_loading [amdgpu]] *ERROR* resume of IP block failed -62 amdgpu 0000:03:00.0: amdgpu: amdgpu_device_ip_resume failed (-62). PM: dpm_run_callback(): pci_pm_resume+0x0/0x100 returns -62 amdgpu 0000:03:00.0: PM: failed to resume async: error -62 ``` To avoid this series of unfortunate events, fail amdgpu's suspend when the memory eviction fails. This will let the system gracefully recover and the user can try suspend again when the memory pressure is relieved. Reported-by: post@davidak.de Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2223 Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ddaecb2610c9..64510898eedd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4060,15 +4060,18 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) * at suspend time. * */ -static void amdgpu_device_evict_resources(struct amdgpu_device *adev) +static int amdgpu_device_evict_resources(struct amdgpu_device *adev) { + int ret; + /* No need to evict vram on APUs for suspend to ram or s2idle */ if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) - return; + return 0; - if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM)) + ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); + if (ret) DRM_WARN("evicting device resources failed\n"); - + return ret; } /* @@ -4118,7 +4121,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); - amdgpu_device_evict_resources(adev); + r = amdgpu_device_evict_resources(adev); + if (r) + return r; amdgpu_fence_driver_hw_fini(adev); From a3e5ce56f3d260f2ec8e5242c33f57e60ae9eba7 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Wed, 26 Oct 2022 15:08:24 -0400 Subject: [PATCH 181/328] drm/amdgpu: disable GFXOFF during compute for GFX11 Temporary workaround to fix issues observed in some compute applications when GFXOFF is enabled on GFX11. Signed-off-by: Graham Sider Acked-by: Alex Deucher Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0561812aa0a4..5d9a34601a1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -706,6 +706,13 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { + /* Temporary workaround to fix issues observed in some + * compute applications when GFXOFF is enabled on GFX11. + */ + if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) { + pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); + amdgpu_gfx_off_ctrl(adev, idle); + } amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle); From 8fe8ce896c1cc29d6bfebb3c7b3cc948f72cd32c Mon Sep 17 00:00:00 2001 From: Gavin Wan Date: Wed, 26 Oct 2022 13:45:25 -0400 Subject: [PATCH 182/328] drm/amdgpu: Disable GPU reset on SRIOV before remove pci. The recent change brought a bug on SRIOV envrionment. It caused unloading amdgpu failed on Guest VM. The reason is that the VF FLR was requested while unloading amdgpu driver, but the VF FLR of SRIOV sequence is wrong while removing PCI device. For SRIOV, the guest driver should not trigger the whole XGMI hive to do the reset. Host driver control how the device been reset. Fixes: f5c7e7797060 ("drm/amdgpu: Adjust removal control flow for smu v13_0_2") Acked-by: Alex Deucher Reviewed-by: Shaoyun Liu Signed-off-by: Gavin Wan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 3c9fecdd6b2f..bf2d50c8c92a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2201,7 +2201,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(dev->dev); } - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)) { + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && + !amdgpu_sriov_vf(adev)) { bool need_to_reset_gpu = false; if (adev->gmc.xgmi.num_physical_nodes > 1) { From bad610c97c08eef3ed1fa769a8b08b94f95b451e Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 7 Oct 2022 12:43:26 -0400 Subject: [PATCH 183/328] drm/amd/display: Fix DCN32 DSC delay calculation [Why] DCN32 DSC delay calculation had an unintentional integer division, resulting in a mismatch against the DML spreadsheet. [How] Cast numerator to double before performing the division. Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: George Shen Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index ad66e241f9ae..4a3e7a5d2758 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1746,7 +1746,7 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, } DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * - dml_ceil(DSCDelayRequirement_val / HActive, 1); + dml_ceil((double)DSCDelayRequirement_val / HActive, 1); DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; From ab007e5db5d3b8b8975c7eec69992ff38fe2a46c Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 14 Oct 2022 17:36:32 -0400 Subject: [PATCH 184/328] drm/amd/display: Use forced DSC bpp in DML [Why] DSC config is calculated separately from DML calculations. DML should use these separately calculated DSC params. The issue is that the calculated bpp is not properly propagated into DML. [How] Correctly used forced_bpp value in DML. Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: George Shen Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 2 +- drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 5b91660a6496..60351b2891d0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1627,7 +1627,7 @@ static void mode_support_configuration(struct vba_vars_st *v, && !mode_lib->vba.MSOOrODMSplitWithNonDPLink && !mode_lib->vba.NotEnoughLanesForMSO && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 - && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP + //&& !mode_lib->vba.DSCOnlyIfNecessaryWithBPP && !mode_lib->vba.DSC422NativeNotSupported && !mode_lib->vba.MPCCombineMethodIncompatible && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 03924aed8d5c..8e6585dab20e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -625,7 +625,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.skip_dio_check[mode_lib->vba.NumberOfActivePlanes] = dout->is_virtual; - if (!dout->dsc_enable) + if (dout->dsc_enable) mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = dout->output_bpp; else mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = 0.0; From 8dc323133d74518e3b5b07242e2b2f088799ea6e Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 14 Oct 2022 17:40:08 -0400 Subject: [PATCH 185/328] drm/amd/display: Round up DST_after_scaler to nearest int [Why] The DST_after_scaler value that DML spreadsheet outputs is generally the driver value round up to the nearest int. Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: George Shen Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c index a1276f6b9581..395ae8761980 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c @@ -291,8 +291,8 @@ void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, dlg_regs->vready_after_vcount0); - dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); - dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); + dst_x_after_scaler = dml_ceil(get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1); + dst_y_after_scaler = dml_ceil(get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1); // do some adjustment on the dst_after scaler to account for odm combine mode dml_print("DML_DLG: %s: input dst_x_after_scaler = %d\n", __func__, dst_x_after_scaler); From d5e0fb0d9dea545defb963ec1073bd9a1a8b5395 Mon Sep 17 00:00:00 2001 From: George Shen Date: Fri, 14 Oct 2022 17:46:03 -0400 Subject: [PATCH 186/328] drm/amd/display: Add DSC delay factor workaround [Why] Certain 4K high refresh rate modes requiring DSC are exhibiting top of screen underflow corruption. Increasing the DSC delay by a factor of 6 percent stops the underflow for most use cases. [How] Multiply DSC delay requirement in DML by a factor. Add debug option to make this DSC delay factor configurable. Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: George Shen Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 4 +++- .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 5 +++-- .../drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 5 +++-- .../drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h | 3 ++- drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 5 ++++- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 3 +++ 7 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index bfc5474c0f4c..737b221ca689 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -852,6 +852,7 @@ struct dc_debug_options { bool enable_double_buffered_dsc_pg_support; bool enable_dp_dig_pixel_rate_div_policy; enum lttpr_mode lttpr_mode_override; + unsigned int dsc_delay_factor_wa_x1000; }; struct gpu_info_soc_bounding_box_v1_0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 819de0f11012..f37c9a6b3b7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2359,9 +2359,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - } + /* DML DSC delay factor workaround */ + dcn3_2_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0; + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 60351b2891d0..47ff0a8bd105 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -367,7 +367,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.OutputBpp[k], mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k], mode_lib->vba.PixelClock[k], - mode_lib->vba.PixelClockBackEnd[k]); + mode_lib->vba.PixelClockBackEnd[k], mode_lib->vba.ip.dsc_delay_factor_wa); } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) @@ -2475,7 +2475,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.OutputBppPerState[i][k], mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k], - mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k]); + mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k], + mode_lib->vba.ip.dsc_delay_factor_wa); } for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 4a3e7a5d2758..968924c491c1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1726,7 +1726,8 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, enum output_format_class OutputFormat, enum output_encoder_class Output, double PixelClock, - double PixelClockBackEnd) + double PixelClockBackEnd, + double dsc_delay_factor_wa) { unsigned int DSCDelayRequirement_val; @@ -1764,7 +1765,7 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); #endif - return DSCDelayRequirement_val; + return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1); } void dml32_CalculateSurfaceSizeInMall( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 55cead0d4237..2c3827546ac7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -327,7 +327,8 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, enum output_format_class OutputFormat, enum output_encoder_class Output, double PixelClock, - double PixelClockBackEnd); + double PixelClockBackEnd, + double dsc_delay_factor_wa); void dml32_CalculateSurfaceSizeInMall( unsigned int NumberOfActiveSurfaces, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 7352f7514410..ec0486efab14 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -29,6 +29,7 @@ #include "dcn321_fpu.h" #include "dcn32/dcn32_resource.h" #include "dcn321/dcn321_resource.h" +#include "dml/dcn32/display_mode_vba_util_32.h" #define DCN3_2_DEFAULT_DET_SIZE 256 @@ -538,9 +539,11 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - } + /* DML DSC delay factor workaround */ + dcn3_21_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0; + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index f33a8879b05a..d7be01ac0751 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -364,6 +364,9 @@ struct _vcs_dpi_ip_params_st { unsigned int max_num_dp2p0_outputs; unsigned int max_num_dp2p0_streams; unsigned int VBlankNomDefaultUS; + + /* DM workarounds */ + double dsc_delay_factor_wa; // TODO: Remove after implementing root cause fix }; struct _vcs_dpi_display_xfc_params_st { From 7461016c5706eb8c477752bf69e5c9f5a38f502b Mon Sep 17 00:00:00 2001 From: Nevenko Stupar Date: Fri, 6 May 2022 16:32:38 -0400 Subject: [PATCH 187/328] drm/amd/display: Investigate tool reported FCLK P-state deviations [Why] Fix for some of the tool reported modes for FCLK P-state deviations and UCLK P-state deviations that are coming from DSC terms and/or Scaling terms causing MinActiveFCLKChangeLatencySupported and MaxActiveDRAMClockChangeLatencySupported incorrectly calculated in DML for these configurations. Reviewed-by: Chaitanya Dhere Acked-by: Jasdeep Dhillon Acked-by: Alex Hung Signed-off-by: Nevenko Stupar Tested-by: Mark Broadworth Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 47ff0a8bd105..3d184679f129 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -364,7 +364,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { v->DSCDelay[k] = dml32_DSCDelayRequirement(mode_lib->vba.DSCEnabled[k], mode_lib->vba.ODMCombineEnabled[k], mode_lib->vba.DSCInputBitPerComponent[k], - mode_lib->vba.OutputBpp[k], mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k], + mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k], + mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k], mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k], mode_lib->vba.ip.dsc_delay_factor_wa); From 6640f8e5adb69a0550fe1d224d3ac64c10f00eef Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 13 Oct 2022 21:41:13 -0500 Subject: [PATCH 188/328] drm/amdkfd: update GFX11 CWSR trap handler With corresponding FW change fixes issue where triggering CWSR on a workgroup with waves in s_barrier wouldn't lead to a back-off and therefore cause a hang. Signed-off-by: Jay Cornwall Tested-by: Graham Sider Acked-by: Harish Kasiviswanathan Acked-by: Felix Kuehling Reviewed-by: Graham Sider Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 792 +++++++++--------- .../amd/amdkfd/cwsr_trap_handler_gfx10.asm | 6 + 2 files changed, 403 insertions(+), 395 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index c7118843db05..0c4c5499bb5c 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -2495,442 +2495,444 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx11_hex[] = { - 0xbfa00001, 0xbfa0021e, + 0xbfa00001, 0xbfa00221, 0xb0804006, 0xb8f8f802, 0x9178ff78, 0x00020006, - 0xb8fbf803, 0xbf0d9f6d, - 0xbfa20006, 0x8b6eff78, - 0x00002000, 0xbfa10009, - 0x8b6eff6d, 0x00ff0000, - 0xbfa2001e, 0x8b6eff7b, - 0x00000400, 0xbfa20041, - 0xbf830010, 0xb8fbf803, - 0xbfa0fffa, 0x8b6eff7b, - 0x00000900, 0xbfa20015, - 0x8b6eff7b, 0x000071ff, - 0xbfa10008, 0x8b6fff7b, - 0x00007080, 0xbfa10001, - 0xbeee1287, 0xb8eff801, - 0x846e8c6e, 0x8b6e6f6e, - 0xbfa2000a, 0x8b6eff6d, - 0x00ff0000, 0xbfa20007, - 0xb8eef801, 0x8b6eff6e, - 0x00000800, 0xbfa20003, + 0xb8fbf803, 0xbf0d9e6d, + 0xbfa10001, 0xbfbd0000, + 0xbf0d9f6d, 0xbfa20006, + 0x8b6eff78, 0x00002000, + 0xbfa10009, 0x8b6eff6d, + 0x00ff0000, 0xbfa2001e, 0x8b6eff7b, 0x00000400, - 0xbfa20026, 0xbefa4d82, - 0xbf89fc07, 0x84fa887a, - 0xf4005bbd, 0xf8000010, - 0xbf89fc07, 0x846e976e, - 0x9177ff77, 0x00800000, - 0x8c776e77, 0xf4045bbd, - 0xf8000000, 0xbf89fc07, - 0xf4045ebd, 0xf8000008, - 0xbf89fc07, 0x8bee6e6e, - 0xbfa10001, 0xbe80486e, - 0x8b6eff6d, 0x01ff0000, - 0xbfa20005, 0x8c78ff78, - 0x00002000, 0x80ec886c, - 0x82ed806d, 0xbfa00005, - 0x8b6eff6d, 0x01000000, - 0xbfa20002, 0x806c846c, - 0x826d806d, 0x8b6dff6d, - 0x0000ffff, 0x8bfe7e7e, - 0x8bea6a6a, 0xb978f802, - 0xbe804a6c, 0x8b6dff6d, - 0x0000ffff, 0xbefa0080, - 0xb97a0283, 0xbeee007e, - 0xbeef007f, 0xbefe0180, - 0xbefe4d84, 0xbf89fc07, - 0x8b7aff7f, 0x04000000, - 0x847a857a, 0x8c6d7a6d, - 0xbefa007e, 0x8b7bff7f, - 0x0000ffff, 0xbefe00c1, - 0xbeff00c1, 0xdca6c000, - 0x007a0000, 0x7e000280, - 0xbefe007a, 0xbeff007b, - 0xb8fb02dc, 0x847b997b, - 0xb8fa3b05, 0x807a817a, - 0xbf0d997b, 0xbfa20002, - 0x847a897a, 0xbfa00001, - 0x847a8a7a, 0xb8fb1e06, - 0x847b8a7b, 0x807a7b7a, + 0xbfa20041, 0xbf830010, + 0xb8fbf803, 0xbfa0fffa, + 0x8b6eff7b, 0x00000900, + 0xbfa20015, 0x8b6eff7b, + 0x000071ff, 0xbfa10008, + 0x8b6fff7b, 0x00007080, + 0xbfa10001, 0xbeee1287, + 0xb8eff801, 0x846e8c6e, + 0x8b6e6f6e, 0xbfa2000a, + 0x8b6eff6d, 0x00ff0000, + 0xbfa20007, 0xb8eef801, + 0x8b6eff6e, 0x00000800, + 0xbfa20003, 0x8b6eff7b, + 0x00000400, 0xbfa20026, + 0xbefa4d82, 0xbf89fc07, + 0x84fa887a, 0xf4005bbd, + 0xf8000010, 0xbf89fc07, + 0x846e976e, 0x9177ff77, + 0x00800000, 0x8c776e77, + 0xf4045bbd, 0xf8000000, + 0xbf89fc07, 0xf4045ebd, + 0xf8000008, 0xbf89fc07, + 0x8bee6e6e, 0xbfa10001, + 0xbe80486e, 0x8b6eff6d, + 0x01ff0000, 0xbfa20005, + 0x8c78ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbfa00005, 0x8b6eff6d, + 0x01000000, 0xbfa20002, + 0x806c846c, 0x826d806d, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb978f802, 0xbe804a6c, + 0x8b6dff6d, 0x0000ffff, + 0xbefa0080, 0xb97a0283, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbefe4d84, + 0xbf89fc07, 0x8b7aff7f, + 0x04000000, 0x847a857a, + 0x8c6d7a6d, 0xbefa007e, 0x8b7bff7f, 0x0000ffff, - 0x807aff7a, 0x00000200, - 0x807a7e7a, 0x827b807b, - 0xd7610000, 0x00010870, - 0xd7610000, 0x00010a71, - 0xd7610000, 0x00010c72, - 0xd7610000, 0x00010e73, - 0xd7610000, 0x00011074, - 0xd7610000, 0x00011275, - 0xd7610000, 0x00011476, - 0xd7610000, 0x00011677, - 0xd7610000, 0x00011a79, - 0xd7610000, 0x00011c7e, - 0xd7610000, 0x00011e7f, - 0xbefe00ff, 0x00003fff, - 0xbeff0080, 0xdca6c040, - 0x007a0000, 0xd760007a, - 0x00011d00, 0xd760007b, - 0x00011f00, 0xbefe007a, - 0xbeff007b, 0xbef4007e, - 0x8b75ff7f, 0x0000ffff, - 0x8c75ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x10807fac, 0xbef1007d, - 0xbef00080, 0xb8f302dc, - 0x84739973, 0xbefe00c1, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00002, - 0xbeff00c1, 0xbfa00009, + 0xbefe00c1, 0xbeff00c1, + 0xdca6c000, 0x007a0000, + 0x7e000280, 0xbefe007a, + 0xbeff007b, 0xb8fb02dc, + 0x847b997b, 0xb8fa3b05, + 0x807a817a, 0xbf0d997b, + 0xbfa20002, 0x847a897a, + 0xbfa00001, 0x847a8a7a, + 0xb8fb1e06, 0x847b8a7b, + 0x807a7b7a, 0x8b7bff7f, + 0x0000ffff, 0x807aff7a, + 0x00000200, 0x807a7e7a, + 0x827b807b, 0xd7610000, + 0x00010870, 0xd7610000, + 0x00010a71, 0xd7610000, + 0x00010c72, 0xd7610000, + 0x00010e73, 0xd7610000, + 0x00011074, 0xd7610000, + 0x00011275, 0xd7610000, + 0x00011476, 0xd7610000, + 0x00011677, 0xd7610000, + 0x00011a79, 0xd7610000, + 0x00011c7e, 0xd7610000, + 0x00011e7f, 0xbefe00ff, + 0x00003fff, 0xbeff0080, + 0xdca6c040, 0x007a0000, + 0xd760007a, 0x00011d00, + 0xd760007b, 0x00011f00, + 0xbefe007a, 0xbeff007b, + 0xbef4007e, 0x8b75ff7f, + 0x0000ffff, 0x8c75ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x10807fac, + 0xbef1007d, 0xbef00080, + 0xb8f302dc, 0x84739973, + 0xbefe00c1, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00002, 0xbeff00c1, + 0xbfa00009, 0xbef600ff, + 0x01000000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, + 0x701d0300, 0xbfa00008, 0xbef600ff, 0x01000000, - 0xe0685080, 0x701d0100, - 0xe0685100, 0x701d0200, - 0xe0685180, 0x701d0300, - 0xbfa00008, 0xbef600ff, - 0x01000000, 0xe0685100, - 0x701d0100, 0xe0685200, - 0x701d0200, 0xe0685300, - 0x701d0300, 0xb8f03b05, - 0x80708170, 0xbf0d9973, - 0xbfa20002, 0x84708970, - 0xbfa00001, 0x84708a70, - 0xb8fa1e06, 0x847a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0xbef600ff, - 0x01000000, 0x7e000280, - 0x7e020280, 0x7e040280, - 0xbefd0080, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xd7610002, 0x0000fa6c, - 0x807d817d, 0x917aff6d, - 0x80000000, 0xd7610002, - 0x0000fa7a, 0x807d817d, - 0xd7610002, 0x0000fa6e, - 0x807d817d, 0xd7610002, - 0x0000fa6f, 0x807d817d, - 0xd7610002, 0x0000fa78, - 0x807d817d, 0xb8faf803, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xd7610002, - 0x0000fa7b, 0x807d817d, - 0xb8f1f801, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f814, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f815, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xbefe00ff, 0x0000ffff, - 0xbeff0080, 0xe0685000, - 0x701d0200, 0xbefe00c1, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, 0xb8f03b05, 0x80708170, 0xbf0d9973, 0xbfa20002, 0x84708970, 0xbfa00001, 0x84708a70, 0xb8fa1e06, 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, 0xbef600ff, 0x01000000, - 0xbef90080, 0xbefd0080, - 0xbf800000, 0xbe804100, - 0xbe824102, 0xbe844104, - 0xbe864106, 0xbe884108, - 0xbe8a410a, 0xbe8c410c, - 0xbe8e410e, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, - 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, - 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, - 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, - 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, - 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, - 0x80798179, 0xd7610002, - 0x0000f20c, 0x80798179, - 0xd7610002, 0x0000f20d, - 0x80798179, 0xd7610002, - 0x0000f20e, 0x80798179, - 0xd7610002, 0x0000f20f, - 0x80798179, 0xbf06a079, - 0xbfa10006, 0xe0685000, - 0x701d0200, 0x8070ff70, - 0x00000080, 0xbef90080, - 0x7e040280, 0x807d907d, - 0xbf0aff7d, 0x00000060, - 0xbfa2ffbc, 0xbe804100, - 0xbe824102, 0xbe844104, - 0xbe864106, 0xbe884108, - 0xbe8a410a, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, - 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, - 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, - 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, - 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, - 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, - 0x80798179, 0xe0685000, - 0x701d0200, 0xbefe00c1, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8fb4306, - 0x8b7bc17b, 0xbfa10044, - 0xbfbd0000, 0x8b7aff6d, - 0x80000000, 0xbfa10040, - 0x847b867b, 0x847b827b, - 0xbef6007b, 0xb8f03b05, + 0x7e000280, 0x7e020280, + 0x7e040280, 0xbefd0080, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xd7610002, + 0x0000fa6c, 0x807d817d, + 0x917aff6d, 0x80000000, + 0xd7610002, 0x0000fa7a, + 0x807d817d, 0xd7610002, + 0x0000fa6e, 0x807d817d, + 0xd7610002, 0x0000fa6f, + 0x807d817d, 0xd7610002, + 0x0000fa78, 0x807d817d, + 0xb8faf803, 0xd7610002, + 0x0000fa7a, 0x807d817d, + 0xd7610002, 0x0000fa7b, + 0x807d817d, 0xb8f1f801, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f814, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xb8f1f815, + 0xd7610002, 0x0000fa71, + 0x807d817d, 0xbefe00ff, + 0x0000ffff, 0xbeff0080, + 0xe0685000, 0x701d0200, + 0xbefe00c1, 0xb8f03b05, 0x80708170, 0xbf0d9973, 0xbfa20002, 0x84708970, 0xbfa00001, 0x84708a70, 0xb8fa1e06, 0x847a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xd71f0000, - 0x000100c1, 0xd7200000, - 0x000200c1, 0x16000084, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbefd0080, - 0xbfa20012, 0xbe8300ff, - 0x00000080, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf890000, 0xe0685000, - 0x701d0100, 0x807d037d, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000080, - 0xbf0a7b7d, 0xbfa2fff4, - 0xbfa00011, 0xbe8300ff, - 0x00000100, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf890000, 0xe0685000, - 0x701d0100, 0x807d037d, - 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000100, - 0xbf0a7b7d, 0xbfa2fff4, + 0x80707a70, 0xbef600ff, + 0x01000000, 0xbef90080, + 0xbefd0080, 0xbf800000, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xbe8c410c, 0xbe8e410e, + 0xd7610002, 0x0000f200, + 0x80798179, 0xd7610002, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, + 0x80798179, 0xd7610002, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, + 0x80798179, 0xd7610002, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, + 0x80798179, 0xd7610002, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xd7610002, 0x0000f20c, + 0x80798179, 0xd7610002, + 0x0000f20d, 0x80798179, + 0xd7610002, 0x0000f20e, + 0x80798179, 0xd7610002, + 0x0000f20f, 0x80798179, + 0xbf06a079, 0xbfa10006, + 0xe0685000, 0x701d0200, + 0x8070ff70, 0x00000080, + 0xbef90080, 0x7e040280, + 0x807d907d, 0xbf0aff7d, + 0x00000060, 0xbfa2ffbc, + 0xbe804100, 0xbe824102, + 0xbe844104, 0xbe864106, + 0xbe884108, 0xbe8a410a, + 0xd7610002, 0x0000f200, + 0x80798179, 0xd7610002, + 0x0000f201, 0x80798179, + 0xd7610002, 0x0000f202, + 0x80798179, 0xd7610002, + 0x0000f203, 0x80798179, + 0xd7610002, 0x0000f204, + 0x80798179, 0xd7610002, + 0x0000f205, 0x80798179, + 0xd7610002, 0x0000f206, + 0x80798179, 0xd7610002, + 0x0000f207, 0x80798179, + 0xd7610002, 0x0000f208, + 0x80798179, 0xd7610002, + 0x0000f209, 0x80798179, + 0xd7610002, 0x0000f20a, + 0x80798179, 0xd7610002, + 0x0000f20b, 0x80798179, + 0xe0685000, 0x701d0200, 0xbefe00c1, 0x857d9973, 0x8b7d817d, 0xbf06817d, - 0xbfa20004, 0xbef000ff, - 0x00000200, 0xbeff0080, - 0xbfa00003, 0xbef000ff, - 0x00000400, 0xbeff00c1, - 0xb8fb3b05, 0x807b817b, - 0x847b827b, 0x857d9973, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8fb4306, 0x8b7bc17b, + 0xbfa10044, 0xbfbd0000, + 0x8b7aff6d, 0x80000000, + 0xbfa10040, 0x847b867b, + 0x847b827b, 0xbef6007b, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xd71f0000, 0x000100c1, + 0xd7200000, 0x000200c1, + 0x16000084, 0x857d9973, 0x8b7d817d, 0xbf06817d, - 0xbfa20017, 0xbef600ff, + 0xbefd0080, 0xbfa20012, + 0xbe8300ff, 0x00000080, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000080, 0xbf0a7b7d, + 0xbfa2fff4, 0xbfa00011, + 0xbe8300ff, 0x00000100, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf890000, + 0xe0685000, 0x701d0100, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7d, + 0xbfa2fff4, 0xbefe00c1, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20004, + 0xbef000ff, 0x00000200, + 0xbeff0080, 0xbfa00003, + 0xbef000ff, 0x00000400, + 0xbeff00c1, 0xb8fb3b05, + 0x807b817b, 0x847b827b, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20017, + 0xbef600ff, 0x01000000, + 0xbefd0084, 0xbf0a7b7d, + 0xbfa10037, 0x7e008700, + 0x7e028701, 0x7e048702, + 0x7e068703, 0xe0685000, + 0x701d0000, 0xe0685080, + 0x701d0100, 0xe0685100, + 0x701d0200, 0xe0685180, + 0x701d0300, 0x807d847d, + 0x8070ff70, 0x00000200, + 0xbf0a7b7d, 0xbfa2ffef, + 0xbfa00025, 0xbef600ff, 0x01000000, 0xbefd0084, - 0xbf0a7b7d, 0xbfa10037, + 0xbf0a7b7d, 0xbfa10011, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xe0685000, 0x701d0000, - 0xe0685080, 0x701d0100, - 0xe0685100, 0x701d0200, - 0xe0685180, 0x701d0300, + 0xe0685100, 0x701d0100, + 0xe0685200, 0x701d0200, + 0xe0685300, 0x701d0300, 0x807d847d, 0x8070ff70, - 0x00000200, 0xbf0a7b7d, - 0xbfa2ffef, 0xbfa00025, - 0xbef600ff, 0x01000000, - 0xbefd0084, 0xbf0a7b7d, - 0xbfa10011, 0x7e008700, - 0x7e028701, 0x7e048702, - 0x7e068703, 0xe0685000, - 0x701d0000, 0xe0685100, - 0x701d0100, 0xe0685200, - 0x701d0200, 0xe0685300, - 0x701d0300, 0x807d847d, - 0x8070ff70, 0x00000400, - 0xbf0a7b7d, 0xbfa2ffef, - 0xb8fb1e06, 0x8b7bc17b, - 0xbfa1000c, 0x847b837b, - 0x807b7d7b, 0xbefe00c1, - 0xbeff0080, 0x7e008700, - 0xe0685000, 0x701d0000, - 0x807d817d, 0x8070ff70, - 0x00000080, 0xbf0a7b7d, - 0xbfa2fff8, 0xbfa00146, - 0xbef4007e, 0x8b75ff7f, - 0x0000ffff, 0x8c75ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x10807fac, - 0xb8f202dc, 0x84729972, - 0x8b6eff7f, 0x04000000, - 0xbfa1003a, 0xbefe00c1, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8ef4306, - 0x8b6fc16f, 0xbfa1002f, - 0x846f866f, 0x846f826f, - 0xbef6006f, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x8078ff78, - 0x00000080, 0xbef600ff, - 0x01000000, 0x857d9972, - 0x8b7d817d, 0xbf06817d, - 0xbefd0080, 0xbfa2000c, - 0xe0500000, 0x781d0000, - 0xbf8903f7, 0xdac00000, - 0x00000000, 0x807dff7d, - 0x00000080, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff5, 0xbfa0000b, - 0xe0500000, 0x781d0000, - 0xbf8903f7, 0xdac00000, - 0x00000000, 0x807dff7d, - 0x00000100, 0x8078ff78, - 0x00000100, 0xbf0a6f7d, - 0xbfa2fff5, 0xbef80080, + 0x00000400, 0xbf0a7b7d, + 0xbfa2ffef, 0xb8fb1e06, + 0x8b7bc17b, 0xbfa1000c, + 0x847b837b, 0x807b7d7b, + 0xbefe00c1, 0xbeff0080, + 0x7e008700, 0xe0685000, + 0x701d0000, 0x807d817d, + 0x8070ff70, 0x00000080, + 0xbf0a7b7d, 0xbfa2fff8, + 0xbfa00146, 0xbef4007e, + 0x8b75ff7f, 0x0000ffff, + 0x8c75ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x10807fac, 0xb8f202dc, + 0x84729972, 0x8b6eff7f, + 0x04000000, 0xbfa1003a, 0xbefe00c1, 0x857d9972, 0x8b7d817d, 0xbf06817d, 0xbfa20002, 0xbeff0080, 0xbfa00001, 0xbeff00c1, - 0xb8ef3b05, 0x806f816f, - 0x846f826f, 0x857d9972, - 0x8b7d817d, 0xbf06817d, - 0xbfa20024, 0xbef600ff, - 0x01000000, 0xbeee0078, - 0x8078ff78, 0x00000200, - 0xbefd0084, 0xbf0a6f7d, - 0xbfa10050, 0xe0505000, - 0x781d0000, 0xe0505080, - 0x781d0100, 0xe0505100, - 0x781d0200, 0xe0505180, - 0x781d0300, 0xbf8903f7, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807d847d, 0x8078ff78, - 0x00000200, 0xbf0a6f7d, - 0xbfa2ffee, 0xe0505000, - 0x6e1d0000, 0xe0505080, - 0x6e1d0100, 0xe0505100, - 0x6e1d0200, 0xe0505180, - 0x6e1d0300, 0xbf8903f7, - 0xbfa00034, 0xbef600ff, - 0x01000000, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefd0084, 0xbf0a6f7d, - 0xbfa10012, 0xe0505000, - 0x781d0000, 0xe0505100, - 0x781d0100, 0xe0505200, - 0x781d0200, 0xe0505300, - 0x781d0300, 0xbf8903f7, - 0x7e008500, 0x7e028501, - 0x7e048502, 0x7e068503, - 0x807d847d, 0x8078ff78, - 0x00000400, 0xbf0a6f7d, - 0xbfa2ffee, 0xb8ef1e06, - 0x8b6fc16f, 0xbfa1000e, - 0x846f836f, 0x806f7d6f, - 0xbefe00c1, 0xbeff0080, - 0xe0505000, 0x781d0000, - 0xbf8903f7, 0x7e008500, - 0x807d817d, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff7, 0xbeff00c1, - 0xe0505000, 0x6e1d0000, - 0xe0505100, 0x6e1d0100, - 0xe0505200, 0x6e1d0200, - 0xe0505300, 0x6e1d0300, - 0xbf8903f7, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x80f8ff78, - 0x00000050, 0xbef600ff, - 0x01000000, 0xbefd00ff, - 0x0000006c, 0x80f89078, - 0xf428403a, 0xf0000000, - 0xbf89fc07, 0x80fd847d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0x80f8a078, - 0xf42c403a, 0xf0000000, - 0xbf89fc07, 0x80fd887d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0x80f8c078, - 0xf430403a, 0xf0000000, - 0xbf89fc07, 0x80fd907d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0xbe884308, - 0xbe8a430a, 0xbe8c430c, - 0xbe8e430e, 0xbf06807d, - 0xbfa1fff0, 0xb980f801, - 0x00000000, 0xbfbd0000, + 0xb8ef4306, 0x8b6fc16f, + 0xbfa1002f, 0x846f866f, + 0x846f826f, 0xbef6006f, 0xb8f83b05, 0x80788178, 0xbf0d9972, 0xbfa20002, 0x84788978, 0xbfa00001, 0x84788a78, 0xb8ee1e06, 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, + 0x8078ff78, 0x00000080, 0xbef600ff, 0x01000000, - 0xf4205bfa, 0xf0000000, - 0x80788478, 0xf4205b3a, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbefd0080, + 0xbfa2000c, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbfa0000b, 0xe0500000, + 0x781d0000, 0xbf8903f7, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000100, + 0x8078ff78, 0x00000100, + 0xbf0a6f7d, 0xbfa2fff5, + 0xbef80080, 0xbefe00c1, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20002, + 0xbeff0080, 0xbfa00001, + 0xbeff00c1, 0xb8ef3b05, + 0x806f816f, 0x846f826f, + 0x857d9972, 0x8b7d817d, + 0xbf06817d, 0xbfa20024, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000200, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10050, + 0xe0505000, 0x781d0000, + 0xe0505080, 0x781d0100, + 0xe0505100, 0x781d0200, + 0xe0505180, 0x781d0300, + 0xbf8903f7, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000200, + 0xbf0a6f7d, 0xbfa2ffee, + 0xe0505000, 0x6e1d0000, + 0xe0505080, 0x6e1d0100, + 0xe0505100, 0x6e1d0200, + 0xe0505180, 0x6e1d0300, + 0xbf8903f7, 0xbfa00034, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefd0084, + 0xbf0a6f7d, 0xbfa10012, + 0xe0505000, 0x781d0000, + 0xe0505100, 0x781d0100, + 0xe0505200, 0x781d0200, + 0xe0505300, 0x781d0300, + 0xbf8903f7, 0x7e008500, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807d847d, + 0x8078ff78, 0x00000400, + 0xbf0a6f7d, 0xbfa2ffee, + 0xb8ef1e06, 0x8b6fc16f, + 0xbfa1000e, 0x846f836f, + 0x806f7d6f, 0xbefe00c1, + 0xbeff0080, 0xe0505000, + 0x781d0000, 0xbf8903f7, + 0x7e008500, 0x807d817d, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff7, + 0xbeff00c1, 0xe0505000, + 0x6e1d0000, 0xe0505100, + 0x6e1d0100, 0xe0505200, + 0x6e1d0200, 0xe0505300, + 0x6e1d0300, 0xbf8903f7, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, + 0x8078ff78, 0x00000200, + 0x80f8ff78, 0x00000050, + 0xbef600ff, 0x01000000, + 0xbefd00ff, 0x0000006c, + 0x80f89078, 0xf428403a, + 0xf0000000, 0xbf89fc07, + 0x80fd847d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0x80f8a078, 0xf42c403a, + 0xf0000000, 0xbf89fc07, + 0x80fd887d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0x80f8c078, 0xf430403a, + 0xf0000000, 0xbf89fc07, + 0x80fd907d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0xbe884308, 0xbe8a430a, + 0xbe8c430c, 0xbe8e430e, + 0xbf06807d, 0xbfa1fff0, + 0xb980f801, 0x00000000, + 0xbfbd0000, 0xb8f83b05, + 0x80788178, 0xbf0d9972, + 0xbfa20002, 0x84788978, + 0xbfa00001, 0x84788a78, + 0xb8ee1e06, 0x846e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0xbef600ff, + 0x01000000, 0xf4205bfa, 0xf0000000, 0x80788478, - 0xf4205b7a, 0xf0000000, - 0x80788478, 0xf4205c3a, + 0xf4205b3a, 0xf0000000, + 0x80788478, 0xf4205b7a, 0xf0000000, 0x80788478, - 0xf4205c7a, 0xf0000000, - 0x80788478, 0xf4205eba, + 0xf4205c3a, 0xf0000000, + 0x80788478, 0xf4205c7a, 0xf0000000, 0x80788478, - 0xf4205efa, 0xf0000000, - 0x80788478, 0xf4205e7a, + 0xf4205eba, 0xf0000000, + 0x80788478, 0xf4205efa, 0xf0000000, 0x80788478, - 0xf4205cfa, 0xf0000000, - 0x80788478, 0xf4205bba, + 0xf4205e7a, 0xf0000000, + 0x80788478, 0xf4205cfa, 0xf0000000, 0x80788478, - 0xbf89fc07, 0xb96ef814, 0xf4205bba, 0xf0000000, 0x80788478, 0xbf89fc07, - 0xb96ef815, 0xbefd006f, - 0xbefe0070, 0xbeff0071, - 0x8b6f7bff, 0x000003ff, - 0xb96f4803, 0x8b6f7bff, - 0xfffff800, 0x856f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee3b05, 0x806e816e, - 0xbf0d9972, 0xbfa20002, - 0x846e896e, 0xbfa00001, - 0x846e8a6e, 0xb8ef1e06, - 0x846f8a6f, 0x806e6f6e, - 0x806eff6e, 0x00000200, - 0x806e746e, 0x826f8075, - 0x8b6fff6f, 0x0000ffff, - 0xf4085c37, 0xf8000050, - 0xf4085d37, 0xf8000060, - 0xf4005e77, 0xf8000074, - 0xbf89fc07, 0x8b6dff6d, - 0x0000ffff, 0x8bfe7e7e, - 0x8bea6a6a, 0xb8eef802, - 0xbf0d866e, 0xbfa20002, - 0xb97af802, 0xbe80486c, - 0xb97af802, 0xbe804a6c, - 0xbfb00000, 0xbf9f0000, + 0xb96ef814, 0xf4205bba, + 0xf0000000, 0x80788478, + 0xbf89fc07, 0xb96ef815, + 0xbefd006f, 0xbefe0070, + 0xbeff0071, 0x8b6f7bff, + 0x000003ff, 0xb96f4803, + 0x8b6f7bff, 0xfffff800, + 0x856f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee3b05, + 0x806e816e, 0xbf0d9972, + 0xbfa20002, 0x846e896e, + 0xbfa00001, 0x846e8a6e, + 0xb8ef1e06, 0x846f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x8b6fff6f, + 0x0000ffff, 0xf4085c37, + 0xf8000050, 0xf4085d37, + 0xf8000060, 0xf4005e77, + 0xf8000074, 0xbf89fc07, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb8eef802, 0xbf0d866e, + 0xbfa20002, 0xb97af802, + 0xbe80486c, 0xb97af802, + 0xbe804a6c, 0xbfb00000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0x00000000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm index 0f81670f6f9c..8b92c33c2a7c 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm @@ -186,6 +186,12 @@ L_SKIP_RESTORE: s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) #if SW_SA_TRAP + // If ttmp1[30] is set then issue s_barrier to unblock dependent waves. + s_bitcmp1_b32 s_save_pc_hi, 30 + s_cbranch_scc0 L_TRAP_NO_BARRIER + s_barrier + +L_TRAP_NO_BARRIER: // If ttmp1[31] is set then trap may occur early. // Spin wait until SAVECTX exception is raised. s_bitcmp1_b32 s_save_pc_hi, 31 From 7433632c9ff68a991bd0bc38cabf354e9d2de410 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 1 Nov 2022 19:10:09 -0400 Subject: [PATCH 189/328] ring-buffer: Check for NULL cpu_buffer in ring_buffer_wake_waiters() On some machines the number of listed CPUs may be bigger than the actual CPUs that exist. The tracing subsystem allocates a per_cpu directory with access to the per CPU ring buffer via a cpuX file. But to save space, the ring buffer will only allocate buffers for online CPUs, even though the CPU array will be as big as the nr_cpu_ids. With the addition of waking waiters on the ring buffer when closing the file, the ring_buffer_wake_waiters() now needs to make sure that the buffer is allocated (with the irq_work allocated with it) before trying to wake waiters, as it will cause a NULL pointer dereference. While debugging this, I added a NULL check for the buffer itself (which is OK to do), and also NULL pointer checks against buffer->buffers (which is not fine, and will WARN) as well as making sure the CPU number passed in is within the nr_cpu_ids (which is also not fine if it isn't). Link: https://lore.kernel.org/all/87h6zklb6n.wl-tiwai@suse.de/ Link: https://lore.kernel.org/all/CAM6Wdxc0KRJMXVAA0Y=u6Jh2V=uWB-_Fn6M4xRuNppfXzL1mUg@mail.gmail.com/ Link: https://lkml.kernel.org/linux-trace-kernel/20221101191009.1e7378c8@rorschach.local.home Cc: stable@vger.kernel.org Cc: Steven Noonan Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=1204705 Reported-by: Takashi Iwai Reported-by: Roland Ruckerbauer Fixes: f3ddb74ad079 ("tracing: Wake up ring buffer waiters on closing of the file") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 199759c73519..9712083832f4 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -937,6 +937,9 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *rbwork; + if (!buffer) + return; + if (cpu == RING_BUFFER_ALL_CPUS) { /* Wake up individual ones too. One level recursion */ @@ -945,7 +948,15 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) rbwork = &buffer->irq_work; } else { + if (WARN_ON_ONCE(!buffer->buffers)) + return; + if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) + return; + cpu_buffer = buffer->buffers[cpu]; + /* The CPU buffer may not have been initialized yet */ + if (!cpu_buffer) + return; rbwork = &cpu_buffer->irq_work; } From 0e792b89e6800cd9cb4757a76a96f7ef3e8b6294 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Thu, 3 Nov 2022 11:10:10 +0800 Subject: [PATCH 190/328] ftrace: Fix use-after-free for dynamic ftrace_ops KASAN reported a use-after-free with ftrace ops [1]. It was found from vmcore that perf had registered two ops with the same content successively, both dynamic. After unregistering the second ops, a use-after-free occurred. In ftrace_shutdown(), when the second ops is unregistered, the FTRACE_UPDATE_CALLS command is not set because there is another enabled ops with the same content. Also, both ops are dynamic and the ftrace callback function is ftrace_ops_list_func, so the FTRACE_UPDATE_TRACE_FUNC command will not be set. Eventually the value of 'command' will be 0 and ftrace_shutdown() will skip the rcu synchronization. However, ftrace may be activated. When the ops is released, another CPU may be accessing the ops. Add the missing synchronization to fix this problem. [1] BUG: KASAN: use-after-free in __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] BUG: KASAN: use-after-free in ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 Read of size 8 at addr ffff56551965bbc8 by task syz-executor.2/14468 CPU: 1 PID: 14468 Comm: syz-executor.2 Not tainted 5.10.0 #7 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x40c arch/arm64/kernel/stacktrace.c:132 show_stack+0x30/0x40 arch/arm64/kernel/stacktrace.c:196 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b4/0x248 lib/dump_stack.c:118 print_address_description.constprop.0+0x28/0x48c mm/kasan/report.c:387 __kasan_report mm/kasan/report.c:547 [inline] kasan_report+0x118/0x210 mm/kasan/report.c:564 check_memory_region_inline mm/kasan/generic.c:187 [inline] __asan_load8+0x98/0xc0 mm/kasan/generic.c:253 __ftrace_ops_list_func kernel/trace/ftrace.c:7020 [inline] ftrace_ops_list_func+0x2b0/0x31c kernel/trace/ftrace.c:7049 ftrace_graph_call+0x0/0x4 __might_sleep+0x8/0x100 include/linux/perf_event.h:1170 __might_fault mm/memory.c:5183 [inline] __might_fault+0x58/0x70 mm/memory.c:5171 do_strncpy_from_user lib/strncpy_from_user.c:41 [inline] strncpy_from_user+0x1f4/0x4b0 lib/strncpy_from_user.c:139 getname_flags+0xb0/0x31c fs/namei.c:149 getname+0x2c/0x40 fs/namei.c:209 [...] Allocated by task 14445: kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:479 [inline] __kasan_kmalloc.constprop.0+0x110/0x13c mm/kasan/common.c:449 kasan_kmalloc+0xc/0x14 mm/kasan/common.c:493 kmem_cache_alloc_trace+0x440/0x924 mm/slub.c:2950 kmalloc include/linux/slab.h:563 [inline] kzalloc include/linux/slab.h:675 [inline] perf_event_alloc.part.0+0xb4/0x1350 kernel/events/core.c:11230 perf_event_alloc kernel/events/core.c:11733 [inline] __do_sys_perf_event_open kernel/events/core.c:11831 [inline] __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 __arm64_sys_perf_event_open+0x6c/0x80 kernel/events/core.c:11723 [...] Freed by task 14445: kasan_save_stack+0x24/0x50 mm/kasan/common.c:48 kasan_set_track+0x24/0x34 mm/kasan/common.c:56 kasan_set_free_info+0x20/0x40 mm/kasan/generic.c:358 __kasan_slab_free.part.0+0x11c/0x1b0 mm/kasan/common.c:437 __kasan_slab_free mm/kasan/common.c:445 [inline] kasan_slab_free+0x2c/0x40 mm/kasan/common.c:446 slab_free_hook mm/slub.c:1569 [inline] slab_free_freelist_hook mm/slub.c:1608 [inline] slab_free mm/slub.c:3179 [inline] kfree+0x12c/0xc10 mm/slub.c:4176 perf_event_alloc.part.0+0xa0c/0x1350 kernel/events/core.c:11434 perf_event_alloc kernel/events/core.c:11733 [inline] __do_sys_perf_event_open kernel/events/core.c:11831 [inline] __se_sys_perf_event_open+0x550/0x15f4 kernel/events/core.c:11723 [...] Link: https://lore.kernel.org/linux-trace-kernel/20221103031010.166498-1-lihuafei1@huawei.com Fixes: edb096e00724f ("ftrace: Fix memleak when unregistering dynamic ops when tracing disabled") Cc: stable@vger.kernel.org Suggested-by: Steven Rostedt Signed-off-by: Li Huafei Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fbf2543111c0..7dc023641bf1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3028,18 +3028,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) command |= FTRACE_UPDATE_TRACE_FUNC; } - if (!command || !ftrace_enabled) { - /* - * If these are dynamic or per_cpu ops, they still - * need their data freed. Since, function tracing is - * not currently active, we can just free them - * without synchronizing all CPUs. - */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - goto free_ops; - - return 0; - } + if (!command || !ftrace_enabled) + goto out; /* * If the ops uses a trampoline, then it needs to be @@ -3076,6 +3066,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) removed_ops = NULL; ops->flags &= ~FTRACE_OPS_FL_REMOVING; +out: /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. @@ -3103,7 +3094,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) if (IS_ENABLED(CONFIG_PREEMPTION)) synchronize_rcu_tasks(); - free_ops: ftrace_trampoline_free(ops); } From 354d8a4b165697e6da9585b3f651d87735f30415 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 20 Oct 2022 13:21:43 +0200 Subject: [PATCH 191/328] x86/xen: silence smatch warning in pmu_msr_chk_emulated() Commit 8714f7bcd3c2 ("xen/pv: add fault recovery control to pmu msr accesses") introduced code resulting in a warning issued by the smatch static checker, claiming to use an uninitialized variable. This is a false positive, but work around the warning nevertheless. Fixes: 8714f7bcd3c2 ("xen/pv: add fault recovery control to pmu msr accesses") Reported-by: Dan Carpenter Signed-off-by: Juergen Gross --- arch/x86/xen/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 68aff1382872..246d67dab510 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -302,7 +302,7 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read) static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read, bool *emul) { - int type, index; + int type, index = 0; if (is_amd_pmu_msr(msr)) *emul = xen_amd_pmu_emulate(msr, val, is_read); From 4bff677b30156435afa2cc4c3601b542b4ddd439 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 20 Oct 2022 13:25:12 +0200 Subject: [PATCH 192/328] x86/xen: simplify sysenter and syscall setup xen_enable_sysenter() and xen_enable_syscall() can be simplified a lot. While at it, switch to use cpu_feature_enabled() instead of boot_cpu_has(). Signed-off-by: Juergen Gross --- arch/x86/xen/setup.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index cfa99e8f054b..4f4309500559 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -910,17 +910,9 @@ static int register_callback(unsigned type, const void *func) void xen_enable_sysenter(void) { - int ret; - unsigned sysenter_feature; - - sysenter_feature = X86_FEATURE_SYSENTER32; - - if (!boot_cpu_has(sysenter_feature)) - return; - - ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); - if(ret != 0) - setup_clear_cpu_cap(sysenter_feature); + if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) && + register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat)) + setup_clear_cpu_cap(X86_FEATURE_SYSENTER32); } void xen_enable_syscall(void) @@ -934,12 +926,9 @@ void xen_enable_syscall(void) mechanism for syscalls. */ } - if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { - ret = register_callback(CALLBACKTYPE_syscall32, - xen_entry_SYSCALL_compat); - if (ret != 0) - setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); - } + if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) && + register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat)) + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); } static void __init xen_pvmmu_arch_setup(void) From aec1dc972d27c837d1406310dab5170189eb01e5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 11:25:16 -0700 Subject: [PATCH 193/328] net/ipv4: Fix linux/in.h header dependencies __DECLARE_FLEX_ARRAY is defined in include/uapi/linux/stddef.h but doesn't seem to be explicitly included from include/uapi/linux/in.h, which breaks BPF selftests builds (once we sync linux/stddef.h into tools/include directory in the next patch). Fix this by explicitly including linux/stddef.h. Given this affects BPF CI and bpf tree, targeting this for bpf tree. Fixes: 5854a09b4957 ("net/ipv4: Use __DECLARE_FLEX_ARRAY() helper") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Cc: Gustavo A. R. Silva Cc: Jakub Kicinski Link: https://lore.kernel.org/bpf/20221102182517.2675301-1-andrii@kernel.org --- include/uapi/linux/in.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index f243ce665f74..07a4cb149305 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -20,6 +20,7 @@ #define _UAPI_LINUX_IN_H #include +#include #include #include From a778f5d46b6287ebe26e24b48f3e8079c2db8ed2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 2 Nov 2022 11:25:17 -0700 Subject: [PATCH 194/328] tools/headers: Pull in stddef.h to uapi to fix BPF selftests build in CI With recent sync of linux/in.h tools/include headers are now relying on __DECLARE_FLEX_ARRAY macro, which isn't itself defined inside tools/include headers anywhere and is instead assumed to be present in system-wide UAPI header. This breaks isolated environments that don't have kernel UAPI headers installed system-wide, like BPF CI ([0]). To fix this, bring in include/uapi/linux/stddef.h into tools/include. We can't just copy/paste it, though, it has to be processed with scripts/headers_install.sh, which has a dependency on scripts/unifdef. So the full command to (re-)generate stddef.h for inclusion into tools/include directory is: $ make scripts_unifdef && \ cp $KBUILD_OUTPUT/scripts/unifdef scripts/ && \ scripts/headers_install.sh include/uapi/linux/stddef.h tools/include/uapi/linux/stddef.h This assumes KBUILD_OUTPUT envvar is set and used for out-of-tree builds. [0] https://github.com/kernel-patches/bpf/actions/runs/3379432493/jobs/5610982609 Fixes: 036b8f5b8970 ("tools headers uapi: Update linux/in.h copy") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Cc: Jakub Kicinski Cc: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/bpf/20221102182517.2675301-2-andrii@kernel.org --- tools/include/uapi/linux/in.h | 1 + tools/include/uapi/linux/stddef.h | 47 +++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 tools/include/uapi/linux/stddef.h diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index f243ce665f74..07a4cb149305 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -20,6 +20,7 @@ #define _UAPI_LINUX_IN_H #include +#include #include #include diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h new file mode 100644 index 000000000000..bb6ea517efb5 --- /dev/null +++ b/tools/include/uapi/linux/stddef.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_STDDEF_H +#define _LINUX_STDDEF_H + + + +#ifndef __always_inline +#define __always_inline __inline__ +#endif + +/** + * __struct_group() - Create a mirrored named and anonyomous struct + * + * @TAG: The tag name for the named sub-struct (usually empty) + * @NAME: The identifier name of the mirrored sub-struct + * @ATTRS: Any struct attributes (usually empty) + * @MEMBERS: The member declarations for the mirrored structs + * + * Used to create an anonymous union of two structs with identical layout + * and size: one anonymous and one named. The former's members can be used + * normally without sub-struct naming, and the latter can be used to + * reason about the start, end, and size of the group of struct members. + * The named struct can also be explicitly tagged for layer reuse, as well + * as both having struct attributes appended. + */ +#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ + } + +/** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ + struct { \ + struct { } __empty_ ## NAME; \ + TYPE NAME[]; \ + } +#endif From 8bbabb3fddcd0f858be69ed5abc9b470a239d6f2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 1 Nov 2022 21:34:17 -0700 Subject: [PATCH 195/328] bpf, sock_map: Move cancel_work_sync() out of sock lock Stanislav reported a lockdep warning, which is caused by the cancel_work_sync() called inside sock_map_close(), as analyzed below by Jakub: psock->work.func = sk_psock_backlog() ACQUIRE psock->work_mutex sk_psock_handle_skb() skb_send_sock() __skb_send_sock() sendpage_unlocked() kernel_sendpage() sock->ops->sendpage = inet_sendpage() sk->sk_prot->sendpage = tcp_sendpage() ACQUIRE sk->sk_lock tcp_sendpage_locked() RELEASE sk->sk_lock RELEASE psock->work_mutex sock_map_close() ACQUIRE sk->sk_lock sk_psock_stop() sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED) cancel_work_sync() __cancel_work_timer() __flush_work() // wait for psock->work to finish RELEASE sk->sk_lock We can move the cancel_work_sync() out of the sock lock protection, but still before saved_close() was called. Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") Reported-by: Stanislav Fomichev Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Tested-by: Jakub Sitnicki Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20221102043417.279409-1-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 2 +- net/core/skmsg.c | 7 ++----- net/core/sock_map.c | 7 ++++--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 48f4b645193b..70d6cb94e580 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -376,7 +376,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err) } struct sk_psock *sk_psock_init(struct sock *sk, int node); -void sk_psock_stop(struct sk_psock *psock, bool wait); +void sk_psock_stop(struct sk_psock *psock); #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 1efdc47a999b..e6b9ced3eda8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -803,16 +803,13 @@ static void sk_psock_link_destroy(struct sk_psock *psock) } } -void sk_psock_stop(struct sk_psock *psock, bool wait) +void sk_psock_stop(struct sk_psock *psock) { spin_lock_bh(&psock->ingress_lock); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); sk_psock_cork_free(psock); __sk_psock_zap_ingress(psock); spin_unlock_bh(&psock->ingress_lock); - - if (wait) - cancel_work_sync(&psock->work); } static void sk_psock_done_strp(struct sk_psock *psock); @@ -850,7 +847,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); - sk_psock_stop(psock, false); + sk_psock_stop(psock); INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); queue_rcu_work(system_wq, &psock->rwork); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index a660baedd9e7..81beb16ab1eb 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1596,7 +1596,7 @@ void sock_map_destroy(struct sock *sk) saved_destroy = psock->saved_destroy; sock_map_remove_links(sk, psock); rcu_read_unlock(); - sk_psock_stop(psock, false); + sk_psock_stop(psock); sk_psock_put(sk, psock); saved_destroy(sk); } @@ -1619,9 +1619,10 @@ void sock_map_close(struct sock *sk, long timeout) saved_close = psock->saved_close; sock_map_remove_links(sk, psock); rcu_read_unlock(); - sk_psock_stop(psock, true); - sk_psock_put(sk, psock); + sk_psock_stop(psock); release_sock(sk); + cancel_work_sync(&psock->work); + sk_psock_put(sk, psock); saved_close(sk, timeout); } EXPORT_SYMBOL_GPL(sock_map_close); From 074c008007197297aaff9dd93627fb89b27d21e5 Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Wed, 2 Nov 2022 20:53:59 +0000 Subject: [PATCH 196/328] KVM: x86: Use SRCU to protect zap in __kvm_set_or_clear_apicv_inhibit() kvm_zap_gfn_range() must be called in an SRCU read-critical section, but there is no SRCU annotation in __kvm_set_or_clear_apicv_inhibit(). This can lead to the following warning via kvm_arch_vcpu_ioctl_set_guest_debug() if a Shadow MMU is in use (TDP MMU disabled or nesting): [ 1416.659809] ============================= [ 1416.659810] WARNING: suspicious RCU usage [ 1416.659839] 6.1.0-dbg-DEV #1 Tainted: G S I [ 1416.659853] ----------------------------- [ 1416.659854] include/linux/kvm_host.h:954 suspicious rcu_dereference_check() usage! [ 1416.659856] ... [ 1416.659904] dump_stack_lvl+0x84/0xaa [ 1416.659910] dump_stack+0x10/0x15 [ 1416.659913] lockdep_rcu_suspicious+0x11e/0x130 [ 1416.659919] kvm_zap_gfn_range+0x226/0x5e0 [ 1416.659926] ? kvm_make_all_cpus_request_except+0x18b/0x1e0 [ 1416.659935] __kvm_set_or_clear_apicv_inhibit+0xcc/0x100 [ 1416.659940] kvm_arch_vcpu_ioctl_set_guest_debug+0x350/0x390 [ 1416.659946] kvm_vcpu_ioctl+0x2fc/0x620 [ 1416.659955] __se_sys_ioctl+0x77/0xc0 [ 1416.659962] __x64_sys_ioctl+0x1d/0x20 [ 1416.659965] do_syscall_64+0x3d/0x80 [ 1416.659969] entry_SYSCALL_64_after_hwframe+0x63/0xcd Always take the KVM SRCU read lock in __kvm_set_or_clear_apicv_inhibit() to protect the GFN to memslot translation. The SRCU read lock is not technically required when no Shadow MMUs are in use, since the TDP MMU walks the paging structures from the roots and does not need to look up GFN translations in the memslots, but make the SRCU locking unconditional for simplicty. In most cases, the SRCU locking is taken care of in the vCPU run loop, but when called through other ioctls (such as KVM_SET_GUEST_DEBUG) there is no srcu_read_lock. Tested: ran tools/testing/selftests/kvm/x86_64/debug_regs on a DBG build. This patch causes the suspicious RCU warning to disappear. Note that the warning is hit in __kvm_zap_rmaps(), so kvm_memslots_have_rmaps() must return true in order for this to repro (i.e. the TDP MMU must be off or nesting in use.) Reported-by: Greg Thelen Fixes: 36222b117e36 ("KVM: x86: don't disable APICv memslot when inhibited") Signed-off-by: Ben Gardon Message-Id: <20221102205359.1260980-1-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 521b433f978c..5f5eb577d583 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10404,7 +10404,10 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, kvm->arch.apicv_inhibit_reasons = new; if (new) { unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE); + int idx = srcu_read_lock(&kvm->srcu); + kvm_zap_gfn_range(kvm, gfn, gfn+1); + srcu_read_unlock(&kvm->srcu, idx); } } else { kvm->arch.apicv_inhibit_reasons = new; From 8670866b236eafbe9d502294561c3ddd298266bc Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Thu, 3 Nov 2022 09:17:49 +0800 Subject: [PATCH 197/328] KVM: x86: Fix a typo about the usage of kvcalloc() Swap the 1st and 2nd arguments to be consistent with the usage of kvcalloc(). Fixes: c9b8fecddb5b ("KVM: use kvcalloc for array allocations") Signed-off-by: Liao Chang Message-Id: <20221103011749.139262-1-liaochang1@huawei.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0810e93cbedc..62bc7a01cecc 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1338,7 +1338,7 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, if (sanity_check_entries(entries, cpuid->nent, type)) return -EINVAL; - array.entries = kvcalloc(sizeof(struct kvm_cpuid_entry2), cpuid->nent, GFP_KERNEL); + array.entries = kvcalloc(cpuid->nent, sizeof(struct kvm_cpuid_entry2), GFP_KERNEL); if (!array.entries) return -ENOMEM; From 23715a26c8d812912a70c6ac1ce67af649b95914 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 28 Oct 2022 16:39:14 +0200 Subject: [PATCH 198/328] arm64: efi: Recover from synchronous exceptions occurring in firmware Unlike x86, which has machinery to deal with page faults that occur during the execution of EFI runtime services, arm64 has nothing like that, and a synchronous exception raised by firmware code brings down the whole system. With more EFI based systems appearing that were not built to run Linux (such as the Windows-on-ARM laptops based on Qualcomm SOCs), as well as the introduction of PRM (platform specific firmware routines that are callable just like EFI runtime services), we are more likely to run into issues of this sort, and it is much more likely that we can identify and work around such issues if they don't bring down the system entirely. Since we already use a EFI runtime services call wrapper in assembler, we can quite easily add some code that captures the execution state at the point where the call is made, allowing us to revert to this state and proceed execution if the call triggered a synchronous exception. Given that the kernel and the firmware don't share any data structures that could end up in an indeterminate state, we can happily continue running, as long as we mark the EFI runtime services as unavailable from that point on. Signed-off-by: Ard Biesheuvel Acked-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 8 ++++++++ arch/arm64/kernel/efi-rt-wrapper.S | 33 ++++++++++++++++++++++++++++-- arch/arm64/kernel/efi.c | 26 +++++++++++++++++++++++ arch/arm64/mm/fault.c | 4 ++++ 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 439e2bc5d5d8..d6cf535d8352 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,8 +14,16 @@ #ifdef CONFIG_EFI extern void efi_init(void); + +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() + +static inline +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) +{ + return false; +} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 75691a2641c1..67babd5f04c2 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -6,7 +6,7 @@ #include SYM_FUNC_START(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-32]! + stp x29, x30, [sp, #-112]! mov x29, sp /* @@ -16,6 +16,20 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) */ stp x1, x18, [sp, #16] + /* + * Preserve all callee saved registers and record the stack pointer + * value in a per-CPU variable so we can recover from synchronous + * exceptions occurring while running the firmware routines. + */ + stp x19, x20, [sp, #32] + stp x21, x22, [sp, #48] + stp x23, x24, [sp, #64] + stp x25, x26, [sp, #80] + stp x27, x28, [sp, #96] + + adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 + str x29, [x8] + /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the @@ -31,7 +45,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) ldp x1, x2, [sp, #16] cmp x2, x18 - ldp x29, x30, [sp], #32 + ldp x29, x30, [sp], #112 b.ne 0f ret 0: @@ -45,3 +59,18 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x18, x2 b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) + +SYM_FUNC_START(__efi_rt_asm_recover) + ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 + mov sp, x8 + + ldp x0, x18, [sp, #16] + ldp x19, x20, [sp, #32] + ldp x21, x22, [sp, #48] + ldp x23, x24, [sp, #64] + ldp x25, x26, [sp, #80] + ldp x27, x28, [sp, #96] + ldp x29, x30, [sp], #112 + + b efi_handle_runtime_exception +SYM_FUNC_END(__efi_rt_asm_recover) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index e1be6c429810..8d36e66a6e64 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,6 +9,7 @@ #include #include +#include #include @@ -128,3 +129,28 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } + +asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); + +asmlinkage efi_status_t __efi_rt_asm_recover(void); + +asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) +{ + pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return EFI_ABORTED; +} + +bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) +{ + /* Check whether the exception occurred while running the firmware */ + if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) + return false; + + pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + dump_stack(); + + regs->pc = (u64)__efi_rt_asm_recover; + return true; +} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 5b391490e045..3e9cf9826417 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -391,6 +392,9 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, msg = "paging request"; } + if (efi_runtime_fixup_exception(regs, msg)) + return; + die_kernel_fault(msg, addr, esr, regs); } From a207620123f27e6f63bab13b46b20e03a3deec8d Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 31 Oct 2022 10:29:20 +0100 Subject: [PATCH 199/328] mm/slab_common: repair kernel-doc for __ksize() Commit 445d41d7a7c1 ("Merge branch 'slab/for-6.1/kmalloc_size_roundup' into slab/for-next") resolved a conflict of two concurrent changes to __ksize(). However, it did not adjust the kernel-doc comment of __ksize(), while the name of the argument to __ksize() was renamed. Hence, ./scripts/ kernel-doc -none mm/slab_common.c warns about it. Adjust the kernel-doc comment for __ksize() for make W=1 happiness. Signed-off-by: Lukas Bulwahn Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 33b1886b06eb..74a991fd9d31 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1010,7 +1010,7 @@ EXPORT_SYMBOL(kfree); /** * __ksize -- Report full size of underlying allocation - * @objp: pointer to the object + * @object: pointer to the object * * This should only be used internally to query the true size of allocations. * It is not meant to be a way to discover the usable size of an allocation @@ -1018,7 +1018,7 @@ EXPORT_SYMBOL(kfree); * the originally requested allocation size may trigger KASAN, UBSAN_BOUNDS, * and/or FORTIFY_SOURCE. * - * Return: size of the actual memory used by @objp in bytes + * Return: size of the actual memory used by @object in bytes */ size_t __ksize(const void *object) { From 85f1506337f0c79a4955edfeee86a18628e3735f Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Thu, 3 Nov 2022 13:52:32 +0530 Subject: [PATCH 200/328] arm64: cpufeature: Fix the visibility of compat hwcaps Commit 237405ebef58 ("arm64: cpufeature: Force HWCAP to be based on the sysreg visible to user-space") forced the hwcaps to use sanitised user-space view of the id registers. However, the ID register structures used to select few compat cpufeatures (vfp, crc32, ...) are masked and hence such hwcaps do not appear in /proc/cpuinfo anymore for PER_LINUX32 personality. Add the ID register structures explicitly and set the relevant entry as visible. As these ID registers are now of type visible so make them available in 64-bit userspace by making necessary changes in register emulation logic and documentation. While at it, update the comment for structure ftr_generic_32bits[] which lists the ID register that use it. Fixes: 237405ebef58 ("arm64: cpufeature: Force HWCAP to be based on the sysreg visible to user-space") Cc: Suzuki K Poulose Reviewed-by: James Morse Signed-off-by: Amit Daniel Kachhap Link: https://lore.kernel.org/r/20221103082232.19189-1-amit.kachhap@arm.com Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.rst | 38 ++++++++++++++++- arch/arm64/kernel/cpufeature.c | 42 +++++++++++++++---- 2 files changed, 70 insertions(+), 10 deletions(-) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 04ba83e1965f..c7adc7897df6 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -92,7 +92,7 @@ operation if the source belongs to the supported system register space. The infrastructure emulates only the following system register space:: - Op0=3, Op1=0, CRn=0, CRm=0,4,5,6,7 + Op0=3, Op1=0, CRn=0, CRm=0,2,3,4,5,6,7 (See Table C5-6 'System instruction encodings for non-Debug System register accesses' in ARMv8 ARM DDI 0487A.h, for the list of @@ -293,6 +293,42 @@ infrastructure: | WFXT | [3-0] | y | +------------------------------+---------+---------+ + 10) MVFR0_EL1 - AArch32 Media and VFP Feature Register 0 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | FPDP | [11-8] | y | + +------------------------------+---------+---------+ + + 11) MVFR1_EL1 - AArch32 Media and VFP Feature Register 1 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | SIMDFMAC | [31-28] | y | + +------------------------------+---------+---------+ + | SIMDSP | [19-16] | y | + +------------------------------+---------+---------+ + | SIMDInt | [15-12] | y | + +------------------------------+---------+---------+ + | SIMDLS | [11-8] | y | + +------------------------------+---------+---------+ + + 12) ID_ISAR5_EL1 - AArch32 Instruction Set Attribute Register 5 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | CRC32 | [19-16] | y | + +------------------------------+---------+---------+ + | SHA2 | [15-12] | y | + +------------------------------+---------+---------+ + | SHA1 | [11-8] | y | + +------------------------------+---------+---------+ + | AES | [7-4] | y | + +------------------------------+---------+---------+ + Appendix I: Example ------------------- diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6062454a9067..b3f37e2209ad 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -428,6 +428,30 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_mvfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPROUND_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSHVEC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSQRT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPDIVIDE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPTRAP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPDP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_FPSP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_SIMD_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static const struct arm64_ftr_bits ftr_mvfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDFMAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPHP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDHP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDSP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDINT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_SIMDLS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPDNAN_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_FPFTZ_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_mvfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_FPMISC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_SIMDMISC_SHIFT, 4, 0), @@ -458,10 +482,10 @@ static const struct arm64_ftr_bits ftr_id_isar0[] = { static const struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SEVL_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -574,7 +598,7 @@ static const struct arm64_ftr_bits ftr_smcr[] = { * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of * 0. Covers the following 32bit registers: - * id_isar[1-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] + * id_isar[1-3], id_mmfr[1-3] */ static const struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), @@ -645,8 +669,8 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6), /* Op1 = 0, CRn = 0, CRm = 3 */ - ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), - ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_mvfr0), + ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_mvfr1), ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2), ARM64_FTR_REG(SYS_ID_DFR1_EL1, ftr_id_dfr1), @@ -3339,7 +3363,7 @@ static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *c /* * We emulate only the following system register space. - * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] + * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 2 - 7] * See Table C5-6 System instruction encodings for System register accesses, * ARMv8 ARM(ARM DDI 0487A.f) for more details. */ @@ -3349,7 +3373,7 @@ static inline bool __attribute_const__ is_emulated(u32 id) sys_reg_CRn(id) == 0x0 && sys_reg_Op1(id) == 0x0 && (sys_reg_CRm(id) == 0 || - ((sys_reg_CRm(id) >= 4) && (sys_reg_CRm(id) <= 7)))); + ((sys_reg_CRm(id) >= 2) && (sys_reg_CRm(id) <= 7)))); } /* From 4f1aa35f1fb7d51b125487c835982af792697ecb Mon Sep 17 00:00:00 2001 From: Yu Zhe Date: Tue, 27 Sep 2022 15:02:47 +0800 Subject: [PATCH 201/328] cxl/pmem: Use size_add() against integer overflow "struct_size() + n" may cause a integer overflow, use size_add() to handle it. Signed-off-by: Yu Zhe Link: https://lore.kernel.org/r/20220927070247.23148-1-yuzhe@nfschina.com Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 115a7b79f343..0bac05d804bc 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -148,7 +148,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, return -EINVAL; /* 4-byte status follows the input data in the payload */ - if (struct_size(cmd, in_buf, cmd->in_length) + 4 > buf_len) + if (size_add(struct_size(cmd, in_buf, cmd->in_length), 4) > buf_len) return -EINVAL; set_lsa = From f1db20814af532f85e091231223e5e4818e8464b Mon Sep 17 00:00:00 2001 From: Youlin Li Date: Thu, 3 Nov 2022 17:34:39 +0800 Subject: [PATCH 202/328] bpf: Fix wrong reg type conversion in release_reference() Some helper functions will allocate memory. To avoid memory leaks, the verifier requires the eBPF program to release these memories by calling the corresponding helper functions. When a resource is released, all pointer registers corresponding to the resource should be invalidated. The verifier use release_references() to do this job, by apply __mark_reg_unknown() to each relevant register. It will give these registers the type of SCALAR_VALUE. A register that will contain a pointer value at runtime, but of type SCALAR_VALUE, which may allow the unprivileged user to get a kernel pointer by storing this register into a map. Using __mark_reg_not_init() while NOT allow_ptr_leaks can mitigate this problem. Fixes: fd978bf7fd31 ("bpf: Add reference tracking to verifier") Signed-off-by: Youlin Li Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221103093440.3161-1-liulin063@gmail.com --- kernel/bpf/verifier.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dd9019c8b0db..225666307bba 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6623,8 +6623,12 @@ static int release_reference(struct bpf_verifier_env *env, return err; bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ - if (reg->ref_obj_id == ref_obj_id) - __mark_reg_unknown(env, reg); + if (reg->ref_obj_id == ref_obj_id) { + if (!env->allow_ptr_leaks) + __mark_reg_not_init(env, reg); + else + __mark_reg_unknown(env, reg); + } })); return 0; From 475244f5e06beeda7b557d9dde46a5f439bf3379 Mon Sep 17 00:00:00 2001 From: Youlin Li Date: Thu, 3 Nov 2022 17:34:40 +0800 Subject: [PATCH 203/328] selftests/bpf: Add verifier test for release_reference() Add a test case to ensure that released pointer registers will not be leaked into the map. Before fix: ./test_verifier 984 984/u reference tracking: try to leak released ptr reg FAIL Unexpected success to load! verification time 67 usec stack depth 4 processed 23 insns (limit 1000000) max_states_per_insn 0 total_states 2 peak_states 2 mark_read 1 984/p reference tracking: try to leak released ptr reg OK Summary: 1 PASSED, 0 SKIPPED, 1 FAILED After fix: ./test_verifier 984 984/u reference tracking: try to leak released ptr reg OK 984/p reference tracking: try to leak released ptr reg OK Summary: 2 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Youlin Li Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221103093440.3161-2-liulin063@gmail.com --- .../selftests/bpf/verifier/ref_tracking.c | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index f18ce867271f..fd683a32a276 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -1044,3 +1044,39 @@ .result_unpriv = REJECT, .errstr_unpriv = "unknown func", }, +{ + "reference tracking: try to leak released ptr reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_ringbuf_reserve), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_ringbuf_discard), + BPF_MOV64_IMM(BPF_REG_0, 0), + + BPF_STX_MEM(BPF_DW, BPF_REG_9, BPF_REG_8, 0), + BPF_EXIT_INSN() + }, + .fixup_map_array_48b = { 4 }, + .fixup_map_ringbuf = { 11 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R8 !read_ok" +}, From 4a6f316d6855a434f56dbbeba05e14c01acde8f8 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Fri, 4 Nov 2022 08:49:31 +0900 Subject: [PATCH 204/328] kprobe: reverse kp->flags when arm_kprobe failed In aggregate kprobe case, when arm_kprobe failed, we need set the kp->flags with KPROBE_FLAG_DISABLED again. If not, the 'kp' kprobe will been considered as enabled but it actually not enabled. Link: https://lore.kernel.org/all/20220902155820.34755-1-liq3ea@163.com/ Fixes: 12310e343755 ("kprobes: Propagate error from arm_kprobe_ftrace()") Cc: stable@vger.kernel.org Signed-off-by: Li Qiang Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/kprobes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3220b0a2fb4a..cd9f5a66a690 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2429,8 +2429,11 @@ int enable_kprobe(struct kprobe *kp) if (!kprobes_all_disarmed && kprobe_disabled(p)) { p->flags &= ~KPROBE_FLAG_DISABLED; ret = arm_kprobe(p); - if (ret) + if (ret) { p->flags |= KPROBE_FLAG_DISABLED; + if (p != kp) + kp->flags |= KPROBE_FLAG_DISABLED; + } } out: mutex_unlock(&kprobe_mutex); From d05ea35e7eea14d32f29fd688d3daeb9089de1a5 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Tue, 25 Oct 2022 00:12:08 -0300 Subject: [PATCH 205/328] fprobe: Check rethook_alloc() return in rethook initialization Check if fp->rethook succeeded to be allocated. Otherwise, if rethook_alloc() fails, then we end up dereferencing a NULL pointer in rethook_add_node(). Link: https://lore.kernel.org/all/20221025031209.954836-1-rafaelmendsr@gmail.com/ Fixes: 5b0ab78998e3 ("fprobe: Add exit_handler support") Cc: stable@vger.kernel.org Signed-off-by: Rafael Mendonca Acked-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/fprobe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index aac63ca9c3d1..71614b2a67ff 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -141,6 +141,8 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) return -E2BIG; fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler); + if (!fp->rethook) + return -ENOMEM; for (i = 0; i < size; i++) { struct fprobe_rethook_node *node; From 61b304b73ab4b48b1cd7796efe42a570e2a0e0fc Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 23 Oct 2022 11:11:43 +0900 Subject: [PATCH 206/328] tracing/fprobe: Fix to check whether fprobe is registered correctly Since commit ab51e15d535e ("fprobe: Introduce FPROBE_FL_KPROBE_SHARED flag for fprobe") introduced fprobe_kprobe_handler() for fprobe::ops::func, unregister_fprobe() fails to unregister the registered if user specifies FPROBE_FL_KPROBE_SHARED flag. Moreover, __register_ftrace_function() is possible to change the ftrace_ops::func, thus we have to check fprobe::ops::saved_func instead. To check it correctly, it should confirm the fprobe::ops::saved_func is either fprobe_handler() or fprobe_kprobe_handler(). Link: https://lore.kernel.org/all/166677683946.1459107.15997653945538644683.stgit@devnote3/ Fixes: cad9931f64dc ("fprobe: Add ftrace based probe APIs") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/fprobe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 71614b2a67ff..e8143e368074 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -303,7 +303,8 @@ int unregister_fprobe(struct fprobe *fp) { int ret; - if (!fp || fp->ops.func != fprobe_handler) + if (!fp || (fp->ops.saved_func != fprobe_handler && + fp->ops.saved_func != fprobe_kprobe_handler)) return -EINVAL; /* From 66f0919c953ef7b55e5ab94389a013da2ce80a2c Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Wed, 2 Nov 2022 15:29:54 +0800 Subject: [PATCH 207/328] tracing: kprobe: Fix memory leak in test_gen_kprobe/kretprobe_cmd() test_gen_kprobe_cmd() only free buf in fail path, hence buf will leak when there is no failure. Move kfree(buf) from fail path to common path to prevent the memleak. The same reason and solution in test_gen_kretprobe_cmd(). unreferenced object 0xffff888143b14000 (size 2048): comm "insmod", pid 52490, jiffies 4301890980 (age 40.553s) hex dump (first 32 bytes): 70 3a 6b 70 72 6f 62 65 73 2f 67 65 6e 5f 6b 70 p:kprobes/gen_kp 72 6f 62 65 5f 74 65 73 74 20 64 6f 5f 73 79 73 robe_test do_sys backtrace: [<000000006d7b836b>] kmalloc_trace+0x27/0xa0 [<0000000009528b5b>] 0xffffffffa059006f [<000000008408b580>] do_one_initcall+0x87/0x2a0 [<00000000c4980a7e>] do_init_module+0xdf/0x320 [<00000000d775aad0>] load_module+0x3006/0x3390 [<00000000e9a74b80>] __do_sys_finit_module+0x113/0x1b0 [<000000003726480d>] do_syscall_64+0x35/0x80 [<000000003441e93b>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Link: https://lore.kernel.org/all/20221102072954.26555-1-shangxiaojing@huawei.com/ Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Cc: stable@vger.kernel.org Signed-off-by: Shang XiaoJing Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/kprobe_event_gen_test.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index 80e04a1e1977..d81f7c51025c 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -100,20 +100,20 @@ static int __init test_gen_kprobe_cmd(void) KPROBE_GEN_TEST_FUNC, KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1); if (ret) - goto free; + goto out; /* Use kprobe_event_add_fields to add the rest of the fields */ ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3); if (ret) - goto free; + goto out; /* * This actually creates the event. */ ret = kprobe_event_gen_cmd_end(&cmd); if (ret) - goto free; + goto out; /* * Now get the gen_kprobe_test event file. We need to prevent @@ -136,13 +136,11 @@ static int __init test_gen_kprobe_cmd(void) goto delete; } out: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kprobe_test"); - free: - kfree(buf); - goto out; } @@ -170,14 +168,14 @@ static int __init test_gen_kretprobe_cmd(void) KPROBE_GEN_TEST_FUNC, "$retval"); if (ret) - goto free; + goto out; /* * This actually creates the event. */ ret = kretprobe_event_gen_cmd_end(&cmd); if (ret) - goto free; + goto out; /* * Now get the gen_kretprobe_test event file. We need to @@ -201,13 +199,11 @@ static int __init test_gen_kretprobe_cmd(void) goto delete; } out: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kretprobe_test"); - free: - kfree(buf); - goto out; } From 9e4b7a99a03aefd37ba7bb1f022c8efab5019165 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 2 Nov 2022 17:53:25 +0100 Subject: [PATCH 208/328] net: gso: fix panic on frag_list with mixed head alloc types Since commit 3dcbdb134f32 ("net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list"), it is allowed to change gso_size of a GRO packet. However, that commit assumes that "checking the first list_skb member suffices; i.e if either of the list_skb members have non head_frag head, then the first one has too". It turns out this assumption does not hold. We've seen BUG_ON being hit in skb_segment when skbs on the frag_list had differing head_frag with the vmxnet3 driver. This happens because __netdev_alloc_skb and __napi_alloc_skb can return a skb that is page backed or kmalloced depending on the requested size. As the result, the last small skb in the GRO packet can be kmalloced. There are three different locations where this can be fixed: (1) We could check head_frag in GRO and not allow GROing skbs with different head_frag. However, that would lead to performance regression on normal forward paths with unmodified gso_size, where !head_frag in the last packet is not a problem. (2) Set a flag in bpf_skb_net_grow and bpf_skb_net_shrink indicating that NETIF_F_SG is undesirable. That would need to eat a bit in sk_buff. Furthermore, that flag can be unset when all skbs on the frag_list are page backed. To retain good performance, bpf_skb_net_grow/shrink would have to walk the frag_list. (3) Walk the frag_list in skb_segment when determining whether NETIF_F_SG should be cleared. This of course slows things down. This patch implements (3). To limit the performance impact in skb_segment, the list is walked only for skbs with SKB_GSO_DODGY set that have gso_size changed. Normal paths thus will not hit it. We could check only the last skb but since we need to walk the whole list anyway, let's stay on the safe side. Fixes: 3dcbdb134f32 ("net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list") Signed-off-by: Jiri Benc Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/e04426a6a91baf4d1081e1b478c82b5de25fdf21.1667407944.git.jbenc@redhat.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d1a3fa6f3f12..88fa40571d0c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4134,23 +4134,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int i = 0; int pos; - if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && - (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { - /* gso_size is untrusted, and we have a frag_list with a linear - * non head_frag head. - * - * (we assume checking the first list_skb member suffices; - * i.e if either of the list_skb members have non head_frag - * head, then the first one has too). - * - * If head_skb's headlen does not fit requested gso_size, it - * means that the frag_list members do NOT terminate on exact - * gso_size boundaries. Hence we cannot perform skb_frag_t page - * sharing. Therefore we must fallback to copying the frag_list - * skbs; we do so by disabling SG. - */ - if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) - features &= ~NETIF_F_SG; + if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && + mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) { + struct sk_buff *check_skb; + + for (check_skb = list_skb; check_skb; check_skb = check_skb->next) { + if (skb_headlen(check_skb) && !check_skb->head_frag) { + /* gso_size is untrusted, and we have a frag_list with + * a linear non head_frag item. + * + * If head_skb's headlen does not fit requested gso_size, + * it means that the frag_list members do NOT terminate + * on exact gso_size boundaries. Hence we cannot perform + * skb_frag_t page sharing. Therefore we must fallback to + * copying the frag_list skbs; we do so by disabling SG. + */ + features &= ~NETIF_F_SG; + break; + } + } } __skb_push(head_skb, doffset); From 4581dd480c9e42a1ad21dd8b9c110abe41878ce5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 31 Oct 2022 22:08:13 -0700 Subject: [PATCH 209/328] net: octeontx2-pf: mcs: consider MACSEC setting Fix build errors when MACSEC=m and OCTEONTX2_PF=y by having OCTEONTX2_PF depend on MACSEC if it is enabled. By adding "|| !MACSEC", this means that MACSEC is not required -- it can be disabled for this driver. drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.o: in function `otx2_remove': ../drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c:(.text+0x2fd0): undefined reference to `cn10k_mcs_free' mips64-linux-ld: drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.o: in function `otx2_mbox_up_handler_mcs_intr_notify': ../drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c:(.text+0x4610): undefined reference to `cn10k_handle_mcs_event' Reported-by: kernel test robot Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Signed-off-by: Randy Dunlap Cc: Subbaraya Sundeep Cc: Sunil Goutham Cc: Geetha sowjanya Cc: hariprasad Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index e1036b0eb6b1..993ac180a5db 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -36,6 +36,7 @@ config OCTEONTX2_PF select DIMLIB depends on PCI depends on PTP_1588_CLOCK_OPTIONAL + depends on MACSEC || !MACSEC help This driver supports Marvell's OcteonTX2 NIC physical function. From cfdcb075048c1e886c45a9c9e681ed222f74ecb9 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Tue, 1 Nov 2022 15:48:38 +0800 Subject: [PATCH 210/328] net: hns3: fix get wrong value of function hclge_get_dscp_prio() As the argument struct hnae3_handle *h of function hclge_get_dscp_prio() can be other client registered in hnae3 layer, we need to transform it into hnae3_handle of local nic client to get right dscp settings for other clients. Fixes: dfea275e06c2 ("net: hns3: optimize converting dscp to priority process of hns3_nic_select_queue()") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 6962a9d69cf8..987271da6e9b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -12984,14 +12984,16 @@ static void hclge_clean_vport_config(struct hnae3_ae_dev *ae_dev, int num_vfs) static int hclge_get_dscp_prio(struct hnae3_handle *h, u8 dscp, u8 *tc_mode, u8 *priority) { + struct hclge_vport *vport = hclge_get_vport(h); + if (dscp >= HNAE3_MAX_DSCP) return -EINVAL; if (tc_mode) - *tc_mode = h->kinfo.tc_map_mode; + *tc_mode = vport->nic.kinfo.tc_map_mode; if (priority) - *priority = h->kinfo.dscp_prio[dscp] == HNAE3_PRIO_ID_INVALID ? 0 : - h->kinfo.dscp_prio[dscp]; + *priority = vport->nic.kinfo.dscp_prio[dscp] == HNAE3_PRIO_ID_INVALID ? 0 : + vport->nic.kinfo.dscp_prio[dscp]; return 0; } From cdb525ca92b196f8916102b62431aa0d9a644ff2 Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Tue, 1 Nov 2022 14:48:08 -0400 Subject: [PATCH 211/328] selftests/net: give more time to udpgro bg processes to complete startup In some conditions, background processes in udpgro don't have enough time to set up the sockets. When foreground processes start, this results in the test failing with "./udpgso_bench_tx: sendmsg: Connection refused". For instance, this happens from time to time on a Qualcomm SA8540P SoC running CentOS Stream 9. To fix this, increase the time given to background processes to complete the startup before foreground processes start. Signed-off-by: Adrien Thierry Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 4 ++-- tools/testing/selftests/net/udpgro_bench.sh | 2 +- tools/testing/selftests/net/udpgro_frglist.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index ebbd0b282432..6a443ca3cd3a 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -50,7 +50,7 @@ run_one() { echo "failed" & # Hack: let bg programs complete the startup - sleep 0.1 + sleep 0.2 ./udpgso_bench_tx ${tx_args} ret=$? wait $(jobs -p) @@ -117,7 +117,7 @@ run_one_2sock() { echo "failed" & # Hack: let bg programs complete the startup - sleep 0.1 + sleep 0.2 ./udpgso_bench_tx ${tx_args} -p 12345 sleep 0.1 # first UDP GSO socket should be closed at this point diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index fad2d1a71cac..8a1109a545db 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -39,7 +39,7 @@ run_one() { ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & # Hack: let bg programs complete the startup - sleep 0.1 + sleep 0.2 ./udpgso_bench_tx ${tx_args} } diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 832c738cc3c2..7fe85ba51075 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -44,7 +44,7 @@ run_one() { ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & # Hack: let bg programs complete the startup - sleep 0.1 + sleep 0.2 ./udpgso_bench_tx ${tx_args} } From 8bcd560ae8784da57c610d857118c5d6576b1a8f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:12 +0100 Subject: [PATCH 212/328] Revert "net: macsec: report real_dev features when HW offloading is enabled" This reverts commit c850240b6c4132574a00f2da439277ab94265b66. That commit tried to improve the performance of macsec offload by taking advantage of some of the NIC's features, but in doing so, broke macsec offload when the lower device supports both macsec and ipsec offload, as the ipsec offload feature flags (mainly NETIF_F_HW_ESP) were copied from the real device. Since the macsec device doesn't provide xdo_* ops, the XFRM core rejects the registration of the new macsec device in xfrm_api_check. Example perf trace when running ip link add link eni1np1 type macsec port 4 offload mac ip 737 [003] 795.477676: probe:xfrm_dev_event__REGISTER name="macsec0" features=0x1c000080014869 xfrm_dev_event+0x3a notifier_call_chain+0x47 register_netdevice+0x846 macsec_newlink+0x25a ip 737 [003] 795.477687: probe:xfrm_dev_event__return ret=0x8002 (NOTIFY_BAD) notifier_call_chain+0x47 register_netdevice+0x846 macsec_newlink+0x25a dev->features includes NETIF_F_HW_ESP (0x04000000000000), so xfrm_api_check returns NOTIFY_BAD because we don't have dev->xfrmdev_ops on the macsec device. We could probably propagate GSO and a few other features from the lower device, similar to macvlan. This will be done in a future patch. Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/macsec.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c891b60937a7..b3f76e8071f2 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2654,11 +2654,6 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info) if (ret) goto rollback; - /* Force features update, since they are different for SW MACSec and - * HW offloading cases. - */ - netdev_update_features(dev); - rtnl_unlock(); return 0; @@ -3432,16 +3427,9 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, return ret; } -#define SW_MACSEC_FEATURES \ +#define MACSEC_FEATURES \ (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) -/* If h/w offloading is enabled, use real device features save for - * VLAN_FEATURES - they require additional ops - * HW_MACSEC - no reason to report it - */ -#define REAL_DEV_FEATURES(dev) \ - ((dev)->features & ~(NETIF_F_VLAN_FEATURES | NETIF_F_HW_MACSEC)) - static int macsec_dev_init(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); @@ -3458,12 +3446,8 @@ static int macsec_dev_init(struct net_device *dev) return err; } - if (macsec_is_offloaded(macsec)) { - dev->features = REAL_DEV_FEATURES(real_dev); - } else { - dev->features = real_dev->features & SW_MACSEC_FEATURES; - dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE; - } + dev->features = real_dev->features & MACSEC_FEATURES; + dev->features |= NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE; dev->needed_headroom = real_dev->needed_headroom + MACSEC_NEEDED_HEADROOM; @@ -3495,10 +3479,7 @@ static netdev_features_t macsec_fix_features(struct net_device *dev, struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; - if (macsec_is_offloaded(macsec)) - return REAL_DEV_FEATURES(real_dev); - - features &= (real_dev->features & SW_MACSEC_FEATURES) | + features &= (real_dev->features & MACSEC_FEATURES) | NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES; features |= NETIF_F_LLTX; From 93a30947821c203d08865c4e17ea181c9668ce52 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:13 +0100 Subject: [PATCH 213/328] macsec: delete new rxsc when offload fails Currently we get an inconsistent state: - netlink returns the error to userspace - the RXSC is installed but not offloaded Then the device could get confused when we try to add an RXSA, because the RXSC isn't supposed to exist. Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/macsec.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index b3f76e8071f2..0d6fe34b91ae 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1876,7 +1876,6 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct macsec_secy *secy; - bool was_active; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) @@ -1904,7 +1903,6 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(rx_sc); } - was_active = rx_sc->active; if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) rx_sc->active = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); @@ -1931,7 +1929,8 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) return 0; cleanup: - rx_sc->active = was_active; + del_rx_sc(secy, sci); + free_rx_sc(rx_sc); rtnl_unlock(); return ret; } From 73a4b31c9d11f98ae3bc5286d5382930adb0e9c7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:14 +0100 Subject: [PATCH 214/328] macsec: fix secy->n_rx_sc accounting secy->n_rx_sc is supposed to be the number of _active_ rxsc's within a secy. This is then used by macsec_send_sci to help decide if we should add the SCI to the header or not. This logic is currently broken when we create a new RXSC and turn it off at creation, as create_rx_sc always sets ->active to true (and immediately uses that to increment n_rx_sc), and only later macsec_add_rxsc sets rx_sc->active. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/macsec.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 0d6fe34b91ae..1b4d856f4bd7 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1413,7 +1413,8 @@ static struct macsec_rx_sc *del_rx_sc(struct macsec_secy *secy, sci_t sci) return NULL; } -static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci) +static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci, + bool active) { struct macsec_rx_sc *rx_sc; struct macsec_dev *macsec; @@ -1437,7 +1438,7 @@ static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci) } rx_sc->sci = sci; - rx_sc->active = true; + rx_sc->active = active; refcount_set(&rx_sc->refcnt, 1); secy = &macsec_priv(dev)->secy; @@ -1876,6 +1877,7 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct macsec_secy *secy; + bool active = true; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) @@ -1897,15 +1899,15 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) secy = &macsec_priv(dev)->secy; sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); - rx_sc = create_rx_sc(dev, sci); + if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) + active = nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); + + rx_sc = create_rx_sc(dev, sci, active); if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } - if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) - rx_sc->active = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); - if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; From 80df4706357a5a06bbbc70273bf2611df1ceee04 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:15 +0100 Subject: [PATCH 215/328] macsec: fix detection of RXSCs when toggling offloading macsec_is_configured incorrectly uses secy->n_rx_sc to check if some RXSCs exist. secy->n_rx_sc only counts the number of active RXSCs, but there can also be inactive SCs as well, which may be stored in the driver (in case we're disabling offloading), or would have to be pushed to the device (in case we're trying to enable offloading). As long as RXSCs active on creation and never turned off, the issue is not visible. Fixes: dcb780fb2795 ("net: macsec: add nla support for changing the offloading selection") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 1b4d856f4bd7..700a8f96c6c2 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2571,7 +2571,7 @@ static bool macsec_is_configured(struct macsec_dev *macsec) struct macsec_tx_sc *tx_sc = &secy->tx_sc; int i; - if (secy->n_rx_sc > 0) + if (secy->rx_sc) return true; for (i = 0; i < MACSEC_NUM_AN; i++) From aaab73f8fba4fd38f4d2617440d541a1c334e819 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:16 +0100 Subject: [PATCH 216/328] macsec: clear encryption keys from the stack after setting up offload macsec_add_rxsa and macsec_add_txsa copy the key to an on-stack offloading context to pass it to the drivers, but leaves it there when it's done. Clear it with memzero_explicit as soon as it's not needed anymore. Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/macsec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 700a8f96c6c2..85376d2f24ca 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1839,6 +1839,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) secy->key_len); err = macsec_offload(ops->mdo_add_rxsa, &ctx); + memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } @@ -2081,6 +2082,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) secy->key_len); err = macsec_offload(ops->mdo_add_txsa, &ctx); + memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } From 1a0c016a4831ea29be09bbc8162d4a2a0690b4b8 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 2 Nov 2022 12:31:44 +0200 Subject: [PATCH 217/328] net: ethernet: ti: am65-cpsw: Fix segmentation fault at module unload Move am65_cpsw_nuss_phylink_cleanup() call to after am65_cpsw_nuss_cleanup_ndev() so phylink is still valid to prevent the below Segmentation fault on module remove when first slave link is up. [ 31.652944] Unable to handle kernel paging request at virtual address 00040008000005f4 [ 31.684627] Mem abort info: [ 31.687446] ESR = 0x0000000096000004 [ 31.704614] EC = 0x25: DABT (current EL), IL = 32 bits [ 31.720663] SET = 0, FnV = 0 [ 31.723729] EA = 0, S1PTW = 0 [ 31.740617] FSC = 0x04: level 0 translation fault [ 31.756624] Data abort info: [ 31.759508] ISV = 0, ISS = 0x00000004 [ 31.776705] CM = 0, WnR = 0 [ 31.779695] [00040008000005f4] address between user and kernel address ranges [ 31.808644] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [ 31.814928] Modules linked in: wlcore_sdio wl18xx wlcore mac80211 libarc4 cfg80211 rfkill crct10dif_ce phy_gmii_sel ti_am65_cpsw_nuss(-) sch_fq_codel ipv6 [ 31.828776] CPU: 0 PID: 1026 Comm: modprobe Not tainted 6.1.0-rc2-00012-gfabfcf7dafdb-dirty #160 [ 31.837547] Hardware name: Texas Instruments AM625 (DT) [ 31.842760] pstate: 40000005 (nZcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 31.849709] pc : phy_stop+0x18/0xf8 [ 31.853202] lr : phylink_stop+0x38/0xf8 [ 31.857031] sp : ffff80000a0839f0 [ 31.860335] x29: ffff80000a0839f0 x28: ffff000000de1c80 x27: 0000000000000000 [ 31.867462] x26: 0000000000000000 x25: 0000000000000000 x24: ffff80000a083b98 [ 31.874589] x23: 0000000000000800 x22: 0000000000000001 x21: ffff000001bfba90 [ 31.881715] x20: ffff0000015ee000 x19: 0004000800000200 x18: 0000000000000000 [ 31.888842] x17: ffff800076c45000 x16: ffff800008004000 x15: 000058e39660b106 [ 31.895969] x14: 0000000000000144 x13: 0000000000000144 x12: 0000000000000000 [ 31.903095] x11: 000000000000275f x10: 00000000000009e0 x9 : ffff80000a0837d0 [ 31.910222] x8 : ffff000000de26c0 x7 : ffff00007fbd6540 x6 : ffff00007fbd64c0 [ 31.917349] x5 : ffff00007fbd0b10 x4 : ffff00007fbd0b10 x3 : ffff00007fbd3920 [ 31.924476] x2 : d0a07fcff8b8d500 x1 : 0000000000000000 x0 : 0004000800000200 [ 31.931603] Call trace: [ 31.934042] phy_stop+0x18/0xf8 [ 31.937177] phylink_stop+0x38/0xf8 [ 31.940657] am65_cpsw_nuss_ndo_slave_stop+0x28/0x1e0 [ti_am65_cpsw_nuss] [ 31.947452] __dev_close_many+0xa4/0x140 [ 31.951371] dev_close_many+0x84/0x128 [ 31.955115] unregister_netdevice_many+0x130/0x6d0 [ 31.959897] unregister_netdevice_queue+0x94/0xd8 [ 31.964591] unregister_netdev+0x24/0x38 [ 31.968504] am65_cpsw_nuss_cleanup_ndev.isra.0+0x48/0x70 [ti_am65_cpsw_nuss] [ 31.975637] am65_cpsw_nuss_remove+0x58/0xf8 [ti_am65_cpsw_nuss] Cc: # v5.18+ Fixes: e8609e69470f ("net: ethernet: ti: am65-cpsw: Convert to PHYLINK") Signed-off-by: Roger Quadros Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 7f86068f3ff6..c50b137f92d7 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2823,7 +2823,6 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev) if (ret < 0) return ret; - am65_cpsw_nuss_phylink_cleanup(common); am65_cpsw_unregister_devlink(common); am65_cpsw_unregister_notifiers(common); @@ -2831,6 +2830,7 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev) * dma_deconfigure(dev) before devres_release_all(dev) */ am65_cpsw_nuss_cleanup_ndev(common); + am65_cpsw_nuss_phylink_cleanup(common); of_platform_device_destroy(common->mdio_dev, NULL); From 51afe9026d0c63263abe9840e629f118d7405b36 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Wed, 2 Nov 2022 08:41:13 +0530 Subject: [PATCH 218/328] octeontx2-pf: NIX TX overwrites SQ_CTX_HW_S[SQ_INT] In scenarios where multiple errors have occurred for a SQ before SW starts handling error interrupt, SQ_CTX[OP_INT] may get overwritten leading to NIX_LF_SQ_OP_INT returning incorrect value. To workaround this read LMT, MNQ and SQ individual error status registers to determine the cause of error. Fixes: 4ff7d1488a84 ("octeontx2-pf: Error handling support") Signed-off-by: Ratheesh Kannoth Reviewed-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 135 ++++++++++++++---- .../marvell/octeontx2/nic/otx2_struct.h | 57 ++++++++ 2 files changed, 162 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 892ca88e0cf4..303930499a4c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "otx2_reg.h" #include "otx2_common.h" @@ -1171,6 +1172,59 @@ int otx2_set_real_num_queues(struct net_device *netdev, } EXPORT_SYMBOL(otx2_set_real_num_queues); +static char *nix_sqoperr_e_str[NIX_SQOPERR_MAX] = { + "NIX_SQOPERR_OOR", + "NIX_SQOPERR_CTX_FAULT", + "NIX_SQOPERR_CTX_POISON", + "NIX_SQOPERR_DISABLED", + "NIX_SQOPERR_SIZE_ERR", + "NIX_SQOPERR_OFLOW", + "NIX_SQOPERR_SQB_NULL", + "NIX_SQOPERR_SQB_FAULT", + "NIX_SQOPERR_SQE_SZ_ZERO", +}; + +static char *nix_mnqerr_e_str[NIX_MNQERR_MAX] = { + "NIX_MNQERR_SQ_CTX_FAULT", + "NIX_MNQERR_SQ_CTX_POISON", + "NIX_MNQERR_SQB_FAULT", + "NIX_MNQERR_SQB_POISON", + "NIX_MNQERR_TOTAL_ERR", + "NIX_MNQERR_LSO_ERR", + "NIX_MNQERR_CQ_QUERY_ERR", + "NIX_MNQERR_MAX_SQE_SIZE_ERR", + "NIX_MNQERR_MAXLEN_ERR", + "NIX_MNQERR_SQE_SIZEM1_ZERO", +}; + +static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] = { + "NIX_SND_STATUS_GOOD", + "NIX_SND_STATUS_SQ_CTX_FAULT", + "NIX_SND_STATUS_SQ_CTX_POISON", + "NIX_SND_STATUS_SQB_FAULT", + "NIX_SND_STATUS_SQB_POISON", + "NIX_SND_STATUS_HDR_ERR", + "NIX_SND_STATUS_EXT_ERR", + "NIX_SND_STATUS_JUMP_FAULT", + "NIX_SND_STATUS_JUMP_POISON", + "NIX_SND_STATUS_CRC_ERR", + "NIX_SND_STATUS_IMM_ERR", + "NIX_SND_STATUS_SG_ERR", + "NIX_SND_STATUS_MEM_ERR", + "NIX_SND_STATUS_INVALID_SUBDC", + "NIX_SND_STATUS_SUBDC_ORDER_ERR", + "NIX_SND_STATUS_DATA_FAULT", + "NIX_SND_STATUS_DATA_POISON", + "NIX_SND_STATUS_NPC_DROP_ACTION", + "NIX_SND_STATUS_LOCK_VIOL", + "NIX_SND_STATUS_NPC_UCAST_CHAN_ERR", + "NIX_SND_STATUS_NPC_MCAST_CHAN_ERR", + "NIX_SND_STATUS_NPC_MCAST_ABORT", + "NIX_SND_STATUS_NPC_VTAG_PTR_ERR", + "NIX_SND_STATUS_NPC_VTAG_SIZE_ERR", + "NIX_SND_STATUS_SEND_STATS_ERR", +}; + static irqreturn_t otx2_q_intr_handler(int irq, void *data) { struct otx2_nic *pf = data; @@ -1204,46 +1258,67 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) /* SQ */ for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) { + u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg; + u8 sq_op_err_code, mnq_err_code, snd_err_code; + + /* Below debug registers captures first errors corresponding to + * those registers. We don't have to check against SQ qid as + * these are fatal errors. + */ + ptr = otx2_get_regaddr(pf, NIX_LF_SQ_OP_INT); val = otx2_atomic64_add((qidx << 44), ptr); otx2_write64(pf, NIX_LF_SQ_OP_INT, (qidx << 44) | (val & NIX_SQINT_BITS)); - if (!(val & (NIX_SQINT_BITS | BIT_ULL(42)))) - continue; - if (val & BIT_ULL(42)) { netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n", qidx, otx2_read64(pf, NIX_LF_ERR_INT)); - } else { - if (val & BIT_ULL(NIX_SQINT_LMT_ERR)) { - netdev_err(pf->netdev, "SQ%lld: LMT store error NIX_LF_SQ_OP_ERR_DBG:0x%llx", - qidx, - otx2_read64(pf, - NIX_LF_SQ_OP_ERR_DBG)); - otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, - BIT_ULL(44)); - } - if (val & BIT_ULL(NIX_SQINT_MNQ_ERR)) { - netdev_err(pf->netdev, "SQ%lld: Meta-descriptor enqueue error NIX_LF_MNQ_ERR_DGB:0x%llx\n", - qidx, - otx2_read64(pf, NIX_LF_MNQ_ERR_DBG)); - otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, - BIT_ULL(44)); - } - if (val & BIT_ULL(NIX_SQINT_SEND_ERR)) { - netdev_err(pf->netdev, "SQ%lld: Send error, NIX_LF_SEND_ERR_DBG 0x%llx", - qidx, - otx2_read64(pf, - NIX_LF_SEND_ERR_DBG)); - otx2_write64(pf, NIX_LF_SEND_ERR_DBG, - BIT_ULL(44)); - } - if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL)) - netdev_err(pf->netdev, "SQ%lld: SQB allocation failed", - qidx); + goto done; } + sq_op_err_dbg = otx2_read64(pf, NIX_LF_SQ_OP_ERR_DBG); + if (!(sq_op_err_dbg & BIT(44))) + goto chk_mnq_err_dbg; + + sq_op_err_code = FIELD_GET(GENMASK(7, 0), sq_op_err_dbg); + netdev_err(pf->netdev, "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(%llx) err=%s\n", + qidx, sq_op_err_dbg, nix_sqoperr_e_str[sq_op_err_code]); + + otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, BIT_ULL(44)); + + if (sq_op_err_code == NIX_SQOPERR_SQB_NULL) + goto chk_mnq_err_dbg; + + /* Err is not NIX_SQOPERR_SQB_NULL, call aq function to read SQ structure. + * TODO: But we are in irq context. How to call mbox functions which does sleep + */ + +chk_mnq_err_dbg: + mnq_err_dbg = otx2_read64(pf, NIX_LF_MNQ_ERR_DBG); + if (!(mnq_err_dbg & BIT(44))) + goto chk_snd_err_dbg; + + mnq_err_code = FIELD_GET(GENMASK(7, 0), mnq_err_dbg); + netdev_err(pf->netdev, "SQ%lld: NIX_LF_MNQ_ERR_DBG(%llx) err=%s\n", + qidx, mnq_err_dbg, nix_mnqerr_e_str[mnq_err_code]); + otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, BIT_ULL(44)); + +chk_snd_err_dbg: + snd_err_dbg = otx2_read64(pf, NIX_LF_SEND_ERR_DBG); + if (snd_err_dbg & BIT(44)) { + snd_err_code = FIELD_GET(GENMASK(7, 0), snd_err_dbg); + netdev_err(pf->netdev, "SQ%lld: NIX_LF_SND_ERR_DBG:0x%llx err=%s\n", + qidx, snd_err_dbg, nix_snd_status_e_str[snd_err_code]); + otx2_write64(pf, NIX_LF_SEND_ERR_DBG, BIT_ULL(44)); + } + +done: + /* Print values and reset */ + if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL)) + netdev_err(pf->netdev, "SQ%lld: SQB allocation failed", + qidx); + schedule_work(&pf->reset_task); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h index aa205a0d158f..fa37b9f312ca 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h @@ -281,4 +281,61 @@ enum nix_sqint_e { BIT_ULL(NIX_SQINT_SEND_ERR) | \ BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL)) +enum nix_sqoperr_e { + NIX_SQOPERR_OOR = 0, + NIX_SQOPERR_CTX_FAULT = 1, + NIX_SQOPERR_CTX_POISON = 2, + NIX_SQOPERR_DISABLED = 3, + NIX_SQOPERR_SIZE_ERR = 4, + NIX_SQOPERR_OFLOW = 5, + NIX_SQOPERR_SQB_NULL = 6, + NIX_SQOPERR_SQB_FAULT = 7, + NIX_SQOPERR_SQE_SZ_ZERO = 8, + NIX_SQOPERR_MAX, +}; + +enum nix_mnqerr_e { + NIX_MNQERR_SQ_CTX_FAULT = 0, + NIX_MNQERR_SQ_CTX_POISON = 1, + NIX_MNQERR_SQB_FAULT = 2, + NIX_MNQERR_SQB_POISON = 3, + NIX_MNQERR_TOTAL_ERR = 4, + NIX_MNQERR_LSO_ERR = 5, + NIX_MNQERR_CQ_QUERY_ERR = 6, + NIX_MNQERR_MAX_SQE_SIZE_ERR = 7, + NIX_MNQERR_MAXLEN_ERR = 8, + NIX_MNQERR_SQE_SIZEM1_ZERO = 9, + NIX_MNQERR_MAX, +}; + +enum nix_snd_status_e { + NIX_SND_STATUS_GOOD = 0x0, + NIX_SND_STATUS_SQ_CTX_FAULT = 0x1, + NIX_SND_STATUS_SQ_CTX_POISON = 0x2, + NIX_SND_STATUS_SQB_FAULT = 0x3, + NIX_SND_STATUS_SQB_POISON = 0x4, + NIX_SND_STATUS_HDR_ERR = 0x5, + NIX_SND_STATUS_EXT_ERR = 0x6, + NIX_SND_STATUS_JUMP_FAULT = 0x7, + NIX_SND_STATUS_JUMP_POISON = 0x8, + NIX_SND_STATUS_CRC_ERR = 0x9, + NIX_SND_STATUS_IMM_ERR = 0x10, + NIX_SND_STATUS_SG_ERR = 0x11, + NIX_SND_STATUS_MEM_ERR = 0x12, + NIX_SND_STATUS_INVALID_SUBDC = 0x13, + NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x14, + NIX_SND_STATUS_DATA_FAULT = 0x15, + NIX_SND_STATUS_DATA_POISON = 0x16, + NIX_SND_STATUS_NPC_DROP_ACTION = 0x17, + NIX_SND_STATUS_LOCK_VIOL = 0x18, + NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x19, + NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x20, + NIX_SND_STATUS_NPC_MCAST_ABORT = 0x21, + NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x22, + NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x23, + NIX_SND_STATUS_SEND_MEM_FAULT = 0x24, + NIX_SND_STATUS_SEND_STATS_ERR = 0x25, + NIX_SND_STATUS_MAX, +}; + #endif /* OTX2_STRUCT_H */ From 1118b2049d77ca0b505775fc1a8d1909cf19a7ec Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Wed, 2 Nov 2022 17:41:19 +0800 Subject: [PATCH 219/328] net: tun: Fix memory leaks of napi_get_frags kmemleak reports after running test_progs: unreferenced object 0xffff8881b1672dc0 (size 232): comm "test_progs", pid 394388, jiffies 4354712116 (age 841.975s) hex dump (first 32 bytes): e0 84 d7 a8 81 88 ff ff 80 2c 67 b1 81 88 ff ff .........,g..... 00 40 c5 9b 81 88 ff ff 00 00 00 00 00 00 00 00 .@.............. backtrace: [<00000000c8f01748>] napi_skb_cache_get+0xd4/0x150 [<0000000041c7fc09>] __napi_build_skb+0x15/0x50 [<00000000431c7079>] __napi_alloc_skb+0x26e/0x540 [<000000003ecfa30e>] napi_get_frags+0x59/0x140 [<0000000099b2199e>] tun_get_user+0x183d/0x3bb0 [tun] [<000000008a5adef0>] tun_chr_write_iter+0xc0/0x1b1 [tun] [<0000000049993ff4>] do_iter_readv_writev+0x19f/0x320 [<000000008f338ea2>] do_iter_write+0x135/0x630 [<000000008a3377a4>] vfs_writev+0x12e/0x440 [<00000000a6b5639a>] do_writev+0x104/0x280 [<00000000ccf065d8>] do_syscall_64+0x3b/0x90 [<00000000d776e329>] entry_SYSCALL_64_after_hwframe+0x63/0xcd The issue occurs in the following scenarios: tun_get_user() napi_gro_frags() napi_frags_finish() case GRO_NORMAL: gro_normal_one() list_add_tail(&skb->list, &napi->rx_list); <-- While napi->rx_count < READ_ONCE(gro_normal_batch), <-- gro_normal_list() is not called, napi->rx_list is not empty <-- not ask to complete the gro work, will cause memory leaks in <-- following tun_napi_del() ... tun_napi_del() netif_napi_del() __netif_napi_del() <-- &napi->rx_list is not empty, which caused memory leaks To fix, add napi_complete() after napi_gro_frags(). Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Wang Yufen Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 946628050f28..eb12f3136a54 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1977,6 +1977,7 @@ drop: local_bh_disable(); napi_gro_frags(&tfile->napi); + napi_complete(&tfile->napi); local_bh_enable(); mutex_unlock(&tfile->napi_mutex); } else if (tfile->napi_enabled) { From eb4940d4adf590590a9d0c47e38d2799c2ff9670 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 4 Nov 2022 13:57:11 +0100 Subject: [PATCH 220/328] mm/slab: remove !CONFIG_TRACING variants of kmalloc_[node_]trace() For !CONFIG_TRACING kernels, the kmalloc() implementation tries (in cases where the allocation size is build-time constant) to save a function call, by inlining kmalloc_trace() to a kmem_cache_alloc() call. However since commit 6edf2576a6cc ("mm/slub: enable debugging memory wasting of kmalloc") this path now fails to pass the original request size to be eventually recorded (for kmalloc caches with debugging enabled). We could adjust the code to call __kmem_cache_alloc_node() as the CONFIG_TRACING variant, but that would as a result inline a call with 5 parameters, bloating the kmalloc() call sites. The cost of extra function call (to kmalloc_trace()) seems like a lesser evil. It also appears that the !CONFIG_TRACING variant is incompatible with upcoming hardening efforts [1] so it's easier if we just remove it now. Kernels with no tracing are rare these days and the benefit is dubious anyway. [1] https://lore.kernel.org/linux-mm/20221101222520.never.109-kees@kernel.org/T/#m20ecf14390e406247bde0ea9cce368f469c539ed Link: https://lore.kernel.org/all/097d8fba-bd10-a312-24a3-a4068c4f424c@suse.cz/ Suggested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 23 ----------------------- mm/slab_common.c | 2 -- 2 files changed, 25 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 90877fcde70b..45efc6c553b8 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -470,35 +470,12 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignm void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; -#ifdef CONFIG_TRACING void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) __assume_kmalloc_alignment __alloc_size(3); void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) __assume_kmalloc_alignment __alloc_size(4); -#else /* CONFIG_TRACING */ -/* Save a function call when CONFIG_TRACING=n */ -static __always_inline __alloc_size(3) -void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) -{ - void *ret = kmem_cache_alloc(s, flags); - - ret = kasan_kmalloc(s, ret, size, flags); - return ret; -} - -static __always_inline __alloc_size(4) -void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) -{ - void *ret = kmem_cache_alloc_node(s, gfpflags, node); - - ret = kasan_kmalloc(s, ret, size, gfpflags); - return ret; -} -#endif /* CONFIG_TRACING */ - void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment __alloc_size(1); diff --git a/mm/slab_common.c b/mm/slab_common.c index 74a991fd9d31..206e59051c1d 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1040,7 +1040,6 @@ size_t __ksize(const void *object) return slab_ksize(folio_slab(folio)->slab_cache); } -#ifdef CONFIG_TRACING void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) { void *ret = __kmem_cache_alloc_node(s, gfpflags, NUMA_NO_NODE, @@ -1064,7 +1063,6 @@ void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, return ret; } EXPORT_SYMBOL(kmalloc_node_trace); -#endif /* !CONFIG_TRACING */ #endif /* !CONFIG_SLOB */ gfp_t kmalloc_fix_flags(gfp_t flags) From 6e59419fd0a244dd55e53e798797f0697dec8b1c Mon Sep 17 00:00:00 2001 From: Peng Wu Date: Sun, 11 Sep 2022 06:00:53 +0000 Subject: [PATCH 221/328] phy: sunplus: Fix an IS_ERR() vs NULL bug in sp_usb_phy_probe The devm_ioremap() function returns NULL on error, it doesn't return error pointers. Fixes: 99d9ccd973852 ("phy: usb: Add USB2.0 phy driver for Sunplus SP7021") Signed-off-by: Peng Wu Link: https://lore.kernel.org/r/20220911060053.123594-1-wupeng58@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/sunplus/phy-sunplus-usb2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/sunplus/phy-sunplus-usb2.c b/drivers/phy/sunplus/phy-sunplus-usb2.c index b932087c55b2..e827b79f6d49 100644 --- a/drivers/phy/sunplus/phy-sunplus-usb2.c +++ b/drivers/phy/sunplus/phy-sunplus-usb2.c @@ -256,8 +256,8 @@ static int sp_usb_phy_probe(struct platform_device *pdev) usbphy->moon4_res_mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "moon4"); usbphy->moon4_regs = devm_ioremap(&pdev->dev, usbphy->moon4_res_mem->start, resource_size(usbphy->moon4_res_mem)); - if (IS_ERR(usbphy->moon4_regs)) - return PTR_ERR(usbphy->moon4_regs); + if (!usbphy->moon4_regs) + return -ENOMEM; usbphy->phy_clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(usbphy->phy_clk)) From 7beade0dd41d42d797ccb7791b134a77fcebf35b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 3 Nov 2022 13:33:10 -0700 Subject: [PATCH 222/328] x86/cpu: Add several Intel server CPU model numbers These servers are all on the public versions of the roadmap. The model numbers for Grand Ridge, Granite Rapids, and Sierra Forest were included in the September 2022 edition of the Instruction Set Extensions document. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20221103203310.5058-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 5d75fe229342..347707d459c6 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -107,6 +107,11 @@ #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ +#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF + +#define INTEL_FAM6_GRANITERAPIDS_X 0xAD +#define INTEL_FAM6_GRANITERAPIDS_D 0xAE + #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_N 0xBE @@ -118,7 +123,7 @@ #define INTEL_FAM6_METEORLAKE 0xAC #define INTEL_FAM6_METEORLAKE_L 0xAA -/* "Small Core" Processors (Atom) */ +/* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ @@ -145,6 +150,10 @@ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ +#define INTEL_FAM6_SIERRAFOREST_X 0xAF + +#define INTEL_FAM6_GRANDRIDGE 0xB6 + /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ From a90accb358ae33ea982a35595573f7a045993f8b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:24 -0700 Subject: [PATCH 223/328] cxl/region: Fix region HPA ordering validation Some regions may not have any address space allocated. Skip them when validating HPA order otherwise a crash like the following may result: devm_cxl_add_region: cxl_acpi cxl_acpi.0: decoder3.4: created region9 BUG: kernel NULL pointer dereference, address: 0000000000000000 [..] RIP: 0010:store_targetN+0x655/0x1740 [cxl_core] [..] Call Trace: kernfs_fop_write_iter+0x144/0x200 vfs_write+0x24a/0x4d0 ksys_write+0x69/0xf0 do_syscall_64+0x3a/0x90 store_targetN+0x655/0x1740: alloc_region_ref at drivers/cxl/core/region.c:676 (inlined by) cxl_port_attach_region at drivers/cxl/core/region.c:850 (inlined by) cxl_region_attach at drivers/cxl/core/region.c:1290 (inlined by) attach_target at drivers/cxl/core/region.c:1410 (inlined by) store_targetN at drivers/cxl/core/region.c:1453 Cc: Fixes: 384e624bb211 ("cxl/region: Attach endpoint decoders") Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/166752182461.947915.497032805239915067.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index bb6f4fc84a3f..d26ca7a6beae 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -658,6 +658,9 @@ static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port, xa_for_each(&port->regions, index, iter) { struct cxl_region_params *ip = &iter->region->params; + if (!ip->res) + continue; + if (ip->res->start > p->res->start) { dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n", From 0d9e734018d70cecf79e2e4c6082167160a0f13f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:30 -0700 Subject: [PATCH 224/328] cxl/region: Fix cxl_region leak, cleanup targets at region delete When a region is deleted any targets that have been previously assigned to that region hold references to it. Trigger those references to drop by detaching all targets at unregister_region() time. Otherwise that region object will leak as userspace has lost the ability to detach targets once region sysfs is torn down. Cc: Fixes: b9686e8c8e39 ("cxl/region: Enable the assignment of endpoint decoders to regions") Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752183055.947915.17681995648556534844.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index d26ca7a6beae..c52465e09f26 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1557,8 +1557,19 @@ static struct cxl_region *to_cxl_region(struct device *dev) static void unregister_region(void *dev) { struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + int i; device_del(dev); + + /* + * Now that region sysfs is shutdown, the parameter block is now + * read-only, so no need to hold the region rwsem to access the + * region parameters. + */ + for (i = 0; i < p->interleave_ways; i++) + detach_target(cxlr, i); + cxl_region_iomem_release(cxlr); put_device(dev); } From 4d07ae22e79ebc2d7528bbc69daa53b86981cb3a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:36 -0700 Subject: [PATCH 225/328] cxl/pmem: Fix cxl_pmem_region and cxl_memdev leak When a cxl_nvdimm object goes through a ->remove() event (device physically removed, nvdimm-bridge disabled, or nvdimm device disabled), then any associated regions must also be disabled. As highlighted by the cxl-create-region.sh test [1], a single device may host multiple regions, but the driver was only tracking one region at a time. This leads to a situation where only the last enabled region per nvdimm device is cleaned up properly. Other regions are leaked, and this also causes cxl_memdev reference leaks. Fix the tracking by allowing cxl_nvdimm objects to track multiple region associations. Cc: Link: https://github.com/pmem/ndctl/blob/main/test/cxl-create-region.sh [1] Reported-by: Vishal Verma Fixes: 04ad63f086d1 ("cxl/region: Introduce cxl_pmem_region objects") Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752183647.947915.2045230911503793901.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmem.c | 2 + drivers/cxl/cxl.h | 2 +- drivers/cxl/pmem.c | 103 +++++++++++++++++++++++++--------------- 3 files changed, 69 insertions(+), 38 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 1d12a8206444..36aa5070d902 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -188,6 +188,7 @@ static void cxl_nvdimm_release(struct device *dev) { struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); + xa_destroy(&cxl_nvd->pmem_regions); kfree(cxl_nvd); } @@ -230,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) dev = &cxl_nvd->dev; cxl_nvd->cxlmd = cxlmd; + xa_init(&cxl_nvd->pmem_regions); device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); device_set_pm_not_required(dev); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index f680450f0b16..1164ad49f3d3 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -423,7 +423,7 @@ struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; struct cxl_nvdimm_bridge *bridge; - struct cxl_pmem_region *region; + struct xarray pmem_regions; }; struct cxl_pmem_region_mapping { diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 0bac05d804bc..4c627d67281a 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -30,17 +30,20 @@ static void unregister_nvdimm(void *nvdimm) struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge; struct cxl_pmem_region *cxlr_pmem; + unsigned long index; device_lock(&cxl_nvb->dev); - cxlr_pmem = cxl_nvd->region; dev_set_drvdata(&cxl_nvd->dev, NULL); - cxl_nvd->region = NULL; - device_unlock(&cxl_nvb->dev); + xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) { + get_device(&cxlr_pmem->dev); + device_unlock(&cxl_nvb->dev); - if (cxlr_pmem) { device_release_driver(&cxlr_pmem->dev); put_device(&cxlr_pmem->dev); + + device_lock(&cxl_nvb->dev); } + device_unlock(&cxl_nvb->dev); nvdimm_delete(nvdimm); cxl_nvd->bridge = NULL; @@ -366,25 +369,49 @@ static int match_cxl_nvdimm(struct device *dev, void *data) static void unregister_nvdimm_region(void *nd_region) { - struct cxl_nvdimm_bridge *cxl_nvb; - struct cxl_pmem_region *cxlr_pmem; - int i; + nvdimm_region_delete(nd_region); +} + +static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd, + struct cxl_pmem_region *cxlr_pmem) +{ + int rc; + + rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem, + cxlr_pmem, GFP_KERNEL); + if (rc) + return rc; + + get_device(&cxlr_pmem->dev); + return 0; +} + +static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, + struct cxl_pmem_region *cxlr_pmem) +{ + /* + * It is possible this is called without a corresponding + * cxl_nvdimm_add_region for @cxlr_pmem + */ + cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem); + if (cxlr_pmem) + put_device(&cxlr_pmem->dev); +} + +static void release_mappings(void *data) +{ + int i; + struct cxl_pmem_region *cxlr_pmem = data; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge; - cxlr_pmem = nd_region_provider_data(nd_region); - cxl_nvb = cxlr_pmem->bridge; device_lock(&cxl_nvb->dev); for (i = 0; i < cxlr_pmem->nr_mappings; i++) { struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; - if (cxl_nvd->region) { - put_device(&cxlr_pmem->dev); - cxl_nvd->region = NULL; - } + cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem); } device_unlock(&cxl_nvb->dev); - - nvdimm_region_delete(nd_region); } static void cxlr_pmem_remove_resource(void *res) @@ -422,7 +449,7 @@ static int cxl_pmem_region_probe(struct device *dev) if (!cxl_nvb->nvdimm_bus) { dev_dbg(dev, "nvdimm bus not found\n"); rc = -ENXIO; - goto err; + goto out_nvb; } memset(&mappings, 0, sizeof(mappings)); @@ -431,7 +458,7 @@ static int cxl_pmem_region_probe(struct device *dev) res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); if (!res) { rc = -ENOMEM; - goto err; + goto out_nvb; } res->name = "Persistent Memory"; @@ -442,11 +469,11 @@ static int cxl_pmem_region_probe(struct device *dev) rc = insert_resource(&iomem_resource, res); if (rc) - goto err; + goto out_nvb; rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); if (rc) - goto err; + goto out_nvb; ndr_desc.res = res; ndr_desc.provider_data = cxlr_pmem; @@ -462,7 +489,7 @@ static int cxl_pmem_region_probe(struct device *dev) nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); if (!nd_set) { rc = -ENOMEM; - goto err; + goto out_nvb; } ndr_desc.memregion = cxlr->id; @@ -472,9 +499,13 @@ static int cxl_pmem_region_probe(struct device *dev) info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); if (!info) { rc = -ENOMEM; - goto err; + goto out_nvb; } + rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem); + if (rc) + goto out_nvd; + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; struct cxl_memdev *cxlmd = m->cxlmd; @@ -486,7 +517,7 @@ static int cxl_pmem_region_probe(struct device *dev) dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, dev_name(&cxlmd->dev)); rc = -ENODEV; - goto err; + goto out_nvd; } /* safe to drop ref now with bridge lock held */ @@ -498,10 +529,17 @@ static int cxl_pmem_region_probe(struct device *dev) dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, dev_name(&cxlmd->dev)); rc = -ENODEV; - goto err; + goto out_nvd; } - cxl_nvd->region = cxlr_pmem; - get_device(&cxlr_pmem->dev); + + /* + * Pin the region per nvdimm device as those may be released + * out-of-order with respect to the region, and a single nvdimm + * maybe associated with multiple regions + */ + rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem); + if (rc) + goto out_nvd; m->cxl_nvd = cxl_nvd; mappings[i] = (struct nd_mapping_desc) { .nvdimm = nvdimm, @@ -527,27 +565,18 @@ static int cxl_pmem_region_probe(struct device *dev) nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc); if (!cxlr_pmem->nd_region) { rc = -ENOMEM; - goto err; + goto out_nvd; } rc = devm_add_action_or_reset(dev, unregister_nvdimm_region, cxlr_pmem->nd_region); -out: +out_nvd: kfree(info); +out_nvb: device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; - -err: - dev_dbg(dev, "failed to create nvdimm region\n"); - for (i--; i >= 0; i--) { - nvdimm = mappings[i].nvdimm; - cxl_nvd = nvdimm_provider_data(nvdimm); - put_device(&cxl_nvd->region->dev); - cxl_nvd->region = NULL; - } - goto out; } static struct cxl_driver cxl_pmem_region_driver = { From 86e86c3cb63325c12ea99fbce2cc5bafba86bb40 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:42 -0700 Subject: [PATCH 226/328] tools/testing/cxl: Fix some error exits Fix a few typos where 'goto err_port' was used rather than the object specific cleanup. Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752184255.947915.16163477849330181425.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index a072b2d3e726..133e4c73d370 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -695,7 +695,7 @@ static __init int cxl_test_init(void) pdev = platform_device_alloc("cxl_switch_uport", i); if (!pdev) - goto err_port; + goto err_uport; pdev->dev.parent = &root_port->dev; rc = platform_device_add(pdev); @@ -713,7 +713,7 @@ static __init int cxl_test_init(void) pdev = platform_device_alloc("cxl_switch_dport", i); if (!pdev) - goto err_port; + goto err_dport; pdev->dev.parent = &uport->dev; rc = platform_device_add(pdev); From e41c8452b9b204689e68756a3836d1d37b617ad5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:48 -0700 Subject: [PATCH 227/328] tools/testing/cxl: Add a single-port host-bridge regression config Jonathan reports that region creation fails when a single-port host-bridge connects to a multi-port switch. Mock up that configuration so a fix can be tested and regression tested going forward. Reported-by: Bobo WL Reported-by: Jonathan Cameron Link: http://lore.kernel.org/r/20221010172057.00001559@huawei.com Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752184838.947915.2167957540894293891.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- tools/testing/cxl/test/cxl.c | 297 ++++++++++++++++++++++++++++++++--- 1 file changed, 278 insertions(+), 19 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 133e4c73d370..7edce12fd2ce 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -12,30 +12,62 @@ #include "mock.h" #define NR_CXL_HOST_BRIDGES 2 +#define NR_CXL_SINGLE_HOST 1 #define NR_CXL_ROOT_PORTS 2 #define NR_CXL_SWITCH_PORTS 2 #define NR_CXL_PORT_DECODERS 8 static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; -static struct platform_device - *cxl_root_port[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; -static struct platform_device - *cxl_switch_uport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; -static struct platform_device - *cxl_switch_dport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * - NR_CXL_SWITCH_PORTS]; -struct platform_device - *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS]; +#define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS) +static struct platform_device *cxl_root_port[NR_MULTI_ROOT]; +static struct platform_device *cxl_switch_uport[NR_MULTI_ROOT]; +#define NR_MEM_MULTI \ + (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS) +static struct platform_device *cxl_switch_dport[NR_MEM_MULTI]; + +static struct platform_device *cxl_hb_single[NR_CXL_SINGLE_HOST]; +static struct platform_device *cxl_root_single[NR_CXL_SINGLE_HOST]; +static struct platform_device *cxl_swu_single[NR_CXL_SINGLE_HOST]; +#define NR_MEM_SINGLE (NR_CXL_SINGLE_HOST * NR_CXL_SWITCH_PORTS) +static struct platform_device *cxl_swd_single[NR_MEM_SINGLE]; + +struct platform_device *cxl_mem[NR_MEM_MULTI]; +struct platform_device *cxl_mem_single[NR_MEM_SINGLE]; + + +static inline bool is_multi_bridge(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) + if (&cxl_host_bridge[i]->dev == dev) + return true; + return false; +} + +static inline bool is_single_bridge(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) + if (&cxl_hb_single[i]->dev == dev) + return true; + return false; +} static struct acpi_device acpi0017_mock; -static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { +static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] = { [0] = { .handle = &host_bridge[0], }, [1] = { .handle = &host_bridge[1], }, + [2] = { + .handle = &host_bridge[2], + }, + }; static bool is_mock_dev(struct device *dev) @@ -45,6 +77,9 @@ static bool is_mock_dev(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) if (dev == &cxl_mem[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) + if (dev == &cxl_mem_single[i]->dev) + return true; if (dev == &cxl_acpi->dev) return true; return false; @@ -66,7 +101,7 @@ static bool is_mock_adev(struct acpi_device *adev) static struct { struct acpi_table_cedt cedt; - struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES]; + struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST]; struct { struct acpi_cedt_cfmws cfmws; u32 target[1]; @@ -83,6 +118,10 @@ static struct { struct acpi_cedt_cfmws cfmws; u32 target[2]; } cfmws3; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws4; } __packed mock_cedt = { .cedt = { .header = { @@ -107,6 +146,14 @@ static struct { .uid = 1, .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, }, + .chbs[2] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 2, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, + }, .cfmws0 = { .cfmws = { .header = { @@ -167,13 +214,29 @@ static struct { }, .target = { 0, 1, }, }, + .cfmws4 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws4), + }, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 4, + .window_size = SZ_256M * 4UL, + }, + .target = { 2 }, + }, }; -struct acpi_cedt_cfmws *mock_cfmws[4] = { +struct acpi_cedt_cfmws *mock_cfmws[] = { [0] = &mock_cedt.cfmws0.cfmws, [1] = &mock_cedt.cfmws1.cfmws, [2] = &mock_cedt.cfmws2.cfmws, [3] = &mock_cedt.cfmws3.cfmws, + [4] = &mock_cedt.cfmws4.cfmws, }; struct cxl_mock_res { @@ -304,6 +367,9 @@ static bool is_mock_bridge(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) if (dev == &cxl_host_bridge[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) + if (dev == &cxl_hb_single[i]->dev) + return true; return false; } @@ -326,6 +392,18 @@ static bool is_mock_port(struct device *dev) if (dev == &cxl_switch_dport[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_root_single); i++) + if (dev == &cxl_root_single[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++) + if (dev == &cxl_swu_single[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++) + if (dev == &cxl_swd_single[i]->dev) + return true; + if (is_cxl_memdev(dev)) return is_mock_dev(dev->parent); @@ -561,11 +639,31 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) int i, array_size; if (port->depth == 1) { - array_size = ARRAY_SIZE(cxl_root_port); - array = cxl_root_port; + if (is_multi_bridge(port->uport)) { + array_size = ARRAY_SIZE(cxl_root_port); + array = cxl_root_port; + } else if (is_single_bridge(port->uport)) { + array_size = ARRAY_SIZE(cxl_root_single); + array = cxl_root_single; + } else { + dev_dbg(&port->dev, "%s: unknown bridge type\n", + dev_name(port->uport)); + return -ENXIO; + } } else if (port->depth == 2) { - array_size = ARRAY_SIZE(cxl_switch_dport); - array = cxl_switch_dport; + struct cxl_port *parent = to_cxl_port(port->dev.parent); + + if (is_multi_bridge(parent->uport)) { + array_size = ARRAY_SIZE(cxl_switch_dport); + array = cxl_switch_dport; + } else if (is_single_bridge(parent->uport)) { + array_size = ARRAY_SIZE(cxl_swd_single); + array = cxl_swd_single; + } else { + dev_dbg(&port->dev, "%s: unknown bridge type\n", + dev_name(port->uport)); + return -ENXIO; + } } else { dev_WARN_ONCE(&port->dev, 1, "unexpected depth %d\n", port->depth); @@ -576,8 +674,12 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) struct platform_device *pdev = array[i]; struct cxl_dport *dport; - if (pdev->dev.parent != port->uport) + if (pdev->dev.parent != port->uport) { + dev_dbg(&port->dev, "%s: mismatch parent %s\n", + dev_name(port->uport), + dev_name(pdev->dev.parent)); continue; + } dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); @@ -627,6 +729,157 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_512G (SZ_64G * 8) #endif +static __init int cxl_single_init(void) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) { + struct acpi_device *adev = + &host_bridge[NR_CXL_HOST_BRIDGES + i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_host_bridge", + NR_CXL_HOST_BRIDGES + i); + if (!pdev) + goto err_bridge; + + mock_companion(adev, &pdev->dev); + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_bridge; + } + + cxl_hb_single[i] = pdev; + rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, + "physical_node"); + if (rc) + goto err_bridge; + } + + for (i = 0; i < ARRAY_SIZE(cxl_root_single); i++) { + struct platform_device *bridge = + cxl_hb_single[i % ARRAY_SIZE(cxl_hb_single)]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_root_port", + NR_MULTI_ROOT + i); + if (!pdev) + goto err_port; + pdev->dev.parent = &bridge->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_port; + } + cxl_root_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_swu_single); i++) { + struct platform_device *root_port = cxl_root_single[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_uport", + NR_MULTI_ROOT + i); + if (!pdev) + goto err_uport; + pdev->dev.parent = &root_port->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_uport; + } + cxl_swu_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_swd_single); i++) { + struct platform_device *uport = + cxl_swu_single[i % ARRAY_SIZE(cxl_swu_single)]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_dport", + i + NR_MEM_MULTI); + if (!pdev) + goto err_dport; + pdev->dev.parent = &uport->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_dport; + } + cxl_swd_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) { + struct platform_device *dport = cxl_swd_single[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_mem", NR_MEM_MULTI + i); + if (!pdev) + goto err_mem; + pdev->dev.parent = &dport->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_mem; + } + cxl_mem_single[i] = pdev; + } + + return 0; + +err_mem: + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); +err_dport: + for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swd_single[i]); +err_uport: + for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swu_single[i]); +err_port: + for (i = ARRAY_SIZE(cxl_root_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_root_single[i]); +err_bridge: + for (i = ARRAY_SIZE(cxl_hb_single) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_hb_single[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); + platform_device_unregister(cxl_hb_single[i]); + } + + return rc; +} + +static void cxl_single_exit(void) +{ + int i; + + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); + for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swd_single[i]); + for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_swu_single[i]); + for (i = ARRAY_SIZE(cxl_root_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_root_single[i]); + for (i = ARRAY_SIZE(cxl_hb_single) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_hb_single[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); + platform_device_unregister(cxl_hb_single[i]); + } +} + static __init int cxl_test_init(void) { int rc, i; @@ -724,7 +977,6 @@ static __init int cxl_test_init(void) cxl_switch_dport[i] = pdev; } - BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_switch_dport)); for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { struct platform_device *dport = cxl_switch_dport[i]; struct platform_device *pdev; @@ -743,9 +995,13 @@ static __init int cxl_test_init(void) cxl_mem[i] = pdev; } + rc = cxl_single_init(); + if (rc) + goto err_mem; + cxl_acpi = platform_device_alloc("cxl_acpi", 0); if (!cxl_acpi) - goto err_mem; + goto err_single; mock_companion(&acpi0017_mock, &cxl_acpi->dev); acpi0017_mock.dev.bus = &platform_bus_type; @@ -758,6 +1014,8 @@ static __init int cxl_test_init(void) err_add: platform_device_put(cxl_acpi); +err_single: + cxl_single_exit(); err_mem: for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); @@ -793,6 +1051,7 @@ static __exit void cxl_test_exit(void) int i; platform_device_unregister(cxl_acpi); + cxl_single_exit(); for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) From e4f6dfa9ef756a3934a4caf618b1e86e9e8e21d0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:30:54 -0700 Subject: [PATCH 228/328] cxl/region: Fix 'distance' calculation with passthrough ports When programming port decode targets, the algorithm wants to ensure that two devices are compatible to be programmed as peers beneath a given port. A compatible peer is a target that shares the same dport, and where that target's interleave position also routes it to the same dport. Compatibility is determined by the device's interleave position being >= to distance. For example, if a given dport can only map every Nth position then positions less than N away from the last target programmed are incompatible. The @distance for the host-bridge's cxl_port in a simple dual-ported host-bridge configuration with 2 direct-attached devices is 1, i.e. An x2 region divided by 2 dports to reach 2 region targets. An x4 region under an x2 host-bridge would need 2 intervening switches where the @distance at the host bridge level is 2 (x4 region divided by 2 switches to reach 4 devices). However, the distance between peers underneath a single ported host-bridge is always zero because there is no limit to the number of devices that can be mapped. In other words, there are no decoders to program in a passthrough, all descendants are mapped and distance only starts matters for the intervening descendant ports of the passthrough port. Add tracking for the number of dports mapped to a port, and use that to detect the passthrough case for calculating @distance. Cc: Reported-by: Bobo WL Reported-by: Jonathan Cameron Link: http://lore.kernel.org/r/20221010172057.00001559@huawei.com Fixes: 27b3f8d13830 ("cxl/region: Program target lists") Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752185440.947915.6617495912508299445.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/port.c | 11 +++++++++-- drivers/cxl/core/region.c | 9 ++++++++- drivers/cxl/cxl.h | 2 ++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index bffde862de0b..e7556864ea80 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -811,6 +811,7 @@ static struct cxl_dport *find_dport(struct cxl_port *port, int id) static int add_dport(struct cxl_port *port, struct cxl_dport *new) { struct cxl_dport *dup; + int rc; device_lock_assert(&port->dev); dup = find_dport(port, new->port_id); @@ -821,8 +822,14 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) dev_name(dup->dport)); return -EBUSY; } - return xa_insert(&port->dports, (unsigned long)new->dport, new, - GFP_KERNEL); + + rc = xa_insert(&port->dports, (unsigned long)new->dport, new, + GFP_KERNEL); + if (rc) + return rc; + + port->nr_dports++; + return 0; } /* diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c52465e09f26..c0253de74945 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -990,7 +990,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, if (cxl_rr->nr_targets_set) { int i, distance; - distance = p->nr_targets / cxl_rr->nr_targets; + /* + * Passthrough ports impose no distance requirements between + * peers + */ + if (port->nr_dports == 1) + distance = 0; + else + distance = p->nr_targets / cxl_rr->nr_targets; for (i = 0; i < cxl_rr->nr_targets_set; i++) if (ep->dport == cxlsd->target[i]) { rc = check_last_peer(cxled, ep, cxl_rr, diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 1164ad49f3d3..ac75554b5d76 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -457,6 +457,7 @@ struct cxl_pmem_region { * @regions: cxl_region_ref instances, regions mapped by this port * @parent_dport: dport that points to this port in the parent * @decoder_ida: allocator for decoder ids + * @nr_dports: number of entries in @dports * @hdm_end: track last allocated HDM decoder instance for allocation ordering * @commit_end: cursor to track highest committed decoder for commit ordering * @component_reg_phys: component register capability base address (optional) @@ -475,6 +476,7 @@ struct cxl_port { struct xarray regions; struct cxl_dport *parent_dport; struct ida decoder_ida; + int nr_dports; int hdm_end; int commit_end; resource_size_t component_reg_phys; From 8f401ec1c8975eabfe4c089de91cbe058deabf71 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Nov 2022 17:31:00 -0700 Subject: [PATCH 229/328] cxl/region: Recycle region ids At region creation time the next region-id is atomically cached so that there is predictability of region device names. If that region is destroyed and then a new one is created the region id increments. That ends up looking like a memory leak, or is otherwise surprising that identifiers roll forward even after destroying all previously created regions. Try to reuse rather than free old region ids at region release time. While this fixes a cosmetic issue, the needlessly advancing memory region-id gives the appearance of a memory leak, hence the "Fixes" tag, but no "Cc: stable" tag. Cc: Ben Widawsky Cc: Jonathan Cameron Fixes: 779dd20cfb56 ("cxl/region: Add region creation support") Reviewed-by: Dave Jiang Reviewed-by: Vishal Verma Link: https://lore.kernel.org/r/166752186062.947915.13200195701224993317.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/region.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c0253de74945..f9ae5ad284ff 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1534,9 +1534,24 @@ static const struct attribute_group *region_groups[] = { static void cxl_region_release(struct device *dev) { + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); struct cxl_region *cxlr = to_cxl_region(dev); + int id = atomic_read(&cxlrd->region_id); + + /* + * Try to reuse the recently idled id rather than the cached + * next id to prevent the region id space from increasing + * unnecessarily. + */ + if (cxlr->id < id) + if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) { + memregion_free(id); + goto out; + } memregion_free(cxlr->id); +out: + put_device(dev->parent); kfree(cxlr); } @@ -1598,6 +1613,11 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_region_key); dev->parent = &cxlrd->cxlsd.cxld.dev; + /* + * Keep root decoder pinned through cxl_region_release to fixup + * region id allocations + */ + get_device(dev->parent); device_set_pm_not_required(dev); dev->bus = &cxl_bus_type; dev->type = &cxl_region_type; From c4f683731db330460d909bd0ca8d5af876fcdc97 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 31 Oct 2022 11:40:18 +0000 Subject: [PATCH 230/328] hwmon: (scmi) Register explicitly with Thermal Framework Available sensors are enumerated and reported by the SCMI platform server using a 16bit identification number; not all such sensors are of a type supported by hwmon subsystem and, among the supported ones, only a subset could be temperature sensors that have to be registered with the Thermal Framework. Potential clashes between hwmon channels indexes and the underlying real sensors IDs do not play well with the hwmon<-->thermal bridge automatic registration routines and could need a sensible number of fake dummy sensors to be made up in order to keep indexes and IDs in sync. Avoid to use the hwmon<-->thermal bridge dropping the HWMON_C_REGISTER_TZ attribute and instead explicit register temperature sensors directly with the Thermal Framework. Cc: Daniel Lezcano Cc: Guenter Roeck Cc: linux-hwmon@vger.kernel.org Signed-off-by: Cristian Marussi Acked-by: Sudeep Holla Link: https://lore.kernel.org/r/20221031114018.59048-1-cristian.marussi@arm.com Signed-off-by: Guenter Roeck --- drivers/hwmon/scmi-hwmon.c | 116 ++++++++++++++++++++++++++++++++----- 1 file changed, 103 insertions(+), 13 deletions(-) diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c index b1329a58ce40..e192f0c67146 100644 --- a/drivers/hwmon/scmi-hwmon.c +++ b/drivers/hwmon/scmi-hwmon.c @@ -20,6 +20,11 @@ struct scmi_sensors { const struct scmi_sensor_info **info[hwmon_max]; }; +struct scmi_thermal_sensor { + const struct scmi_protocol_handle *ph; + const struct scmi_sensor_info *info; +}; + static inline u64 __pow10(u8 x) { u64 r = 1; @@ -64,16 +69,14 @@ static int scmi_hwmon_scale(const struct scmi_sensor_info *sensor, u64 *value) return 0; } -static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type, - u32 attr, int channel, long *val) +static int scmi_hwmon_read_scaled_value(const struct scmi_protocol_handle *ph, + const struct scmi_sensor_info *sensor, + long *val) { int ret; u64 value; - const struct scmi_sensor_info *sensor; - struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev); - sensor = *(scmi_sensors->info[type] + channel); - ret = sensor_ops->reading_get(scmi_sensors->ph, sensor->id, &value); + ret = sensor_ops->reading_get(ph, sensor->id, &value); if (ret) return ret; @@ -84,6 +87,17 @@ static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type, return ret; } +static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + const struct scmi_sensor_info *sensor; + struct scmi_sensors *scmi_sensors = dev_get_drvdata(dev); + + sensor = *(scmi_sensors->info[type] + channel); + + return scmi_hwmon_read_scaled_value(scmi_sensors->ph, sensor, val); +} + static int scmi_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) @@ -122,6 +136,25 @@ static struct hwmon_chip_info scmi_chip_info = { .info = NULL, }; +static int scmi_hwmon_thermal_get_temp(struct thermal_zone_device *tz, + int *temp) +{ + int ret; + long value; + struct scmi_thermal_sensor *th_sensor = tz->devdata; + + ret = scmi_hwmon_read_scaled_value(th_sensor->ph, th_sensor->info, + &value); + if (!ret) + *temp = value; + + return ret; +} + +static const struct thermal_zone_device_ops scmi_hwmon_thermal_ops = { + .get_temp = scmi_hwmon_thermal_get_temp, +}; + static int scmi_hwmon_add_chan_info(struct hwmon_channel_info *scmi_hwmon_chan, struct device *dev, int num, enum hwmon_sensor_types type, u32 config) @@ -149,7 +182,6 @@ static enum hwmon_sensor_types scmi_types[] = { }; static u32 hwmon_attributes[hwmon_max] = { - [hwmon_chip] = HWMON_C_REGISTER_TZ, [hwmon_temp] = HWMON_T_INPUT | HWMON_T_LABEL, [hwmon_in] = HWMON_I_INPUT | HWMON_I_LABEL, [hwmon_curr] = HWMON_C_INPUT | HWMON_C_LABEL, @@ -157,6 +189,43 @@ static u32 hwmon_attributes[hwmon_max] = { [hwmon_energy] = HWMON_E_INPUT | HWMON_E_LABEL, }; +static int scmi_thermal_sensor_register(struct device *dev, + const struct scmi_protocol_handle *ph, + const struct scmi_sensor_info *sensor) +{ + struct scmi_thermal_sensor *th_sensor; + struct thermal_zone_device *tzd; + + th_sensor = devm_kzalloc(dev, sizeof(*th_sensor), GFP_KERNEL); + if (!th_sensor) + return -ENOMEM; + + th_sensor->ph = ph; + th_sensor->info = sensor; + + /* + * Try to register a temperature sensor with the Thermal Framework: + * skip sensors not defined as part of any thermal zone (-ENODEV) but + * report any other errors related to misconfigured zones/sensors. + */ + tzd = devm_thermal_of_zone_register(dev, th_sensor->info->id, th_sensor, + &scmi_hwmon_thermal_ops); + if (IS_ERR(tzd)) { + devm_kfree(dev, th_sensor); + + if (PTR_ERR(tzd) != -ENODEV) + return PTR_ERR(tzd); + + dev_dbg(dev, "Sensor '%s' not attached to any thermal zone.\n", + sensor->name); + } else { + dev_dbg(dev, "Sensor '%s' attached to thermal zone ID:%d\n", + sensor->name, tzd->id); + } + + return 0; +} + static int scmi_hwmon_probe(struct scmi_device *sdev) { int i, idx; @@ -164,7 +233,7 @@ static int scmi_hwmon_probe(struct scmi_device *sdev) enum hwmon_sensor_types type; struct scmi_sensors *scmi_sensors; const struct scmi_sensor_info *sensor; - int nr_count[hwmon_max] = {0}, nr_types = 0; + int nr_count[hwmon_max] = {0}, nr_types = 0, nr_count_temp = 0; const struct hwmon_chip_info *chip_info; struct device *hwdev, *dev = &sdev->dev; struct hwmon_channel_info *scmi_hwmon_chan; @@ -208,10 +277,8 @@ static int scmi_hwmon_probe(struct scmi_device *sdev) } } - if (nr_count[hwmon_temp]) { - nr_count[hwmon_chip]++; - nr_types++; - } + if (nr_count[hwmon_temp]) + nr_count_temp = nr_count[hwmon_temp]; scmi_hwmon_chan = devm_kcalloc(dev, nr_types, sizeof(*scmi_hwmon_chan), GFP_KERNEL); @@ -262,8 +329,31 @@ static int scmi_hwmon_probe(struct scmi_device *sdev) hwdev = devm_hwmon_device_register_with_info(dev, "scmi_sensors", scmi_sensors, chip_info, NULL); + if (IS_ERR(hwdev)) + return PTR_ERR(hwdev); - return PTR_ERR_OR_ZERO(hwdev); + for (i = 0; i < nr_count_temp; i++) { + int ret; + + sensor = *(scmi_sensors->info[hwmon_temp] + i); + if (!sensor) + continue; + + /* + * Warn on any misconfiguration related to thermal zones but + * bail out of probing only on memory errors. + */ + ret = scmi_thermal_sensor_register(dev, ph, sensor); + if (ret) { + if (ret == -ENOMEM) + return ret; + dev_warn(dev, + "Thermal zone misconfigured for %s. err=%d\n", + sensor->name, ret); + } + } + + return 0; } static const struct scmi_device_id scmi_id_table[] = { From 1e699e177e339e462cdc8571e3d0fcf29665608e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 4 Nov 2022 16:37:30 -0700 Subject: [PATCH 231/328] Revert "hwmon: (pmbus) Add regulator supply into macro" This reverts commit 54cc3dbfc10dc3db7cb1cf49aee4477a8398fbde. Zev Weiss reports that the reverted patch may cause a regulator undercount. Here is his report: ... having regulator-dummy set as a supply on my PMBus regulators (instead of having them as their own top-level regulators without an upstream supply) leads to enable-count underflow errors when disabling them: # echo 0 > /sys/bus/platform/devices/efuse01/state [ 906.094477] regulator-dummy: Underflow of regulator enable count [ 906.100563] Failed to disable vout: -EINVAL [ 136.992676] reg-userspace-consumer efuse01: Failed to configure state: -22 Zev reports that reverting the patch fixes the problem. So let's do that for now. Fixes: 54cc3dbfc10d ("hwmon: (pmbus) Add regulator supply into macro") Cc: Marcello Sylvester Bauer Reported-by: Zev Weiss Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index 7daaf0caf4d3..10fb17879f8e 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -467,7 +467,6 @@ extern const struct regulator_ops pmbus_regulator_ops; #define PMBUS_REGULATOR_STEP(_name, _id, _voltages, _step) \ [_id] = { \ .name = (_name # _id), \ - .supply_name = "vin", \ .id = (_id), \ .of_match = of_match_ptr(_name # _id), \ .regulators_node = of_match_ptr("regulators"), \ From b4c66425771ddb910316c7b4cd7fa0614098ec45 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Thu, 3 Nov 2022 19:33:24 -0400 Subject: [PATCH 232/328] bnxt_en: refactor bnxt_cancel_reservations() Introduce bnxt_clear_reservations() to clear the reserved attributes only. This will be used in the next patch to fix PCI AER handling. Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 04cf7684f1b0..3743d9755eb4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9983,17 +9983,12 @@ static int bnxt_try_recover_fw(struct bnxt *bp) return -ENODEV; } -int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset) +static void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; - int rc; if (!BNXT_NEW_RM(bp)) - return 0; /* no resource reservations required */ - - rc = bnxt_hwrm_func_resc_qcaps(bp, true); - if (rc) - netdev_err(bp->dev, "resc_qcaps failed\n"); + return; /* no resource reservations required */ hw_resc->resv_cp_rings = 0; hw_resc->resv_stat_ctxs = 0; @@ -10006,6 +10001,20 @@ int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset) bp->tx_nr_rings = 0; bp->rx_nr_rings = 0; } +} + +int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset) +{ + int rc; + + if (!BNXT_NEW_RM(bp)) + return 0; /* no resource reservations required */ + + rc = bnxt_hwrm_func_resc_qcaps(bp, true); + if (rc) + netdev_err(bp->dev, "resc_qcaps failed\n"); + + bnxt_clear_reservations(bp, fw_reset); return rc; } From 0cf736a18a1e804037839bd8df9e36f0efdb8745 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Thu, 3 Nov 2022 19:33:25 -0400 Subject: [PATCH 233/328] bnxt_en: fix the handling of PCIE-AER Fix the sequence required for PCIE-AER. While slot reset occurs, firmware might not be ready and the driver needs to check for its recovery. We also need to remap the health registers for some chips and clear the resource reservations. The resources will be allocated again during bnxt_io_resume(). Fixes: fb1e6e562b37 ("bnxt_en: Fix AER recovery.") Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 29 ++++++++++++++++++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + .../net/ethernet/broadcom/bnxt/bnxt_hwrm.c | 3 +- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3743d9755eb4..f44f9367b7fd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13922,7 +13922,9 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT; struct net_device *netdev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(netdev); - int err = 0, off; + int retry = 0; + int err = 0; + int off; netdev_info(bp->dev, "PCI Slot Reset\n"); @@ -13950,11 +13952,36 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) pci_restore_state(pdev); pci_save_state(pdev); + bnxt_inv_fw_health_reg(bp); + bnxt_try_map_fw_health_reg(bp); + + /* In some PCIe AER scenarios, firmware may take up to + * 10 seconds to become ready in the worst case. + */ + do { + err = bnxt_try_recover_fw(bp); + if (!err) + break; + retry++; + } while (retry < BNXT_FW_SLOT_RESET_RETRY); + + if (err) { + dev_err(&pdev->dev, "Firmware not ready\n"); + goto reset_exit; + } + err = bnxt_hwrm_func_reset(bp); if (!err) result = PCI_ERS_RESULT_RECOVERED; + + bnxt_ulp_irq_stop(bp); + bnxt_clear_int_mode(bp); + err = bnxt_init_int_mode(bp); + bnxt_ulp_irq_restart(bp, err); } +reset_exit: + bnxt_clear_reservations(bp, true); rtnl_unlock(); return result; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index b1b17f911300..d5fa43cfe524 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1621,6 +1621,7 @@ struct bnxt_fw_health { #define BNXT_FW_RETRY 5 #define BNXT_FW_IF_RETRY 10 +#define BNXT_FW_SLOT_RESET_RETRY 4 enum board_idx { BCM57301, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c index b01d42928a53..132442f16fe6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c @@ -476,7 +476,8 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx) memset(ctx->resp, 0, PAGE_SIZE); req_type = le16_to_cpu(ctx->req->req_type); - if (BNXT_NO_FW_ACCESS(bp) && req_type != HWRM_FUNC_RESET) { + if (BNXT_NO_FW_ACCESS(bp) && + (req_type != HWRM_FUNC_RESET && req_type != HWRM_VER_GET)) { netdev_dbg(bp->dev, "hwrm req_type 0x%x skipped, FW channel down\n", req_type); goto exit; From 6d81ea3765dfa6c8a20822613c81edad1c4a16a0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 3 Nov 2022 19:33:26 -0400 Subject: [PATCH 234/328] bnxt_en: Fix possible crash in bnxt_hwrm_set_coal() During the error recovery sequence, the rtnl_lock is not held for the entire duration and some datastructures may be freed during the sequence. Check for the BNXT_STATE_OPEN flag instead of netif_running() to ensure that the device is fully operational before proceeding to reconfigure the coalescing settings. This will fix a possible crash like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 10 PID: 181276 Comm: ethtool Kdump: loaded Tainted: G IOE --------- - - 4.18.0-348.el8.x86_64 #1 Hardware name: Dell Inc. PowerEdge R740/0F9N89, BIOS 2.3.10 08/15/2019 RIP: 0010:bnxt_hwrm_set_coal+0x1fb/0x2a0 [bnxt_en] Code: c2 66 83 4e 22 08 66 89 46 1c e8 10 cb 00 00 41 83 c6 01 44 39 b3 68 01 00 00 0f 8e a3 00 00 00 48 8b 93 c8 00 00 00 49 63 c6 <48> 8b 2c c2 48 8b 85 b8 02 00 00 48 85 c0 74 2e 48 8b 74 24 08 f6 RSP: 0018:ffffb11c8dcaba50 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8d168a8b0ac0 RCX: 00000000000000c5 RDX: 0000000000000000 RSI: ffff8d162f72c000 RDI: ffff8d168a8b0b28 RBP: 0000000000000000 R08: b6e1f68a12e9a7eb R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000037 R12: ffff8d168a8b109c R13: ffff8d168a8b10aa R14: 0000000000000000 R15: ffffffffc01ac4e0 FS: 00007f3852e4c740(0000) GS:ffff8d24c0080000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000041b3ee003 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ethnl_set_coalesce+0x3ce/0x4c0 genl_family_rcv_msg_doit.isra.15+0x10f/0x150 genl_family_rcv_msg+0xb3/0x160 ? coalesce_fill_reply+0x480/0x480 genl_rcv_msg+0x47/0x90 ? genl_family_rcv_msg+0x160/0x160 netlink_rcv_skb+0x4c/0x120 genl_rcv+0x24/0x40 netlink_unicast+0x196/0x230 netlink_sendmsg+0x204/0x3d0 sock_sendmsg+0x4c/0x50 __sys_sendto+0xee/0x160 ? syscall_trace_enter+0x1d3/0x2c0 ? __audit_syscall_exit+0x249/0x2a0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x65/0xca RIP: 0033:0x7f38524163bb Fixes: 2151fe0830fd ("bnxt_en: Handle RESET_NOTIFY async event from firmware.") Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index f57e524c7e30..8cad15c458b3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -162,7 +162,7 @@ static int bnxt_set_coalesce(struct net_device *dev, } reset_coalesce: - if (netif_running(dev)) { + if (test_bit(BNXT_STATE_OPEN, &bp->state)) { if (update_stats) { rc = bnxt_close_nic(bp, true, false); if (!rc) From 02597d39145bb0aa81d04bf39b6a913ce9a9d465 Mon Sep 17 00:00:00 2001 From: Alex Barba Date: Thu, 3 Nov 2022 19:33:27 -0400 Subject: [PATCH 235/328] bnxt_en: fix potentially incorrect return value for ndo_rx_flow_steer In the bnxt_en driver ndo_rx_flow_steer returns '0' whenever an entry that we are attempting to steer is already found. This is not the correct behavior. The return code should be the value/index that corresponds to the entry. Returning zero all the time causes the RFS records to be incorrect unless entry '0' is the correct one. As flows migrate to different cores this can create entries that are not correct. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reported-by: Akshay Navgire Signed-off-by: Alex Barba Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f44f9367b7fd..c78b6e9dea2c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12903,8 +12903,8 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, rcu_read_lock(); hlist_for_each_entry_rcu(fltr, head, hash) { if (bnxt_fltr_match(fltr, new_fltr)) { + rc = fltr->sw_id; rcu_read_unlock(); - rc = 0; goto err_free; } } From b7cbc6740bd6ad5d43345a2504f7e4beff0d709f Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 3 Nov 2022 14:28:30 -0400 Subject: [PATCH 236/328] net: fman: Unregister ethernet device on removal When the mac device gets removed, it leaves behind the ethernet device. This will result in a segfault next time the ethernet device accesses mac_dev. Remove the ethernet device when we get removed to prevent this. This is not completely reversible, since some resources aren't cleaned up properly, but that can be addressed later. Fixes: 3933961682a3 ("fsl/fman: Add FMan MAC driver") Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20221103182831.2248833-1-sean.anderson@seco.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fman/mac.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 65df308bad97..13e67f2864be 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -487,12 +487,21 @@ _return_of_node_put: return err; } +static int mac_remove(struct platform_device *pdev) +{ + struct mac_device *mac_dev = platform_get_drvdata(pdev); + + platform_device_unregister(mac_dev->priv->eth_dev); + return 0; +} + static struct platform_driver mac_driver = { .driver = { .name = KBUILD_MODNAME, .of_match_table = mac_match, }, .probe = mac_probe, + .remove = mac_remove, }; builtin_platform_driver(mac_driver); From 8abcaeaed38109e5ccaf40218e0e9e387f07bfe6 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 28 Oct 2022 09:52:26 +0000 Subject: [PATCH 237/328] cifs: always iterate smb sessions using primary channel smb sessions and tcons currently hang off primary channel only. Secondary channels have the lists as empty. Whenever there's a need to iterate sessions or tcons, we should use the list in the corresponding primary channel. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/misc.c | 6 +++++- fs/cifs/smb2misc.c | 12 ++++++++++-- fs/cifs/smb2ops.c | 6 +++++- fs/cifs/smb2transport.c | 6 +++++- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index da51ffd02928..3e68d8208cf5 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -400,6 +400,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) { struct smb_hdr *buf = (struct smb_hdr *)buffer; struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifsInodeInfo *pCifsInode; @@ -464,9 +465,12 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) return false; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(srv) ? srv->primary_server : srv; + /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &srv->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid != buf->Tid) continue; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index a38720477966..e73a3c649b87 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -135,6 +135,7 @@ static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, int smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) { + struct TCP_Server_Info *pserver; struct smb2_hdr *shdr = (struct smb2_hdr *)buf; struct smb2_pdu *pdu = (struct smb2_pdu *)shdr; int hdr_size = sizeof(struct smb2_hdr); @@ -143,6 +144,9 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) __u32 calc_len; /* calculated length */ __u64 mid; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + /* * Add function to do table lookup of StructureSize by command * ie Validate the wct via smb2_struct_sizes table above @@ -155,7 +159,7 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *server) /* decrypt frame now that it is completely read in */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(iter, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(iter, &pserver->smb_ses_list, smb_ses_list) { if (iter->Suid == le64_to_cpu(thdr->SessionId)) { ses = iter; break; @@ -671,6 +675,7 @@ bool smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) { struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifsInodeInfo *cinode; @@ -691,9 +696,12 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) cifs_dbg(FYI, "oplock level 0x%x\n", rsp->OplockLevel); + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { spin_lock(&tcon->open_file_lock); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 4f53fa012936..cca12eadbb07 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2302,14 +2302,18 @@ static void smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server) { struct smb2_hdr *shdr = (struct smb2_hdr *)buf; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; if (shdr->Status != STATUS_NETWORK_NAME_DELETED) return; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->tid == le32_to_cpu(shdr->Id.SyncId.TreeId)) { spin_lock(&tcon->tc_lock); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 8e3f26e6f6b9..8a9d9d08cf19 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -136,9 +136,13 @@ out: static struct cifs_ses * smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) { + struct TCP_Server_Info *pserver; struct cifs_ses *ses; - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { if (ses->Suid != ses_id) continue; ++ses->ses_count; From 23d9b9b757e8007204d8f71448ab55d5ef2ae8e5 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 28 Oct 2022 10:01:45 +0000 Subject: [PATCH 238/328] cifs: avoid unnecessary iteration of tcp sessions In a few places, we do unnecessary iterations of tcp sessions, even when the server struct is provided. The change avoids it and uses the server struct provided. Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/smb2misc.c | 69 +++++++++++++++++++++-------------------- fs/cifs/smb2ops.c | 24 +++++++------- fs/cifs/smb2transport.c | 13 ++++---- 3 files changed, 55 insertions(+), 51 deletions(-) diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index e73a3c649b87..572293c18e16 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -612,51 +612,52 @@ smb2_tcon_find_pending_open_lease(struct cifs_tcon *tcon, } static bool -smb2_is_valid_lease_break(char *buffer) +smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) { struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; - struct TCP_Server_Info *server; + struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifs_pending_open *open; cifs_dbg(FYI, "Checking for lease break\n"); + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + /* look up tcon based on tid & uid */ spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { - list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { - spin_lock(&tcon->open_file_lock); - cifs_stats_inc( - &tcon->stats.cifs_stats.num_oplock_brks); - if (smb2_tcon_has_lease(tcon, rsp)) { - spin_unlock(&tcon->open_file_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; - } - open = smb2_tcon_find_pending_open_lease(tcon, - rsp); - if (open) { - __u8 lease_key[SMB2_LEASE_KEY_SIZE]; - struct tcon_link *tlink; - - tlink = cifs_get_tlink(open->tlink); - memcpy(lease_key, open->lease_key, - SMB2_LEASE_KEY_SIZE); - spin_unlock(&tcon->open_file_lock); - spin_unlock(&cifs_tcp_ses_lock); - smb2_queue_pending_open_break(tlink, - lease_key, - rsp->NewLeaseState); - return true; - } + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + spin_lock(&tcon->open_file_lock); + cifs_stats_inc( + &tcon->stats.cifs_stats.num_oplock_brks); + if (smb2_tcon_has_lease(tcon, rsp)) { spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } + open = smb2_tcon_find_pending_open_lease(tcon, + rsp); + if (open) { + __u8 lease_key[SMB2_LEASE_KEY_SIZE]; + struct tcon_link *tlink; - if (cached_dir_lease_break(tcon, rsp->LeaseKey)) { - spin_unlock(&cifs_tcp_ses_lock); - return true; - } + tlink = cifs_get_tlink(open->tlink); + memcpy(lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + spin_unlock(&tcon->open_file_lock); + spin_unlock(&cifs_tcp_ses_lock); + smb2_queue_pending_open_break(tlink, + lease_key, + rsp->NewLeaseState); + return true; + } + spin_unlock(&tcon->open_file_lock); + + if (cached_dir_lease_break(tcon, rsp->LeaseKey)) { + spin_unlock(&cifs_tcp_ses_lock); + return true; } } } @@ -689,7 +690,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) - return smb2_is_valid_lease_break(buffer); + return smb2_is_valid_lease_break(buffer, server); else return false; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index cca12eadbb07..880cd494afea 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -4268,21 +4268,23 @@ init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign) static int smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) { + struct TCP_Server_Info *pserver; struct cifs_ses *ses; u8 *ses_enc_key; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { - if (ses->Suid == ses_id) { - spin_lock(&ses->ses_lock); - ses_enc_key = enc ? ses->smb3encryptionkey : - ses->smb3decryptionkey; - memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE); - spin_unlock(&ses->ses_lock); - spin_unlock(&cifs_tcp_ses_lock); - return 0; - } + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (ses->Suid == ses_id) { + spin_lock(&ses->ses_lock); + ses_enc_key = enc ? ses->smb3encryptionkey : + ses->smb3decryptionkey; + memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE); + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + return 0; } } spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 8a9d9d08cf19..381babc1212c 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -77,18 +77,19 @@ static int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { struct cifs_chan *chan; + struct TCP_Server_Info *pserver; struct cifs_ses *ses = NULL; - struct TCP_Server_Info *it = NULL; int i; int rc = 0; spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(it, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &it->smb_ses_list, smb_ses_list) { - if (ses->Suid == ses_id) - goto found; - } + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + if (ses->Suid == ses_id) + goto found; } cifs_server_dbg(VFS, "%s: Could not find session 0x%llx\n", __func__, ses_id); From 542228db2f28fdf775b301f2843e1fe486e7c797 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Fri, 4 Nov 2022 15:44:41 +0800 Subject: [PATCH 239/328] cifs: fix use-after-free on the link name xfstests generic/011 reported use-after-free bug as follows: BUG: KASAN: use-after-free in __d_alloc+0x269/0x859 Read of size 15 at addr ffff8880078933a0 by task dirstress/952 CPU: 1 PID: 952 Comm: dirstress Not tainted 6.1.0-rc3+ #77 Call Trace: __dump_stack+0x23/0x29 dump_stack_lvl+0x51/0x73 print_address_description+0x67/0x27f print_report+0x3e/0x5c kasan_report+0x7b/0xa8 kasan_check_range+0x1b2/0x1c1 memcpy+0x22/0x5d __d_alloc+0x269/0x859 d_alloc+0x45/0x20c d_alloc_parallel+0xb2/0x8b2 lookup_open+0x3b8/0x9f9 open_last_lookups+0x63d/0xc26 path_openat+0x11a/0x261 do_filp_open+0xcc/0x168 do_sys_openat2+0x13b/0x3f7 do_sys_open+0x10f/0x146 __se_sys_creat+0x27/0x2e __x64_sys_creat+0x55/0x6a do_syscall_64+0x40/0x96 entry_SYSCALL_64_after_hwframe+0x63/0xcd Allocated by task 952: kasan_save_stack+0x1f/0x42 kasan_set_track+0x21/0x2a kasan_save_alloc_info+0x17/0x1d __kasan_kmalloc+0x7e/0x87 __kmalloc_node_track_caller+0x59/0x155 kstrndup+0x60/0xe6 parse_mf_symlink+0x215/0x30b check_mf_symlink+0x260/0x36a cifs_get_inode_info+0x14e1/0x1690 cifs_revalidate_dentry_attr+0x70d/0x964 cifs_revalidate_dentry+0x36/0x62 cifs_d_revalidate+0x162/0x446 lookup_open+0x36f/0x9f9 open_last_lookups+0x63d/0xc26 path_openat+0x11a/0x261 do_filp_open+0xcc/0x168 do_sys_openat2+0x13b/0x3f7 do_sys_open+0x10f/0x146 __se_sys_creat+0x27/0x2e __x64_sys_creat+0x55/0x6a do_syscall_64+0x40/0x96 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 950: kasan_save_stack+0x1f/0x42 kasan_set_track+0x21/0x2a kasan_save_free_info+0x1c/0x34 ____kasan_slab_free+0x1c1/0x1d5 __kasan_slab_free+0xe/0x13 __kmem_cache_free+0x29a/0x387 kfree+0xd3/0x10e cifs_fattr_to_inode+0xb6a/0xc8c cifs_get_inode_info+0x3cb/0x1690 cifs_revalidate_dentry_attr+0x70d/0x964 cifs_revalidate_dentry+0x36/0x62 cifs_d_revalidate+0x162/0x446 lookup_open+0x36f/0x9f9 open_last_lookups+0x63d/0xc26 path_openat+0x11a/0x261 do_filp_open+0xcc/0x168 do_sys_openat2+0x13b/0x3f7 do_sys_open+0x10f/0x146 __se_sys_creat+0x27/0x2e __x64_sys_creat+0x55/0x6a do_syscall_64+0x40/0x96 entry_SYSCALL_64_after_hwframe+0x63/0xcd When opened a symlink, link name is from 'inode->i_link', but it may be reset to a new value when revalidate the dentry. If some processes get the link name on the race scenario, then UAF will happen on link name. Fix this by implementing 'get_link' interface to duplicate the link name. Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Signed-off-by: ChenXiaoSong Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 26 +++++++++++++++++++++++++- fs/cifs/inode.c | 5 ----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d0b9fec111aa..fe220686bba4 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1143,8 +1143,32 @@ const struct inode_operations cifs_file_inode_ops = { .fiemap = cifs_fiemap, }; +const char *cifs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) +{ + char *target_path; + + target_path = kmalloc(PATH_MAX, GFP_KERNEL); + if (!target_path) + return ERR_PTR(-ENOMEM); + + spin_lock(&inode->i_lock); + if (likely(CIFS_I(inode)->symlink_target)) { + strscpy(target_path, CIFS_I(inode)->symlink_target, PATH_MAX); + } else { + kfree(target_path); + target_path = ERR_PTR(-EOPNOTSUPP); + } + spin_unlock(&inode->i_lock); + + if (!IS_ERR(target_path)) + set_delayed_call(done, kfree_link, target_path); + + return target_path; +} + const struct inode_operations cifs_symlink_inode_ops = { - .get_link = simple_get_link, + .get_link = cifs_get_link, .permission = cifs_permission, .listxattr = cifs_listxattr, }; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9bde08d44617..4e2ca3c6e5c0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -215,11 +215,6 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) kfree(cifs_i->symlink_target); cifs_i->symlink_target = fattr->cf_symlink_target; fattr->cf_symlink_target = NULL; - - if (unlikely(!cifs_i->symlink_target)) - inode->i_link = ERR_PTR(-EOPNOTSUPP); - else - inode->i_link = cifs_i->symlink_target; } spin_unlock(&inode->i_lock); From 46653972e3ea64f79e7f8ae3aa41a4d3fdb70a13 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 19:25:36 +0800 Subject: [PATCH 240/328] capabilities: fix undefined behavior in bit shift for CAP_TO_MASK Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in security/commoncap.c:1252:2 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c cap_task_prctl+0x561/0x6f0 security_task_prctl+0x5a/0xb0 __x64_sys_prctl+0x61/0x8f0 do_syscall_64+0x58/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: e338d263a76a ("Add 64-bit capability support to the kernel") Signed-off-by: Gaosheng Cui Acked-by: Andrew G. Morgan Reviewed-by: Serge Hallyn Signed-off-by: Paul Moore --- include/uapi/linux/capability.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 463d1ba2232a..3d61a0ae055d 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -426,7 +426,7 @@ struct vfs_ns_cap_data { */ #define CAP_TO_INDEX(x) ((x) >> 5) /* 1 << 5 == bits in __u32 */ -#define CAP_TO_MASK(x) (1 << ((x) & 31)) /* mask for indexed __u32 */ +#define CAP_TO_MASK(x) (1U << ((x) & 31)) /* mask for indexed __u32 */ #endif /* _UAPI_LINUX_CAPABILITY_H */ From 819b885cd886c193782891c4f51bbcab3de119a4 Mon Sep 17 00:00:00 2001 From: John Thomson Date: Sat, 5 Nov 2022 06:52:41 +1000 Subject: [PATCH 241/328] phy: ralink: mt7621-pci: add sentinel to quirks table With mt7621 soc_dev_attr fixed to register the soc as a device, kernel will experience an oops in soc_device_match_attr This quirk test was introduced in the staging driver in commit 9445ccb3714c ("staging: mt7621-pci-phy: add quirks for 'E2' revision using 'soc_device_attribute'"). The staging driver was removed, and later re-added in commit d87da32372a0 ("phy: ralink: Add PHY driver for MT7621 PCIe PHY") for kernel 5.11 Link: https://lore.kernel.org/lkml/26ebbed1-0fe9-4af9-8466-65f841d0b382@app.fastmail.com Fixes: d87da32372a0 ("phy: ralink: Add PHY driver for MT7621 PCIe PHY") Signed-off-by: John Thomson Acked-by: Sergio Paracuellos Link: https://lore.kernel.org/r/20221104205242.3440388-2-git@johnthomson.fastmail.com.au Signed-off-by: Vinod Koul --- drivers/phy/ralink/phy-mt7621-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/phy/ralink/phy-mt7621-pci.c b/drivers/phy/ralink/phy-mt7621-pci.c index 5e6530f545b5..85888ab2d307 100644 --- a/drivers/phy/ralink/phy-mt7621-pci.c +++ b/drivers/phy/ralink/phy-mt7621-pci.c @@ -280,7 +280,8 @@ static struct phy *mt7621_pcie_phy_of_xlate(struct device *dev, } static const struct soc_device_attribute mt7621_pci_quirks_match[] = { - { .soc_id = "mt7621", .revision = "E2" } + { .soc_id = "mt7621", .revision = "E2" }, + { /* sentinel */ } }; static const struct regmap_config mt7621_pci_phy_regmap_config = { From 17a0bc9bd697f75cfdf9b378d5eb2d7409c91340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= Date: Wed, 12 Oct 2022 14:13:30 +0100 Subject: [PATCH 242/328] ext4: fix BUG_ON() when directory entry has invalid rec_len MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rec_len field in the directory entry has to be a multiple of 4. A corrupted filesystem image can be used to hit a BUG() in ext4_rec_len_to_disk(), called from make_indexed_dir(). ------------[ cut here ]------------ kernel BUG at fs/ext4/ext4.h:2413! ... RIP: 0010:make_indexed_dir+0x53f/0x5f0 ... Call Trace: ? add_dirent_to_buf+0x1b2/0x200 ext4_add_entry+0x36e/0x480 ext4_add_nondir+0x2b/0xc0 ext4_create+0x163/0x200 path_openat+0x635/0xe90 do_filp_open+0xb4/0x160 ? __create_object.isra.0+0x1de/0x3b0 ? _raw_spin_unlock+0x12/0x30 do_sys_openat2+0x91/0x150 __x64_sys_open+0x6c/0xa0 do_syscall_64+0x3c/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The fix simply adds a call to ext4_check_dir_entry() to validate the directory entry, returning -EFSCORRUPTED if the entry is invalid. CC: stable@kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=216540 Signed-off-by: Luís Henriques Link: https://lore.kernel.org/r/20221012131330.32456-1-lhenriques@suse.de Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4183a4cb4a21..be8136aafa22 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2259,8 +2259,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, memset(de, 0, len); /* wipe old data */ de = (struct ext4_dir_entry_2 *) data2; top = data2 + len; - while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) + while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) { + if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len, + (data2 + (blocksize - csum_size) - + (char *) de))) { + brelse(bh2); + brelse(bh); + return -EFSCORRUPTED; + } de = de2; + } de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) - (char *) de, blocksize); From 1b8f787ef547230a3249bcf897221ef0cc78481b Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 18 Oct 2022 10:27:01 +0800 Subject: [PATCH 243/328] ext4: fix warning in 'ext4_da_release_space' Syzkaller report issue as follows: EXT4-fs (loop0): Free/Dirty block details EXT4-fs (loop0): free_blocks=0 EXT4-fs (loop0): dirty_blocks=0 EXT4-fs (loop0): Block reservation details EXT4-fs (loop0): i_reserved_data_blocks=0 EXT4-fs warning (device loop0): ext4_da_release_space:1527: ext4_da_release_space: ino 18, to_free 1 with only 0 reserved data blocks ------------[ cut here ]------------ WARNING: CPU: 0 PID: 92 at fs/ext4/inode.c:1528 ext4_da_release_space+0x25e/0x370 fs/ext4/inode.c:1524 Modules linked in: CPU: 0 PID: 92 Comm: kworker/u4:4 Not tainted 6.0.0-syzkaller-09423-g493ffd6605b2 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Workqueue: writeback wb_workfn (flush-7:0) RIP: 0010:ext4_da_release_space+0x25e/0x370 fs/ext4/inode.c:1528 RSP: 0018:ffffc900015f6c90 EFLAGS: 00010296 RAX: 42215896cd52ea00 RBX: 0000000000000000 RCX: 42215896cd52ea00 RDX: 0000000000000000 RSI: 0000000080000001 RDI: 0000000000000000 RBP: 1ffff1100e907d96 R08: ffffffff816aa79d R09: fffff520002bece5 R10: fffff520002bece5 R11: 1ffff920002bece4 R12: ffff888021fd2000 R13: ffff88807483ecb0 R14: 0000000000000001 R15: ffff88807483e740 FS: 0000000000000000(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005555569ba628 CR3: 000000000c88e000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ext4_es_remove_extent+0x1ab/0x260 fs/ext4/extents_status.c:1461 mpage_release_unused_pages+0x24d/0xef0 fs/ext4/inode.c:1589 ext4_writepages+0x12eb/0x3be0 fs/ext4/inode.c:2852 do_writepages+0x3c3/0x680 mm/page-writeback.c:2469 __writeback_single_inode+0xd1/0x670 fs/fs-writeback.c:1587 writeback_sb_inodes+0xb3b/0x18f0 fs/fs-writeback.c:1870 wb_writeback+0x41f/0x7b0 fs/fs-writeback.c:2044 wb_do_writeback fs/fs-writeback.c:2187 [inline] wb_workfn+0x3cb/0xef0 fs/fs-writeback.c:2227 process_one_work+0x877/0xdb0 kernel/workqueue.c:2289 worker_thread+0xb14/0x1330 kernel/workqueue.c:2436 kthread+0x266/0x300 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 Above issue may happens as follows: ext4_da_write_begin ext4_create_inline_data ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); __ext4_ioctl ext4_ext_migrate -> will lead to eh->eh_entries not zero, and set extent flag ext4_da_write_begin ext4_da_convert_inline_data_to_extent ext4_da_write_inline_data_begin ext4_da_map_blocks ext4_insert_delayed_block if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) if (!ext4_es_scan_clu(inode, &ext4_es_is_mapped, lblk)) ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk)); -> will return 1 allocated = true; ext4_es_insert_delayed_block(inode, lblk, allocated); ext4_writepages mpage_map_and_submit_extent(handle, &mpd, &give_up_on_write); -> return -ENOSPC mpage_release_unused_pages(&mpd, give_up_on_write); -> give_up_on_write == 1 ext4_es_remove_extent ext4_da_release_space(inode, reserved); if (unlikely(to_free > ei->i_reserved_data_blocks)) -> to_free == 1 but ei->i_reserved_data_blocks == 0 -> then trigger warning as above To solve above issue, forbid inode do migrate which has inline data. Cc: stable@kernel.org Reported-by: syzbot+c740bb18df70ad00952e@syzkaller.appspotmail.com Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221018022701.683489-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/migrate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 0a220ec9862d..a19a9661646e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -424,7 +424,8 @@ int ext4_ext_migrate(struct inode *inode) * already is extent-based, error out. */ if (!ext4_has_feature_extents(inode->i_sb) || - (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) + ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || + ext4_has_inline_data(inode)) return -EINVAL; if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0) From 9f2a1d9fb33a2129a9ba29bc61d3f14adb28ddc2 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 25 Oct 2022 12:02:06 +0800 Subject: [PATCH 244/328] ext4: fix wrong return err in ext4_load_and_init_journal() The return value is wrong in ext4_load_and_init_journal(). The local variable 'err' need to be initialized before goto out. The original code in __ext4_fill_super() is fine because it has two return values 'ret' and 'err' and 'ret' is initialized as -EINVAL. After we factor out ext4_load_and_init_journal(), this code is broken. So fix it by directly returning -EINVAL in the error handler path. Cc: stable@kernel.org Fixes: 9c1dd22d7422 ("ext4: factor out ext4_load_and_init_journal()") Signed-off-by: Jason Yan Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221025040206.3134773-1-yanaijie@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d733db8a0b02..6ddebc9f1b90 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4886,7 +4886,7 @@ out: flush_work(&sbi->s_error_work); jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; - return err; + return -EINVAL; } static int ext4_journal_data_mode_check(struct super_block *sb) From 0d043351e5baf3857f915367deba2a518b6a0809 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 5 Nov 2022 23:42:36 -0400 Subject: [PATCH 245/328] ext4: fix fortify warning in fs/ext4/fast_commit.c:1551 With the new fortify string system, rework the memcpy to avoid this warning: memcpy: detected field-spanning write (size 60) of single field "&raw_inode->i_generation" at fs/ext4/fast_commit.c:1551 (size 4) Cc: stable@kernel.org Fixes: 54d9469bc515 ("fortify: Add run-time WARN for cross-field memcpy()") Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index ef05bfa87798..0f6d0a80467d 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1521,6 +1521,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, struct ext4_iloc iloc; int inode_len, ino, ret, tag = tl->fc_tag; struct ext4_extent_header *eh; + size_t off_gen = offsetof(struct ext4_inode, i_generation); memcpy(&fc_inode, val, sizeof(fc_inode)); @@ -1548,8 +1549,8 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, raw_inode = ext4_raw_inode(&iloc); memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block)); - memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation, - inode_len - offsetof(struct ext4_inode, i_generation)); + memcpy((u8 *)raw_inode + off_gen, (u8 *)raw_fc_inode + off_gen, + inode_len - off_gen); if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) { eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]); if (eh->eh_magic != EXT4_EXT_MAGIC) { From 328687151b2a29fe93db4736bfb90da134e52fbf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 4 Nov 2022 23:35:34 -0700 Subject: [PATCH 246/328] mm/slab_common: Restore passing "caller" for tracing The "caller" argument was accidentally being ignored in a few places that were recently refactored. Restore these "caller" arguments, instead of _RET_IP_. Fixes: 11e9734bcb6a ("mm/slab_common: unify NUMA and UMA version of tracepoints") Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Vlastimil Babka Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: Roman Gushchin Cc: linux-mm@kvack.org Signed-off-by: Kees Cook Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 206e59051c1d..fa784563e1ed 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -941,7 +941,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { ret = __kmalloc_large_node(size, flags, node); - trace_kmalloc(_RET_IP_, ret, size, + trace_kmalloc(caller, ret, size, PAGE_SIZE << get_order(size), flags, node); return ret; } @@ -953,7 +953,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller ret = __kmem_cache_alloc_node(s, flags, node, size, caller); ret = kasan_kmalloc(s, ret, size, flags); - trace_kmalloc(_RET_IP_, ret, size, s->size, flags, node); + trace_kmalloc(caller, ret, size, s->size, flags, node); return ret; } From f0c4d9fc9cc9462659728d168387191387e903cc Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Nov 2022 15:07:11 -0800 Subject: [PATCH 247/328] Linux 6.1-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 226ca8a312d9..ac2ec990422d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 59dc2a7e7c6a0d4a2a2c4dafea874af484fc0592 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 31 Oct 2022 11:17:45 +0100 Subject: [PATCH 248/328] ACPI: video: Improve Chromebook checks 2 improvements for the Chromebook handling in acpi_video_get_backlight_type(): 1. Also check for the "GOOG000C" ACPI HID used on some models 2. Move the Chromebook check to above the ACPI-video check normally Chromebooks don't have ACPI video backlight support, but when flashed with upstream coreboot builds they may have ACPI video backlight support, but native should still be used/preferred then. Suggested-by: Mr. Chromebox Signed-off-by: Hans de Goede --- drivers/acpi/video_detect.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 9cd8797d12bb..06aaec2e378b 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -670,7 +670,7 @@ static const struct dmi_system_id video_detect_dmi_table[] = { static bool google_cros_ec_present(void) { - return acpi_dev_found("GOOG0004"); + return acpi_dev_found("GOOG0004") || acpi_dev_found("GOOG000C"); } /* @@ -718,6 +718,10 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) if (apple_gmux_present()) return acpi_backlight_apple_gmux; + /* Chromebooks should always prefer native backlight control. */ + if (google_cros_ec_present() && native_available) + return acpi_backlight_native; + /* On systems with ACPI video use either native or ACPI video. */ if (video_caps & ACPI_VIDEO_BACKLIGHT) { /* @@ -735,13 +739,6 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) return acpi_backlight_video; } - /* - * Chromebooks that don't have backlight handle in ACPI table - * are supposed to use native backlight if it's available. - */ - if (google_cros_ec_present() && native_available) - return acpi_backlight_native; - /* No ACPI video (old hw), use vendor specific fw methods. */ return acpi_backlight_vendor; } From 531705a765493655472c993627106e19f7e5a6d2 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 3 Nov 2022 17:05:37 +0800 Subject: [PATCH 249/328] net: lapbether: fix issue of dev reference count leakage in lapbeth_device_event() When following tests are performed, it will cause dev reference counting leakage. a)ip link add bond2 type bond mode balance-rr b)ip link set bond2 up c)ifenslave -f bond2 rose1 d)ip link del bond2 When new bond device is created, the default type of the bond device is ether. And the bond device is up, lapbeth_device_event() receives the message and creates a new lapbeth device. In this case, the reference count value of dev is hold once. But after "ifenslave -f bond2 rose1" command is executed, the type of the bond device is changed to rose. When the bond device is unregistered, lapbeth_device_event() will not put the dev reference count. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller --- drivers/net/wan/lapbether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 960f1393595c..cb360dca3250 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -446,7 +446,7 @@ static int lapbeth_device_event(struct notifier_block *this, if (dev_net(dev) != &init_net) return NOTIFY_DONE; - if (!dev_is_ethdev(dev)) + if (!dev_is_ethdev(dev) && !lapbeth_get_x25_dev(dev)) return NOTIFY_DONE; switch (event) { From 85cbaf032d3cd9f595152625eda5d4ecb1d6d78d Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 3 Nov 2022 17:09:05 +0800 Subject: [PATCH 250/328] hamradio: fix issue of dev reference count leakage in bpq_device_event() When following tests are performed, it will cause dev reference counting leakage. a)ip link add bond2 type bond mode balance-rr b)ip link set bond2 up c)ifenslave -f bond2 rose1 d)ip link del bond2 When new bond device is created, the default type of the bond device is ether. And the bond device is up, bpq_device_event() receives the message and creates a new bpq device. In this case, the reference count value of dev is hold once. But after "ifenslave -f bond2 rose1" command is executed, the type of the bond device is changed to rose. When the bond device is unregistered, bpq_device_event() will not put the dev reference count. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller --- drivers/net/hamradio/bpqether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 30af0081e2be..83a16d10eedb 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -533,7 +533,7 @@ static int bpq_device_event(struct notifier_block *this, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - if (!dev_is_ethdev(dev)) + if (!dev_is_ethdev(dev) && !bpq_get_ax25_dev(dev)) return NOTIFY_DONE; switch (event) { From f25caaca424703d5a0607310f0452f978f1f78d9 Mon Sep 17 00:00:00 2001 From: HW He Date: Thu, 3 Nov 2022 18:40:00 +0800 Subject: [PATCH 251/328] net: wwan: iosm: fix memory leak in ipc_wwan_dellink IOSM driver registers network device without setting the needs_free_netdev flag, and does NOT call free_netdev() when unregisters network device, which causes a memory leak. This patch sets needs_free_netdev to true when registers network device, which makes netdev subsystem call free_netdev() automatically after unregister_netdevice(). Fixes: 2a54f2c77934 ("net: iosm: net driver") Signed-off-by: HW He Reviewed-by: Loic Poulain Signed-off-by: Zhaoping Shu Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index 2f1f8b5d5b59..0108d8d01ff2 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -168,6 +168,7 @@ static void ipc_wwan_setup(struct net_device *iosm_dev) iosm_dev->max_mtu = ETH_MAX_MTU; iosm_dev->flags = IFF_POINTOPOINT | IFF_NOARP; + iosm_dev->needs_free_netdev = true; iosm_dev->netdev_ops = &ipc_inm_ops; } From 668205b9c9f94d5ed6ab00cce9a46a654c2b5d16 Mon Sep 17 00:00:00 2001 From: HW He Date: Thu, 3 Nov 2022 18:54:19 +0800 Subject: [PATCH 252/328] net: wwan: mhi: fix memory leak in mhi_mbim_dellink MHI driver registers network device without setting the needs_free_netdev flag, and does NOT call free_netdev() when unregisters network device, which causes a memory leak. This patch sets needs_free_netdev to true when registers network device, which makes netdev subsystem call free_netdev() automatically after unregister_netdevice(). Fixes: aa730a9905b7 ("net: wwan: Add MHI MBIM network driver") Signed-off-by: HW He Signed-off-by: Zhaoping Shu Signed-off-by: David S. Miller --- drivers/net/wwan/mhi_wwan_mbim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index 6872782e8dd8..ef70bb7c88ad 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -582,6 +582,7 @@ static void mhi_mbim_setup(struct net_device *ndev) ndev->min_mtu = ETH_MIN_MTU; ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom; ndev->tx_queue_len = 1000; + ndev->needs_free_netdev = true; } static const struct wwan_ops mhi_mbim_wwan_ops = { From f9027f88f7d1dc2b0ed1afbf28f22992d72d7efe Mon Sep 17 00:00:00 2001 From: Zhaoping Shu Date: Thu, 3 Nov 2022 19:08:49 +0800 Subject: [PATCH 253/328] net: wwan: iosm: Remove unnecessary if_mutex lock These WWAN network interface operations (create/delete/open/close) are already protected by RTNL lock, i.e., wwan_ops.newlink(), wwan_ops.dellink(), net_device_ops.ndo_open() and net_device.ndo_stop() are called with RTNL lock held. Therefore, this patch removes the unnecessary if_mutex. Signed-off-by: Zhaoping Shu Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_wwan.c | 42 ++++----------------------- 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index 0108d8d01ff2..4c9022a93e01 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -40,13 +40,11 @@ struct iosm_netdev_priv { * @ipc_imem: Pointer to imem data-struct * @sub_netlist: List of active netdevs * @dev: Pointer device structure - * @if_mutex: Mutex used for add and remove interface id */ struct iosm_wwan { struct iosm_imem *ipc_imem; struct iosm_netdev_priv __rcu *sub_netlist[IP_MUX_SESSION_END + 1]; struct device *dev; - struct mutex if_mutex; /* Mutex used for add and remove interface id */ }; /* Bring-up the wwan net link */ @@ -55,14 +53,11 @@ static int ipc_wwan_link_open(struct net_device *netdev) struct iosm_netdev_priv *priv = wwan_netdev_drvpriv(netdev); struct iosm_wwan *ipc_wwan = priv->ipc_wwan; int if_id = priv->if_id; - int ret; if (if_id < IP_MUX_SESSION_START || if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist)) return -EINVAL; - mutex_lock(&ipc_wwan->if_mutex); - /* get channel id */ priv->ch_id = ipc_imem_sys_wwan_open(ipc_wwan->ipc_imem, if_id); @@ -70,8 +65,7 @@ static int ipc_wwan_link_open(struct net_device *netdev) dev_err(ipc_wwan->dev, "cannot connect wwan0 & id %d to the IPC mem layer", if_id); - ret = -ENODEV; - goto out; + return -ENODEV; } /* enable tx path, DL data may follow */ @@ -80,10 +74,7 @@ static int ipc_wwan_link_open(struct net_device *netdev) dev_dbg(ipc_wwan->dev, "Channel id %d allocated to if_id %d", priv->ch_id, priv->if_id); - ret = 0; -out: - mutex_unlock(&ipc_wwan->if_mutex); - return ret; + return 0; } /* Bring-down the wwan net link */ @@ -93,11 +84,9 @@ static int ipc_wwan_link_stop(struct net_device *netdev) netif_stop_queue(netdev); - mutex_lock(&priv->ipc_wwan->if_mutex); ipc_imem_sys_wwan_close(priv->ipc_wwan->ipc_imem, priv->if_id, priv->ch_id); priv->ch_id = -1; - mutex_unlock(&priv->ipc_wwan->if_mutex); return 0; } @@ -190,26 +179,17 @@ static int ipc_wwan_newlink(void *ctxt, struct net_device *dev, priv->netdev = dev; priv->ipc_wwan = ipc_wwan; - mutex_lock(&ipc_wwan->if_mutex); - if (rcu_access_pointer(ipc_wwan->sub_netlist[if_id])) { - err = -EBUSY; - goto out_unlock; - } + if (rcu_access_pointer(ipc_wwan->sub_netlist[if_id])) + return -EBUSY; err = register_netdevice(dev); if (err) - goto out_unlock; + return err; rcu_assign_pointer(ipc_wwan->sub_netlist[if_id], priv); - mutex_unlock(&ipc_wwan->if_mutex); - netif_device_attach(dev); return 0; - -out_unlock: - mutex_unlock(&ipc_wwan->if_mutex); - return err; } static void ipc_wwan_dellink(void *ctxt, struct net_device *dev, @@ -223,17 +203,12 @@ static void ipc_wwan_dellink(void *ctxt, struct net_device *dev, if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist))) return; - mutex_lock(&ipc_wwan->if_mutex); - if (WARN_ON(rcu_access_pointer(ipc_wwan->sub_netlist[if_id]) != priv)) - goto unlock; + return; RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL); /* unregistering includes synchronize_net() */ unregister_netdevice_queue(dev, head); - -unlock: - mutex_unlock(&ipc_wwan->if_mutex); } static const struct wwan_ops iosm_wwan_ops = { @@ -324,12 +299,9 @@ struct iosm_wwan *ipc_wwan_init(struct iosm_imem *ipc_imem, struct device *dev) ipc_wwan->dev = dev; ipc_wwan->ipc_imem = ipc_imem; - mutex_init(&ipc_wwan->if_mutex); - /* WWAN core will create a netdev for the default IP MUX channel */ if (wwan_register_ops(ipc_wwan->dev, &iosm_wwan_ops, ipc_wwan, IP_MUX_SESSION_DEFAULT)) { - mutex_destroy(&ipc_wwan->if_mutex); kfree(ipc_wwan); return NULL; } @@ -342,7 +314,5 @@ void ipc_wwan_deinit(struct iosm_wwan *ipc_wwan) /* This call will remove all child netdev(s) */ wwan_unregister_ops(ipc_wwan->dev); - mutex_destroy(&ipc_wwan->if_mutex); - kfree(ipc_wwan); } From 0c175da7b0378445f5ef53904247cfbfb87e0b78 Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Fri, 4 Nov 2022 10:27:23 +0800 Subject: [PATCH 254/328] tcp: prohibit TCP_REPAIR_OPTIONS if data was already sent If setsockopt with option name of TCP_REPAIR_OPTIONS and opt_code of TCPOPT_SACK_PERM is called to enable sack after data is sent and dupacks are received , it will trigger a warning in function tcp_verify_left_out() as follows: ============================================ WARNING: CPU: 8 PID: 0 at net/ipv4/tcp_input.c:2132 tcp_timeout_mark_lost+0x154/0x160 tcp_enter_loss+0x2b/0x290 tcp_retransmit_timer+0x50b/0x640 tcp_write_timer_handler+0x1c8/0x340 tcp_write_timer+0xe5/0x140 call_timer_fn+0x3a/0x1b0 __run_timers.part.0+0x1bf/0x2d0 run_timer_softirq+0x43/0xb0 __do_softirq+0xfd/0x373 __irq_exit_rcu+0xf6/0x140 The warning is caused in the following steps: 1. a socket named socketA is created 2. socketA enters repair mode without build a connection 3. socketA calls connect() and its state is changed to TCP_ESTABLISHED directly 4. socketA leaves repair mode 5. socketA calls sendmsg() to send data, packets_out and sack_outs(dup ack receives) increase 6. socketA enters repair mode again 7. socketA calls setsockopt with TCPOPT_SACK_PERM to enable sack 8. retransmit timer expires, it calls tcp_timeout_mark_lost(), lost_out increases 9. sack_outs + lost_out > packets_out triggers since lost_out and sack_outs increase repeatly In function tcp_timeout_mark_lost(), tp->sacked_out will be cleared if Step7 not happen and the warning will not be triggered. As suggested by Denis and Eric, TCP_REPAIR_OPTIONS should be prohibited if data was already sent. socket-tcp tests in CRIU has been tested as follows: $ sudo ./test/zdtm.py run -t zdtm/static/socket-tcp* --keep-going \ --ignore-taint socket-tcp* represent all socket-tcp tests in test/zdtm/static/. Fixes: b139ba4e90dc ("tcp: Repair connection-time negotiated parameters") Signed-off-by: Lu Wei Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ef14efa1fb70..54836a6b81d6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3647,7 +3647,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_REPAIR_OPTIONS: if (!tp->repair) err = -EINVAL; - else if (sk->sk_state == TCP_ESTABLISHED) + else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent) err = tcp_repair_options_est(sk, optval, optlen); else err = -EPERM; From 454d61a56d5e4c8cd9905f81d475d23f86f370af Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 4 Nov 2022 18:06:29 +0100 Subject: [PATCH 255/328] ACPI: video: Make acpi_video_backlight_use_native() always return true Testing has shown that there are quite a few laptop models which rely on native backlight control and which do not support ACPI video bus backlight control, causing __acpi_video_get_backlight_type() to return vendor. Known Windows laptop models affected by this are: Acer Aspire 1640 HP Compaq nc6120 IBM ThinkPad X40 System76 Starling Star1 and the following MacBook models are affected too: Apple MacBook 2.1 Apple MacBook 4.1 Apple MacBook Pro 7.1 the list of affected Windows laptop models is likely just the top of the iceberg. So for now lets undo the change to not register native backlight class devices when __acpi_video_get_backlight_type() != native. Since as part of the backlight-detect refactor the detection code now relies on the GPU drivers calling acpi_video_backlight_use_native() to learn that native backlight support is available we cannot just remove the acpi_video_backlight_use_native() calls from the GPU drivers. Instead modify acpi_video_backlight_use_native() to always return true for now. This is meant as a temporary work-around, which will be removed again when the heuristics from __acpi_video_get_backlight_type() have been improved so that they will return native on affected models. Reported-by: Matthew Garrett Reported-by: John Warriner Reported-by: Scott Ostrander Reported-by: Matthias Rampke Reported-by: Milan Hodoscek Signed-off-by: Hans de Goede --- drivers/acpi/video_detect.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 06aaec2e378b..c02e960cdbe3 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -751,6 +751,18 @@ EXPORT_SYMBOL(acpi_video_get_backlight_type); bool acpi_video_backlight_use_native(void) { - return __acpi_video_get_backlight_type(true) == acpi_backlight_native; + /* + * Call __acpi_video_get_backlight_type() to let it know that + * a native backlight is available. + */ + __acpi_video_get_backlight_type(true); + + /* + * For now just always return true. There is a whole bunch of laptop + * models where (video_caps & ACPI_VIDEO_BACKLIGHT) is false causing + * __acpi_video_get_backlight_type() to return vendor, while these + * models only have a native backlight control. + */ + return true; } EXPORT_SYMBOL(acpi_video_backlight_use_native); From f46acc1efd4b5846de9fa05f966e504f328f34a6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 31 Oct 2022 21:20:59 +0100 Subject: [PATCH 256/328] ACPI: video: Add backlight=native DMI quirk for Dell G15 5515 The Dell G15 5515 has the WMI interface (and WMI call returns) expected by the nvidia-wmi-ec-backlight interface. But the backlight class device registered by the nvidia-wmi-ec-backlight driver does not actually work. The amdgpu_bl0 native GPU backlight class device does actually work, add a backlight=native DMI quirk for this. Reported-by: Iris Reviewed-by: Daniel Dadap Signed-off-by: Hans de Goede --- Changes in v2: - Add a comment that this needs to be revisited when dynamic-mux support gets added (suggested by: Daniel Dadap) --- drivers/acpi/video_detect.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index c02e960cdbe3..b2a616287638 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -645,6 +645,20 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, + /* + * Models which have nvidia-ec-wmi support, but should not use it. + * Note this indicates a likely firmware bug on these models and should + * be revisited if/when Linux gets support for dynamic mux mode. + */ + { + .callback = video_detect_force_native, + /* Dell G15 5515 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5515"), + }, + }, + /* * Desktops which falsely report a backlight and which our heuristics * for this do not catch. From a231224a601c1924b9df620281ad04472900d75f Mon Sep 17 00:00:00 2001 From: Manyi Li Date: Tue, 18 Oct 2022 17:53:23 +0800 Subject: [PATCH 257/328] platform/x86: ideapad-laptop: Disable touchpad_switch Ideapads for "Lenovo Yoga 3 Pro 1370" and "ZhaoYang K4e-IML" do not use EC to switch touchpad. Reading VPCCMD_R_TOUCHPAD will return zero thus touchpad may be blocked unexpectedly. Signed-off-by: Manyi Li Link: https://lore.kernel.org/r/20221018095323.14591-1-limanyi@uniontech.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/ideapad-laptop.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index abd0c81d62c4..33b3dfdd1b08 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1533,6 +1533,24 @@ static const struct dmi_system_id hw_rfkill_list[] = { {} }; +static const struct dmi_system_id no_touchpad_switch_list[] = { + { + .ident = "Lenovo Yoga 3 Pro 1370", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3"), + }, + }, + { + .ident = "ZhaoYang K4e-IML", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ZhaoYang K4e-IML"), + }, + }, + {} +}; + static void ideapad_check_features(struct ideapad_private *priv) { acpi_handle handle = priv->adev->handle; @@ -1541,7 +1559,12 @@ static void ideapad_check_features(struct ideapad_private *priv) priv->features.hw_rfkill_switch = dmi_check_system(hw_rfkill_list); /* Most ideapads with ELAN0634 touchpad don't use EC touchpad switch */ - priv->features.touchpad_ctrl_via_ec = !acpi_dev_present("ELAN0634", NULL, -1); + if (acpi_dev_present("ELAN0634", NULL, -1)) + priv->features.touchpad_ctrl_via_ec = 0; + else if (dmi_check_system(no_touchpad_switch_list)) + priv->features.touchpad_ctrl_via_ec = 0; + else + priv->features.touchpad_ctrl_via_ec = 1; if (!read_ec_data(handle, VPCCMD_R_FAN, &val)) priv->features.fan_mode = true; From 0df044b34bf33e7e35c32b3bf6747fde6279c162 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 25 Oct 2022 16:11:31 +0200 Subject: [PATCH 258/328] platform/x86: touchscreen_dmi: Add info for the RCA Cambio W101 v2 2-in-1 Add touchscreen info for the RCA Cambio W101 v2 2-in-1. Link: https://github.com/onitake/gsl-firmware/discussions/193 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221025141131.509211-1-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index bc97bfa8e8a6..baae3120efd0 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -770,6 +770,22 @@ static const struct ts_dmi_data predia_basic_data = { .properties = predia_basic_props, }; +static const struct property_entry rca_cambio_w101_v2_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 4), + PROPERTY_ENTRY_U32("touchscreen-min-y", 20), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1644), + PROPERTY_ENTRY_U32("touchscreen-size-y", 874), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rca-cambio-w101-v2.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data rca_cambio_w101_v2_data = { + .acpi_name = "MSSL1680:00", + .properties = rca_cambio_w101_v2_props, +}; + static const struct property_entry rwc_nanote_p8_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-y", 46), PROPERTY_ENTRY_U32("touchscreen-size-x", 1728), @@ -1409,6 +1425,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"), }, }, + { + /* RCA Cambio W101 v2 */ + /* https://github.com/onitake/gsl-firmware/discussions/193 */ + .driver_data = (void *)&rca_cambio_w101_v2_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "RCA"), + DMI_MATCH(DMI_PRODUCT_NAME, "W101SA23T1"), + }, + }, { /* RWC NANOTE P8 */ .driver_data = (void *)&rwc_nanote_p8_data, From 1598bfa8e1faa932de42e1ee7628a1c4c4263f0a Mon Sep 17 00:00:00 2001 From: Jorge Lopez Date: Fri, 28 Oct 2022 10:55:27 -0500 Subject: [PATCH 259/328] platform/x86: hp_wmi: Fix rfkill causing soft blocked wifi After upgrading BIOS to U82 01.02.01 Rev.A, the console is flooded strange char "^@" which printed out every second and makes login nearly impossible. Also the below messages were shown both in console and journal/dmesg every second: usb 1-3: Device not responding to setup address. usb 1-3: device not accepting address 4, error -71 usb 1-3: device descriptor read/all, error -71 usb usb1-port3: unable to enumerate USB device Wifi is soft blocked by checking rfkill. When unblocked manually, after few seconds it would be soft blocked again. So I was suspecting something triggered rfkill to soft block wifi. At the end it was fixed by removing hp_wmi module. The root cause is the way hp-wmi driver handles command 1B on post-2009 BIOS. In pre-2009 BIOS, command 1Bh return 0x4 to indicate that BIOS no longer controls the power for the wireless devices. Signed-off-by: Jorge Lopez Link: https://bugzilla.kernel.org/show_bug.cgi?id=216468 Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20221028155527.7724-1-jorge.lopez2@hp.com Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/hp-wmi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 627a6d0eaf83..12449038bed1 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -1300,8 +1300,16 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) wwan_rfkill = NULL; rfkill2_count = 0; - if (hp_wmi_rfkill_setup(device)) - hp_wmi_rfkill2_setup(device); + /* + * In pre-2009 BIOS, command 1Bh return 0x4 to indicate that + * BIOS no longer controls the power for the wireless + * devices. All features supported by this command will no + * longer be supported. + */ + if (!hp_wmi_bios_2009_later()) { + if (hp_wmi_rfkill_setup(device)) + hp_wmi_rfkill2_setup(device); + } err = hp_wmi_hwmon_init(); From bcdfa1f77ea7f67368d20384932a9d1e3047ddd2 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Fri, 4 Nov 2022 20:42:28 -0700 Subject: [PATCH 260/328] platform/x86/intel/pmt: Sapphire Rapids PMT errata fix On Sapphire Rapids, due to a hardware issue affecting the PUNIT telemetry region, reads that are not done in QWORD quantities and alignment may return incorrect data. Use a custom 64-bit copy for this region. Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20221105034228.1376677-1-david.e.box@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmt/class.c | 31 +++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/pmt/class.c b/drivers/platform/x86/intel/pmt/class.c index 53d7fd2943b4..46598dcb634a 100644 --- a/drivers/platform/x86/intel/pmt/class.c +++ b/drivers/platform/x86/intel/pmt/class.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -19,6 +20,7 @@ #define PMT_XA_START 0 #define PMT_XA_MAX INT_MAX #define PMT_XA_LIMIT XA_LIMIT(PMT_XA_START, PMT_XA_MAX) +#define GUID_SPR_PUNIT 0x9956f43f bool intel_pmt_is_early_client_hw(struct device *dev) { @@ -33,6 +35,29 @@ bool intel_pmt_is_early_client_hw(struct device *dev) } EXPORT_SYMBOL_GPL(intel_pmt_is_early_client_hw); +static inline int +pmt_memcpy64_fromio(void *to, const u64 __iomem *from, size_t count) +{ + int i, remain; + u64 *buf = to; + + if (!IS_ALIGNED((unsigned long)from, 8)) + return -EFAULT; + + for (i = 0; i < count/8; i++) + buf[i] = readq(&from[i]); + + /* Copy any remaining bytes */ + remain = count % 8; + if (remain) { + u64 tmp = readq(&from[i]); + + memcpy(&buf[i], &tmp, remain); + } + + return count; +} + /* * sysfs */ @@ -54,7 +79,11 @@ intel_pmt_read(struct file *filp, struct kobject *kobj, if (count > entry->size - off) count = entry->size - off; - memcpy_fromio(buf, entry->base + off, count); + if (entry->guid == GUID_SPR_PUNIT) + /* PUNIT on SPR only supports aligned 64-bit read */ + count = pmt_memcpy64_fromio(buf, entry->base + off, count); + else + memcpy_fromio(buf, entry->base + off, count); return count; } From a977ece5773b6746b814aac410da4776023db239 Mon Sep 17 00:00:00 2001 From: Ivan Hu Date: Wed, 2 Nov 2022 10:05:48 +0800 Subject: [PATCH 261/328] platform/x86/intel/hid: Add some ACPI device IDs Add INTC1076 (JasonLake), INTC1077 (MeteorLake) and INTC1078 (RaptorLake) devices IDs. Signed-off-by: Ivan Hu Link: https://lore.kernel.org/r/20221102020548.5225-1-ivan.hu@canonical.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/hid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c index 79cff1fc675c..b6313ecd190c 100644 --- a/drivers/platform/x86/intel/hid.c +++ b/drivers/platform/x86/intel/hid.c @@ -27,6 +27,9 @@ static const struct acpi_device_id intel_hid_ids[] = { {"INTC1051", 0}, {"INTC1054", 0}, {"INTC1070", 0}, + {"INTC1076", 0}, + {"INTC1077", 0}, + {"INTC1078", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); From 53eb64c88f17b14b324fbdfd417f56c5d3fa6fee Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 4 Nov 2022 17:49:16 +0200 Subject: [PATCH 262/328] platform/x86: p2sb: Don't fail if unknown CPU is found We have accessing P2SB from a very few places for quite known hardware. When a new SoC appears in intel-family.h it's not obvious that it needs to be added to p2sb.c as well. Instead, provide default BDF and refactor p2sb_get_devfn() to always succeed. If in the future we would need to exclude something, we may add a list of unsupported IDs. Without this change the iTCO on Intel Comet Lake SoCs became unavailable: i801_smbus 0000:00:1f.4: failed to create iTCO device Fixes: 5c7b9167ddf8 ("i2c: i801: convert to use common P2SB accessor") Reported-and-tested-by: Jarkko Nikula Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221104154916.35231-1-andriy.shevchenko@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/p2sb.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 384d0962ae93..1cf2471d54dd 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -19,26 +19,23 @@ #define P2SBC 0xe0 #define P2SBC_HIDE BIT(8) +#define P2SB_DEVFN_DEFAULT PCI_DEVFN(31, 1) + static const struct x86_cpu_id p2sb_cpu_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, PCI_DEVFN(13, 0)), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, PCI_DEVFN(31, 1)), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, PCI_DEVFN(31, 1)), - X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, PCI_DEVFN(31, 1)), - X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, PCI_DEVFN(31, 1)), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, PCI_DEVFN(31, 1)), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, PCI_DEVFN(31, 1)), {} }; static int p2sb_get_devfn(unsigned int *devfn) { + unsigned int fn = P2SB_DEVFN_DEFAULT; const struct x86_cpu_id *id; id = x86_match_cpu(p2sb_cpu_ids); - if (!id) - return -ENODEV; + if (id) + fn = (unsigned int)id->driver_data; - *devfn = (unsigned int)id->driver_data; + *devfn = fn; return 0; } From c23fb2c82267638f9d206cb96bb93e1f93ad7828 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 4 Nov 2022 11:32:16 +0100 Subject: [PATCH 263/328] ipv6: addrlabel: fix infoleak when sending struct ifaddrlblmsg to network When copying a `struct ifaddrlblmsg` to the network, __ifal_reserved remained uninitialized, resulting in a 1-byte infoleak: BUG: KMSAN: kernel-network-infoleak in __netdev_start_xmit ./include/linux/netdevice.h:4841 __netdev_start_xmit ./include/linux/netdevice.h:4841 netdev_start_xmit ./include/linux/netdevice.h:4857 xmit_one net/core/dev.c:3590 dev_hard_start_xmit+0x1dc/0x800 net/core/dev.c:3606 __dev_queue_xmit+0x17e8/0x4350 net/core/dev.c:4256 dev_queue_xmit ./include/linux/netdevice.h:3009 __netlink_deliver_tap_skb net/netlink/af_netlink.c:307 __netlink_deliver_tap+0x728/0xad0 net/netlink/af_netlink.c:325 netlink_deliver_tap net/netlink/af_netlink.c:338 __netlink_sendskb net/netlink/af_netlink.c:1263 netlink_sendskb+0x1d9/0x200 net/netlink/af_netlink.c:1272 netlink_unicast+0x56d/0xf50 net/netlink/af_netlink.c:1360 nlmsg_unicast ./include/net/netlink.h:1061 rtnl_unicast+0x5a/0x80 net/core/rtnetlink.c:758 ip6addrlbl_get+0xfad/0x10f0 net/ipv6/addrlabel.c:628 rtnetlink_rcv_msg+0xb33/0x1570 net/core/rtnetlink.c:6082 ... Uninit was created at: slab_post_alloc_hook+0x118/0xb00 mm/slab.h:742 slab_alloc_node mm/slub.c:3398 __kmem_cache_alloc_node+0x4f2/0x930 mm/slub.c:3437 __do_kmalloc_node mm/slab_common.c:954 __kmalloc_node_track_caller+0x117/0x3d0 mm/slab_common.c:975 kmalloc_reserve net/core/skbuff.c:437 __alloc_skb+0x27a/0xab0 net/core/skbuff.c:509 alloc_skb ./include/linux/skbuff.h:1267 nlmsg_new ./include/net/netlink.h:964 ip6addrlbl_get+0x490/0x10f0 net/ipv6/addrlabel.c:608 rtnetlink_rcv_msg+0xb33/0x1570 net/core/rtnetlink.c:6082 netlink_rcv_skb+0x299/0x550 net/netlink/af_netlink.c:2540 rtnetlink_rcv+0x26/0x30 net/core/rtnetlink.c:6109 netlink_unicast_kernel net/netlink/af_netlink.c:1319 netlink_unicast+0x9ab/0xf50 net/netlink/af_netlink.c:1345 netlink_sendmsg+0xebc/0x10f0 net/netlink/af_netlink.c:1921 ... This patch ensures that the reserved field is always initialized. Reported-by: syzbot+3553517af6020c4f2813f1003fe76ef3cbffe98d@syzkaller.appspotmail.com Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") Signed-off-by: Alexander Potapenko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 8a22486cf270..17ac45aa7194 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -437,6 +437,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, { struct ifaddrlblmsg *ifal = nlmsg_data(nlh); ifal->ifal_family = AF_INET6; + ifal->__ifal_reserved = 0; ifal->ifal_prefixlen = prefixlen; ifal->ifal_flags = 0; ifal->ifal_index = ifindex; From a3335faebe1608f3e78e24f09501c9f7d5ebf87a Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Mon, 31 Oct 2022 11:30:53 +0800 Subject: [PATCH 264/328] can: af_can: can_exit(): add missing dev_remove_pack() of canxl_packet In can_init(), dev_add_pack(&canxl_packet) is added but not removed in can_exit(). It breaks the packet handler list and can make kernel panic when can_init() is called for the second time. | > modprobe can && rmmod can | > rmmod xxx && modprobe can | | BUG: unable to handle page fault for address: fffffbfff807d7f4 | RIP: 0010:dev_add_pack+0x133/0x1f0 | Call Trace: | | can_init+0xaa/0x1000 [can] | do_one_initcall+0xd3/0x4e0 | ... Fixes: fb08cba12b52 ("can: canxl: update CAN infrastructure for CAN XL frames") Signed-off-by: Chen Zhongjin Acked-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20221031033053.37849-1-chenzhongjin@huawei.com [mkl: adjust subject and commit message] Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/af_can.c b/net/can/af_can.c index 9503ab10f9b8..5e9e3e1e9825 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -902,6 +902,7 @@ out_pernet: static __exit void can_exit(void) { /* protocol unregister */ + dev_remove_pack(&canxl_packet); dev_remove_pack(&canfd_packet); dev_remove_pack(&can_packet); sock_unregister(PF_CAN); From 8aa59e355949442c408408c2d836e561794c40a1 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 28 Oct 2022 16:56:50 +0800 Subject: [PATCH 265/328] can: af_can: fix NULL pointer dereference in can_rx_register() It causes NULL pointer dereference when testing as following: (a) use syscall(__NR_socket, 0x10ul, 3ul, 0) to create netlink socket. (b) use syscall(__NR_sendmsg, ...) to create bond link device and vxcan link device, and bind vxcan device to bond device (can also use ifenslave command to bind vxcan device to bond device). (c) use syscall(__NR_socket, 0x1dul, 3ul, 1) to create CAN socket. (d) use syscall(__NR_bind, ...) to bind the bond device to CAN socket. The bond device invokes the can-raw protocol registration interface to receive CAN packets. However, ml_priv is not allocated to the dev, dev_rcv_lists is assigned to NULL in can_rx_register(). In this case, it will occur the NULL pointer dereference issue. The following is the stack information: BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 122a4067 P4D 122a4067 PUD 1223c067 PMD 0 Oops: 0000 [#1] PREEMPT SMP RIP: 0010:can_rx_register+0x12d/0x1e0 Call Trace: raw_enable_filters+0x8d/0x120 raw_enable_allfilters+0x3b/0x130 raw_bind+0x118/0x4f0 __sys_bind+0x163/0x1a0 __x64_sys_bind+0x1e/0x30 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 4e096a18867a ("net: introduce CAN specific pointer in the struct net_device") Signed-off-by: Zhengchao Shao Reviewed-by: Marc Kleine-Budde Link: https://lore.kernel.org/all/20221028085650.170470-1-shaozhengchao@huawei.com Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 5e9e3e1e9825..27dcdcc0b808 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -450,7 +450,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, /* insert new receiver (dev,canid,mask) -> (func,data) */ - if (dev && dev->type != ARPHRD_CAN) + if (dev && (dev->type != ARPHRD_CAN || !can_get_ml_priv(dev))) return -ENODEV; if (dev && !net_eq(net, dev_net(dev))) From 866337865f3747c68a3e7bb837611e39cec1ecd6 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 4 Nov 2022 15:25:51 +0100 Subject: [PATCH 266/328] can: isotp: fix tx state handling for echo tx processing In commit 4b7fe92c0690 ("can: isotp: add local echo tx processing for consecutive frames") the data flow for consecutive frames (CF) has been reworked to improve the reliability of long data transfers. This rework did not touch the transmission and the tx state changes of single frame (SF) transfers which likely led to the WARN in the isotp_tx_timer_handler() catching a wrong tx state. This patch makes use of the improved frame processing for SF frames and sets the ISOTP_SENDING state in isotp_sendmsg() within the cmpxchg() condition handling. A review of the state machine and the timer handling additionally revealed a missing echo timeout handling in the case of the burst mode in isotp_rcv_echo() and removes a potential timer configuration uncertainty in isotp_rcv_fc() when the receiver requests consecutive frames. Fixes: 4b7fe92c0690 ("can: isotp: add local echo tx processing for consecutive frames") Link: https://lore.kernel.org/linux-can/CAO4mrfe3dG7cMP1V5FLUkw7s+50c9vichigUMQwsxX4M=45QEw@mail.gmail.com/T/#u Reported-by: Wei Chen Cc: stable@vger.kernel.org # v6.0 Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20221104142551.16924-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 71 ++++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index a9d1357f8489..608f8c24ae46 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -111,6 +111,9 @@ MODULE_ALIAS("can-proto-6"); #define ISOTP_FC_WT 1 /* wait */ #define ISOTP_FC_OVFLW 2 /* overflow */ +#define ISOTP_FC_TIMEOUT 1 /* 1 sec */ +#define ISOTP_ECHO_TIMEOUT 2 /* 2 secs */ + enum { ISOTP_IDLE = 0, ISOTP_WAIT_FIRST_FC, @@ -258,7 +261,8 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus) so->lastrxcf_tstamp = ktime_set(0, 0); /* start rx timeout watchdog */ - hrtimer_start(&so->rxtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT); + hrtimer_start(&so->rxtimer, ktime_set(ISOTP_FC_TIMEOUT, 0), + HRTIMER_MODE_REL_SOFT); return 0; } @@ -344,6 +348,8 @@ static int check_pad(struct isotp_sock *so, struct canfd_frame *cf, return 0; } +static void isotp_send_cframe(struct isotp_sock *so); + static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae) { struct sock *sk = &so->sk; @@ -398,14 +404,15 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae) case ISOTP_FC_CTS: so->tx.bs = 0; so->tx.state = ISOTP_SENDING; - /* start cyclic timer for sending CF frame */ - hrtimer_start(&so->txtimer, so->tx_gap, + /* send CF frame and enable echo timeout handling */ + hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0), HRTIMER_MODE_REL_SOFT); + isotp_send_cframe(so); break; case ISOTP_FC_WT: /* start timer to wait for next FC frame */ - hrtimer_start(&so->txtimer, ktime_set(1, 0), + hrtimer_start(&so->txtimer, ktime_set(ISOTP_FC_TIMEOUT, 0), HRTIMER_MODE_REL_SOFT); break; @@ -600,7 +607,7 @@ static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae, /* perform blocksize handling, if enabled */ if (!so->rxfc.bs || ++so->rx.bs < so->rxfc.bs) { /* start rx timeout watchdog */ - hrtimer_start(&so->rxtimer, ktime_set(1, 0), + hrtimer_start(&so->rxtimer, ktime_set(ISOTP_FC_TIMEOUT, 0), HRTIMER_MODE_REL_SOFT); return 0; } @@ -829,7 +836,7 @@ static void isotp_rcv_echo(struct sk_buff *skb, void *data) struct isotp_sock *so = isotp_sk(sk); struct canfd_frame *cf = (struct canfd_frame *)skb->data; - /* only handle my own local echo skb's */ + /* only handle my own local echo CF/SF skb's (no FF!) */ if (skb->sk != sk || so->cfecho != *(u32 *)cf->data) return; @@ -849,13 +856,16 @@ static void isotp_rcv_echo(struct sk_buff *skb, void *data) if (so->txfc.bs && so->tx.bs >= so->txfc.bs) { /* stop and wait for FC with timeout */ so->tx.state = ISOTP_WAIT_FC; - hrtimer_start(&so->txtimer, ktime_set(1, 0), + hrtimer_start(&so->txtimer, ktime_set(ISOTP_FC_TIMEOUT, 0), HRTIMER_MODE_REL_SOFT); return; } /* no gap between data frames needed => use burst mode */ if (!so->tx_gap) { + /* enable echo timeout handling */ + hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0), + HRTIMER_MODE_REL_SOFT); isotp_send_cframe(so); return; } @@ -879,7 +889,7 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer) /* start timeout for unlikely lost echo skb */ hrtimer_set_expires(&so->txtimer, ktime_add(ktime_get(), - ktime_set(2, 0))); + ktime_set(ISOTP_ECHO_TIMEOUT, 0))); restart = HRTIMER_RESTART; /* push out the next consecutive frame */ @@ -907,7 +917,8 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer) break; default: - WARN_ON_ONCE(1); + WARN_ONCE(1, "can-isotp: tx timer state %08X cfecho %08X\n", + so->tx.state, so->cfecho); } return restart; @@ -923,7 +934,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) struct canfd_frame *cf; int ae = (so->opt.flags & CAN_ISOTP_EXTEND_ADDR) ? 1 : 0; int wait_tx_done = (so->opt.flags & CAN_ISOTP_WAIT_TX_DONE) ? 1 : 0; - s64 hrtimer_sec = 0; + s64 hrtimer_sec = ISOTP_ECHO_TIMEOUT; int off; int err; @@ -942,6 +953,8 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); if (err) goto err_out; + + so->tx.state = ISOTP_SENDING; } if (!size || size > MAX_MSG_LENGTH) { @@ -986,6 +999,10 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) cf = (struct canfd_frame *)skb->data; skb_put_zero(skb, so->ll.mtu); + /* cfecho should have been zero'ed by init / former isotp_rcv_echo() */ + if (so->cfecho) + pr_notice_once("can-isotp: uninit cfecho %08X\n", so->cfecho); + /* check for single frame transmission depending on TX_DL */ if (size <= so->tx.ll_dl - SF_PCI_SZ4 - ae - off) { /* The message size generally fits into a SingleFrame - good. @@ -1011,11 +1028,8 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) else cf->data[ae] |= size; - so->tx.state = ISOTP_IDLE; - wake_up_interruptible(&so->wait); - - /* don't enable wait queue for a single frame transmission */ - wait_tx_done = 0; + /* set CF echo tag for isotp_rcv_echo() (SF-mode) */ + so->cfecho = *(u32 *)cf->data; } else { /* send first frame */ @@ -1031,31 +1045,23 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) /* disable wait for FCs due to activated block size */ so->txfc.bs = 0; - /* cfecho should have been zero'ed by init */ - if (so->cfecho) - pr_notice_once("can-isotp: no fc cfecho %08X\n", - so->cfecho); - - /* set consecutive frame echo tag */ + /* set CF echo tag for isotp_rcv_echo() (CF-mode) */ so->cfecho = *(u32 *)cf->data; - - /* switch directly to ISOTP_SENDING state */ - so->tx.state = ISOTP_SENDING; - - /* start timeout for unlikely lost echo skb */ - hrtimer_sec = 2; } else { /* standard flow control check */ so->tx.state = ISOTP_WAIT_FIRST_FC; /* start timeout for FC */ - hrtimer_sec = 1; - } + hrtimer_sec = ISOTP_FC_TIMEOUT; - hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0), - HRTIMER_MODE_REL_SOFT); + /* no CF echo tag for isotp_rcv_echo() (FF-mode) */ + so->cfecho = 0; + } } + hrtimer_start(&so->txtimer, ktime_set(hrtimer_sec, 0), + HRTIMER_MODE_REL_SOFT); + /* send the first or only CAN frame */ cf->flags = so->ll.tx_flags; @@ -1068,8 +1074,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) __func__, ERR_PTR(err)); /* no transmission -> no timeout monitoring */ - if (hrtimer_sec) - hrtimer_cancel(&so->txtimer); + hrtimer_cancel(&so->txtimer); /* reset consecutive frame echo tag */ so->cfecho = 0; From 3eb3d283e8579a22b81dd2ac3987b77465b2a22f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 4 Nov 2022 08:50:00 +0100 Subject: [PATCH 267/328] can: j1939: j1939_send_one(): fix missing CAN header initialization The read access to struct canxl_frame::len inside of a j1939 created skbuff revealed a missing initialization of reserved and later filled elements in struct can_frame. This patch initializes the 8 byte CAN header with zero. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Cc: Oleksij Rempel Link: https://lore.kernel.org/linux-can/20221104052235.GA6474@pengutronix.de Reported-by: syzbot+d168ec0caca4697e03b1@syzkaller.appspotmail.com Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20221104075000.105414-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/j1939/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 144c86b0e3ff..821d4ff303b3 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -336,6 +336,9 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb) /* re-claim the CAN_HDR from the SKB */ cf = skb_push(skb, J1939_CAN_HDR); + /* initialize header structure */ + memset(cf, 0, J1939_CAN_HDR); + /* make it a full can frame again */ skb_put(skb, J1939_CAN_FTR + (8 - dlc)); From ae64438be1923e3c1102d90fd41db7afcfaf54cc Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 2 Nov 2022 10:54:31 +0100 Subject: [PATCH 268/328] can: dev: fix skb drop check In commit a6d190f8c767 ("can: skb: drop tx skb if in listen only mode") the priv->ctrlmode element is read even on virtual CAN interfaces that do not create the struct can_priv at startup. This out-of-bounds read may lead to CAN frame drops for virtual CAN interfaces like vcan and vxcan. This patch mainly reverts the original commit and adds a new helper for CAN interface drivers that provide the required information in struct can_priv. Fixes: a6d190f8c767 ("can: skb: drop tx skb if in listen only mode") Reported-by: Dariusz Stojaczyk Cc: Vincent Mailhol Cc: Max Staudt Signed-off-by: Oliver Hartkopp Acked-by: Vincent Mailhol Link: https://lore.kernel.org/all/20221102095431.36831-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org # 6.0.x [mkl: patch pch_can, too] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/at91_can.c | 2 +- drivers/net/can/c_can/c_can_main.c | 2 +- drivers/net/can/can327.c | 2 +- drivers/net/can/cc770/cc770.c | 2 +- drivers/net/can/ctucanfd/ctucanfd_base.c | 2 +- drivers/net/can/dev/skb.c | 10 +--------- drivers/net/can/flexcan/flexcan-core.c | 2 +- drivers/net/can/grcan.c | 2 +- drivers/net/can/ifi_canfd/ifi_canfd.c | 2 +- drivers/net/can/janz-ican3.c | 2 +- drivers/net/can/kvaser_pciefd.c | 2 +- drivers/net/can/m_can/m_can.c | 2 +- drivers/net/can/mscan/mscan.c | 2 +- drivers/net/can/pch_can.c | 2 +- drivers/net/can/peak_canfd/peak_canfd.c | 2 +- drivers/net/can/rcar/rcar_can.c | 2 +- drivers/net/can/rcar/rcar_canfd.c | 2 +- drivers/net/can/sja1000/sja1000.c | 2 +- drivers/net/can/slcan/slcan-core.c | 2 +- drivers/net/can/softing/softing_main.c | 2 +- drivers/net/can/spi/hi311x.c | 2 +- drivers/net/can/spi/mcp251x.c | 2 +- drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c | 2 +- drivers/net/can/sun4i_can.c | 2 +- drivers/net/can/ti_hecc.c | 2 +- drivers/net/can/usb/ems_usb.c | 2 +- drivers/net/can/usb/esd_usb.c | 2 +- drivers/net/can/usb/etas_es58x/es58x_core.c | 2 +- drivers/net/can/usb/gs_usb.c | 2 +- drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 2 +- drivers/net/can/usb/mcba_usb.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +- drivers/net/can/usb/ucan.c | 2 +- drivers/net/can/usb/usb_8dev.c | 2 +- drivers/net/can/xilinx_can.c | 2 +- include/linux/can/dev.h | 16 ++++++++++++++++ 36 files changed, 51 insertions(+), 43 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 3a2d109a3792..199cb200f2bd 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -452,7 +452,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int mb, prio; u32 reg_mid, reg_mcr; - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; mb = get_tx_next_mb(priv); diff --git a/drivers/net/can/c_can/c_can_main.c b/drivers/net/can/c_can/c_can_main.c index d6605dbb7737..c63f7fc1e691 100644 --- a/drivers/net/can/c_can/c_can_main.c +++ b/drivers/net/can/c_can/c_can_main.c @@ -457,7 +457,7 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb, struct c_can_tx_ring *tx_ring = &priv->tx; u32 idx, obj, cmd = IF_COMM_TX; - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; if (c_can_tx_busy(priv, tx_ring)) diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index 0aa1af31d0fe..094197780776 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -813,7 +813,7 @@ static netdev_tx_t can327_netdev_start_xmit(struct sk_buff *skb, struct can327 *elm = netdev_priv(dev); struct can_frame *frame = (struct can_frame *)skb->data; - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; /* We shouldn't get here after a hardware fault: diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 0b9dfc76e769..30909f3aab57 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -429,7 +429,7 @@ static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev) struct cc770_priv *priv = netdev_priv(dev); unsigned int mo = obj2msgobj(CC770_OBJ_TX); - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; netif_stop_queue(dev); diff --git a/drivers/net/can/ctucanfd/ctucanfd_base.c b/drivers/net/can/ctucanfd/ctucanfd_base.c index b8da15ea6ad9..64c349fd4600 100644 --- a/drivers/net/can/ctucanfd/ctucanfd_base.c +++ b/drivers/net/can/ctucanfd/ctucanfd_base.c @@ -600,7 +600,7 @@ static netdev_tx_t ctucan_start_xmit(struct sk_buff *skb, struct net_device *nde bool ok; unsigned long flags; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; if (unlikely(!CTU_CAN_FD_TXTNF(priv))) { diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c index 791a51e2f5d6..241ec636e91f 100644 --- a/drivers/net/can/dev/skb.c +++ b/drivers/net/can/dev/skb.c @@ -5,7 +5,6 @@ */ #include -#include #include #define MOD_DESC "CAN device driver interface" @@ -337,8 +336,6 @@ static bool can_skb_headroom_valid(struct net_device *dev, struct sk_buff *skb) /* Drop a given socketbuffer if it does not contain a valid CAN frame. */ bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) { - struct can_priv *priv = netdev_priv(dev); - switch (ntohs(skb->protocol)) { case ETH_P_CAN: if (!can_is_can_skb(skb)) @@ -359,13 +356,8 @@ bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) goto inval_skb; } - if (!can_skb_headroom_valid(dev, skb)) { + if (!can_skb_headroom_valid(dev, skb)) goto inval_skb; - } else if (priv->ctrlmode & CAN_CTRLMODE_LISTENONLY) { - netdev_info_once(dev, - "interface in listen only mode, dropping skb\n"); - goto inval_skb; - } return false; diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c index 5ee38e586fd8..9bdadd716f4e 100644 --- a/drivers/net/can/flexcan/flexcan-core.c +++ b/drivers/net/can/flexcan/flexcan-core.c @@ -742,7 +742,7 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de u32 ctrl = FLEXCAN_MB_CODE_TX_DATA | ((can_fd_len2dlc(cfd->len)) << 16); int i; - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; netif_stop_queue(dev); diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 6c37aab93eb3..4bedcc3eea0d 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1345,7 +1345,7 @@ static netdev_tx_t grcan_start_xmit(struct sk_buff *skb, unsigned long flags; u32 oneshotmode = priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT; - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; /* Trying to transmit in silent mode will generate error interrupts, but diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 8d42b7e6661f..07eaf724a572 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -860,7 +860,7 @@ static netdev_tx_t ifi_canfd_start_xmit(struct sk_buff *skb, u32 txst, txid, txdlc; int i; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; /* Check if the TX buffer is full */ diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index 71a2caae0757..0732a5092141 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -1693,7 +1693,7 @@ static netdev_tx_t ican3_xmit(struct sk_buff *skb, struct net_device *ndev) void __iomem *desc_addr; unsigned long flags; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; spin_lock_irqsave(&mod->lock, flags); diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 4e9680c8eb34..bcad11709bc9 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -772,7 +772,7 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, int nwords; u8 count; - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; nwords = kvaser_pciefd_prepare_tx_packet(&packet, can, skb); diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index dcb582563d5e..00d11e95fd98 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1721,7 +1721,7 @@ static netdev_tx_t m_can_start_xmit(struct sk_buff *skb, { struct m_can_classdev *cdev = netdev_priv(dev); - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; if (cdev->is_peripheral) { diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index 2119fbb287ef..a6829cdc0e81 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -191,7 +191,7 @@ static netdev_tx_t mscan_start_xmit(struct sk_buff *skb, struct net_device *dev) int i, rtr, buf_id; u32 can_id; - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; out_8(®s->cantier, 0); diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index 0558ff67ec6a..2a44b2803e55 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -882,7 +882,7 @@ static netdev_tx_t pch_xmit(struct sk_buff *skb, struct net_device *ndev) int i; u32 id2; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; tx_obj_no = priv->tx_obj; diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index f8420cc1d907..31c9c127e24b 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -651,7 +651,7 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb, int room_left; u8 len; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; msg_size = ALIGN(sizeof(*msg) + cf->len, 4); diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 6ee968c59ac9..cc43c9c5e38c 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -590,7 +590,7 @@ static netdev_tx_t rcar_can_start_xmit(struct sk_buff *skb, struct can_frame *cf = (struct can_frame *)skb->data; u32 data, i; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; if (cf->can_id & CAN_EFF_FLAG) /* Extended frame format */ diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index 198da643ee6d..d530e986f7fa 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1481,7 +1481,7 @@ static netdev_tx_t rcar_canfd_start_xmit(struct sk_buff *skb, unsigned long flags; u32 ch = priv->channel; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; if (cf->can_id & CAN_EFF_FLAG) { diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 1bb1129b0450..aac5956e4a53 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -291,7 +291,7 @@ static netdev_tx_t sja1000_start_xmit(struct sk_buff *skb, u8 cmd_reg_val = 0x00; int i; - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; netif_stop_queue(dev); diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index 8d13fdf8c28a..fbb34139daa1 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -594,7 +594,7 @@ static netdev_tx_t slcan_netdev_xmit(struct sk_buff *skb, { struct slcan *sl = netdev_priv(dev); - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; spin_lock(&sl->lock); diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c index a5ef57f415f7..c72f505d29fe 100644 --- a/drivers/net/can/softing/softing_main.c +++ b/drivers/net/can/softing/softing_main.c @@ -60,7 +60,7 @@ static netdev_tx_t softing_netdev_start_xmit(struct sk_buff *skb, struct can_frame *cf = (struct can_frame *)skb->data; uint8_t buf[DPRAM_TX_SIZE]; - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; spin_lock(&card->spin); diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index b87dc420428d..e1b8533a602e 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -373,7 +373,7 @@ static netdev_tx_t hi3110_hard_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (can_dropped_invalid_skb(net, skb)) + if (can_dev_dropped_skb(net, skb)) return NETDEV_TX_OK; netif_stop_queue(net); diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 24883a65ca66..79c4bab5f724 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -789,7 +789,7 @@ static netdev_tx_t mcp251x_hard_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (can_dropped_invalid_skb(net, skb)) + if (can_dev_dropped_skb(net, skb)) return NETDEV_TX_OK; netif_stop_queue(net); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c index ffb6c36b7d9b..160528d3cc26 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c @@ -172,7 +172,7 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, u8 tx_head; int err; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; if (mcp251xfd_tx_busy(priv, tx_ring)) diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index 525309da1320..2b78f9197681 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -429,7 +429,7 @@ static netdev_tx_t sun4ican_start_xmit(struct sk_buff *skb, struct net_device *d canid_t id; int i; - if (can_dropped_invalid_skb(dev, skb)) + if (can_dev_dropped_skb(dev, skb)) return NETDEV_TX_OK; netif_stop_queue(dev); diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index b218fb3c6b76..27700f72eac2 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -470,7 +470,7 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev) u32 mbxno, mbx_mask, data; unsigned long flags; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; mbxno = get_tx_head_mb(priv); diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index d31191686a54..050c0b49938a 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -747,7 +747,7 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne size_t size = CPC_HEADER_SIZE + CPC_MSG_HEADER_LEN + sizeof(struct cpc_can_msg); - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; /* create a URB, and a buffer for it, and copy the data to the URB */ diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c index 1bcfad11b1e4..81b88e9e5bdc 100644 --- a/drivers/net/can/usb/esd_usb.c +++ b/drivers/net/can/usb/esd_usb.c @@ -725,7 +725,7 @@ static netdev_tx_t esd_usb_start_xmit(struct sk_buff *skb, int ret = NETDEV_TX_OK; size_t size = sizeof(struct esd_usb_msg); - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; /* create a URB, and a buffer for it, and copy the data to the URB */ diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 51294b717040..25f863b4f5f0 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -1913,7 +1913,7 @@ static netdev_tx_t es58x_start_xmit(struct sk_buff *skb, unsigned int frame_len; int ret; - if (can_dropped_invalid_skb(netdev, skb)) { + if (can_dev_dropped_skb(netdev, skb)) { if (priv->tx_urb) goto xmit_commit; return NETDEV_TX_OK; diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index f0065d40eb24..9c2c25fde3d1 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -723,7 +723,7 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, unsigned int idx; struct gs_tx_context *txc; - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; /* find an empty context to keep track of transmission */ diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index e91648ed7386..802e27c0eced 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -570,7 +570,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, unsigned int i; unsigned long flags; - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; urb = usb_alloc_urb(0, GFP_ATOMIC); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 69346c63021f..218b098b261d 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -311,7 +311,7 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, .cmd_id = MBCA_CMD_TRANSMIT_MESSAGE_EV }; - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; ctx = mcba_usb_get_free_ctx(priv, cf); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 225697d70a9a..1d996d3320fe 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -351,7 +351,7 @@ static netdev_tx_t peak_usb_ndo_start_xmit(struct sk_buff *skb, int i, err; size_t size = dev->adapter->tx_buffer_size; - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; for (i = 0; i < PCAN_USB_MAX_TX_URBS; i++) diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index 7c35f50fda4e..67c2ff407d06 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -1120,7 +1120,7 @@ static netdev_tx_t ucan_start_xmit(struct sk_buff *skb, struct can_frame *cf = (struct can_frame *)skb->data; /* check skb */ - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; /* allocate a context and slow down tx path, if fifo state is low */ diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 64c00abe91cf..8a5596ce4e46 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -602,7 +602,7 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, int i, err; size_t size = sizeof(struct usb_8dev_tx_msg); - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; /* create a URB, and a buffer for it, and copy the data to the URB */ diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 5d3172795ad0..43c812ea1de0 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -743,7 +743,7 @@ static netdev_tx_t xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct xcan_priv *priv = netdev_priv(ndev); int ret; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; if (priv->devtype.flags & XCAN_FLAG_TX_MAILBOXES) diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 58f5431a5559..982ba245eb41 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -152,6 +152,22 @@ static inline bool can_is_canxl_dev_mtu(unsigned int mtu) return (mtu >= CANXL_MIN_MTU && mtu <= CANXL_MAX_MTU); } +/* drop skb if it does not contain a valid CAN frame for sending */ +static inline bool can_dev_dropped_skb(struct net_device *dev, struct sk_buff *skb) +{ + struct can_priv *priv = netdev_priv(dev); + + if (priv->ctrlmode & CAN_CTRLMODE_LISTENONLY) { + netdev_info_once(dev, + "interface in listen only mode, dropping skb\n"); + kfree_skb(skb); + dev->stats.tx_dropped++; + return true; + } + + return can_dropped_invalid_skb(dev, skb); +} + void can_setup(struct net_device *dev); struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, From 8b043dfb3dc7c32f9c2c0c93e3c2de346ee5e358 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 28 Oct 2022 12:06:45 +0200 Subject: [PATCH 269/328] can: rcar_canfd: Add missing ECC error checks for channels 2-7 When introducing support for R-Car V3U, which has 8 instead of 2 channels, the ECC error bitmask was extended to take into account the extra channels, but rcar_canfd_global_error() was not updated to act upon the extra bits. Replace the RCANFD_GERFL_EEF[01] macros by a new macro that takes the channel number, fixing R-Car V3U while simplifying the code. Fixes: 45721c406dcf50d4 ("can: rcar_canfd: Add support for r8a779a0 SoC") Signed-off-by: Geert Uytterhoeven Reviewed-by: Biju Das Link: https://lore.kernel.org/all/4edb2ea46cc64d0532a08a924179827481e14b4f.1666951503.git.geert+renesas@glider.be Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar/rcar_canfd.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index d530e986f7fa..b306cf554634 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -81,8 +81,7 @@ enum rcanfd_chip_id { /* RSCFDnCFDGERFL / RSCFDnGERFL */ #define RCANFD_GERFL_EEF0_7 GENMASK(23, 16) -#define RCANFD_GERFL_EEF1 BIT(17) -#define RCANFD_GERFL_EEF0 BIT(16) +#define RCANFD_GERFL_EEF(ch) BIT(16 + (ch)) #define RCANFD_GERFL_CMPOF BIT(3) /* CAN FD only */ #define RCANFD_GERFL_THLES BIT(2) #define RCANFD_GERFL_MES BIT(1) @@ -90,7 +89,7 @@ enum rcanfd_chip_id { #define RCANFD_GERFL_ERR(gpriv, x) \ ((x) & (reg_v3u(gpriv, RCANFD_GERFL_EEF0_7, \ - RCANFD_GERFL_EEF0 | RCANFD_GERFL_EEF1) | \ + RCANFD_GERFL_EEF(0) | RCANFD_GERFL_EEF(1)) | \ RCANFD_GERFL_MES | \ ((gpriv)->fdmode ? RCANFD_GERFL_CMPOF : 0))) @@ -936,12 +935,8 @@ static void rcar_canfd_global_error(struct net_device *ndev) u32 ridx = ch + RCANFD_RFFIFO_IDX; gerfl = rcar_canfd_read(priv->base, RCANFD_GERFL); - if ((gerfl & RCANFD_GERFL_EEF0) && (ch == 0)) { - netdev_dbg(ndev, "Ch0: ECC Error flag\n"); - stats->tx_dropped++; - } - if ((gerfl & RCANFD_GERFL_EEF1) && (ch == 1)) { - netdev_dbg(ndev, "Ch1: ECC Error flag\n"); + if (gerfl & RCANFD_GERFL_EEF(ch)) { + netdev_dbg(ndev, "Ch%u: ECC Error flag\n", ch); stats->tx_dropped++; } if (gerfl & RCANFD_GERFL_MES) { From 0fca385d6ebc3cabb20f67bcf8a71f1448bdc001 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Thu, 3 Nov 2022 16:33:01 +0800 Subject: [PATCH 270/328] btrfs: fix match incorrectly in dev_args_match_device syzkaller found a failed assertion: assertion failed: (args->devid != (u64)-1) || args->missing, in fs/btrfs/volumes.c:6921 This can be triggered when we set devid to (u64)-1 by ioctl. In this case, the match of devid will be skipped and the match of device may succeed incorrectly. Patch 562d7b1512f7 introduced this function which is used to match device. This function contains two matching scenarios, we can distinguish them by checking the value of args->missing rather than check whether args->devid and args->uuid is default value. Reported-by: syzbot+031687116258450f9853@syzkaller.appspotmail.com Fixes: 562d7b1512f7 ("btrfs: handle device lookup with btrfs_dev_lookup_args") CC: stable@vger.kernel.org # 5.16+ Reviewed-by: Nikolay Borisov Signed-off-by: Liu Shixin Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a8d4bc6a1937..f09d09c259f5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6918,18 +6918,18 @@ static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args, static bool dev_args_match_device(const struct btrfs_dev_lookup_args *args, const struct btrfs_device *device) { - ASSERT((args->devid != (u64)-1) || args->missing); + if (args->missing) { + if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) && + !device->bdev) + return true; + return false; + } - if ((args->devid != (u64)-1) && device->devid != args->devid) + if (device->devid != args->devid) return false; if (args->uuid && memcmp(device->uuid, args->uuid, BTRFS_UUID_SIZE) != 0) return false; - if (!args->missing) - return true; - if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) && - !device->bdev) - return true; - return false; + return true; } /* From 9b2f20344d450137d015b380ff0c2e2a6a170135 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 1 Nov 2022 10:53:54 +0800 Subject: [PATCH 271/328] btrfs: selftests: fix wrong error check in btrfs_free_dummy_root() The btrfs_alloc_dummy_root() uses ERR_PTR as the error return value rather than NULL, if error happened, there will be a NULL pointer dereference: BUG: KASAN: null-ptr-deref in btrfs_free_dummy_root+0x21/0x50 [btrfs] Read of size 8 at addr 000000000000002c by task insmod/258926 CPU: 2 PID: 258926 Comm: insmod Tainted: G W 6.1.0-rc2+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 Call Trace: dump_stack_lvl+0x34/0x44 kasan_report+0xb7/0x140 kasan_check_range+0x145/0x1a0 btrfs_free_dummy_root+0x21/0x50 [btrfs] btrfs_test_free_space_cache+0x1a8c/0x1add [btrfs] btrfs_run_sanity_tests+0x65/0x80 [btrfs] init_btrfs_fs+0xec/0x154 [btrfs] do_one_initcall+0x87/0x2a0 do_init_module+0xdf/0x320 load_module+0x3006/0x3390 __do_sys_finit_module+0x113/0x1b0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: aaedb55bc08f ("Btrfs: add tests for btrfs_get_extent") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Anand Jain Signed-off-by: Zhang Xiaoxu Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tests/btrfs-tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 9c478fa256f6..d43cb5242fec 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -200,7 +200,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) void btrfs_free_dummy_root(struct btrfs_root *root) { - if (!root) + if (IS_ERR_OR_NULL(root)) return; /* Will be freed by btrfs_free_fs_roots */ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state))) From 8bb808c6ad91ec3d332f072ce8f8aa4b16e307e0 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Nov 2022 14:39:01 +0100 Subject: [PATCH 272/328] btrfs: don't print stack trace when transaction is aborted due to ENOMEM Add ENOMEM among the error codes that don't print stack trace on transaction abort. We've got several reports from syzbot that detects stacks as errors but caused by limiting memory. As this is an artificial condition we don't need to know where exactly the error happens, the abort and error cleanup will continue like e.g. for EIO. As the transaction aborts code needs to be inline in a lot of code, the implementation cases about minimal bloat. The error codes are in a separate function and the WARN uses the condition directly. This increases the code size by 571 bytes on release build. Alternatives considered: add -ENOMEM among the errors, this increases size by 2340 bytes, various attempts to combine the WARN and helper calls, increase by 700 or more bytes. Example syzbot reports (error -12): - https://syzkaller.appspot.com/bug?extid=5244d35be7f589cf093e - https://syzkaller.appspot.com/bug?extid=9c37714c07194d816417 Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 16 ++++++++++++++++ fs/btrfs/ctree.h | 11 +++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b39b339fbf96..a9543f01184c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -113,6 +113,22 @@ noinline void btrfs_release_path(struct btrfs_path *p) } } +/* + * We want the transaction abort to print stack trace only for errors where the + * cause could be a bug, eg. due to ENOSPC, and not for common errors that are + * caused by external factors. + */ +bool __cold abort_should_print_stack(int errno) +{ + switch (errno) { + case -EIO: + case -EROFS: + case -ENOMEM: + return false; + } + return true; +} + /* * safely gets a reference on the root node of a tree. A lock * is not taken, so a concurrent writer may put a different node diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f677b49df8ae..9e6d48ff4597 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3796,9 +3796,11 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, const char *function, unsigned int line, int errno, bool first_hit); +bool __cold abort_should_print_stack(int errno); + /* * Call btrfs_abort_transaction as early as possible when an error condition is - * detected, that way the exact line number is reported. + * detected, that way the exact stack trace is reported for some errors. */ #define btrfs_abort_transaction(trans, errno) \ do { \ @@ -3807,10 +3809,11 @@ do { \ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \ &((trans)->fs_info->fs_state))) { \ first = true; \ - if ((errno) != -EIO && (errno) != -EROFS) { \ - WARN(1, KERN_DEBUG \ + if (WARN(abort_should_print_stack(errno), \ + KERN_DEBUG \ "BTRFS: Transaction aborted (error %d)\n", \ - (errno)); \ + (errno))) { \ + /* Stack trace printed. */ \ } else { \ btrfs_debug((trans)->fs_info, \ "Transaction aborted (error %d)", \ From b75b51f886e9dd8cdfca1392ad43f4e542611c00 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 7 Nov 2022 07:23:26 +0800 Subject: [PATCH 273/328] Revert "btrfs: scrub: use larger block size for data extent scrub" This reverts commit 786672e9e1a39a231806313e3c445c236588ceef. [BUG] Since commit 786672e9e1a3 ("btrfs: scrub: use larger block size for data extent scrub"), btrfs scrub no longer reports errors if the corruption is not in the first sector of a STRIPE_LEN. The following script can expose the problem: mkfs.btrfs -f $dev mount $dev $mnt xfs_io -f -c "pwrite -S 0xff 0 8k" $mnt/foobar umount $mnt # 13631488 is the logical bytenr of above 8K extent btrfs-map-logical -l 13631488 -b 4096 $dev mirror 1 logical 13631488 physical 13631488 device /dev/test/scratch1 # Corrupt the 2nd sector of that extent xfs_io -f -c "pwrite -S 0x00 13635584 4k" $dev mount $dev $mnt btrfs scrub start -B $mnt scrub done for 54e63f9f-0c30-4c84-a33b-5c56014629b7 Scrub started: Mon Nov 7 07:18:27 2022 Status: finished Duration: 0:00:00 Total to scrub: 536.00MiB Rate: 0.00B/s Error summary: no errors found <<< [CAUSE] That offending commit enlarges the data extent scrub size from sector size to BTRFS_STRIPE_LEN, to avoid extra scrub_block to be allocated. But unfortunately the data extent scrub is still heavily relying on the fact that there is only one scrub_sector per scrub_block. Thus it will only check the first sector, and ignoring the remaining sectors. Furthermore the error reporting is not able to handle multiple sectors either. [FIX] For now just revert the offending commit. The consequence is just extra memory usage during scrub. We will need a proper change to make the remaining data scrub path to handle multiple sectors before we enlarging the data scrub size. Reported-by: Li Zhang Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f260c53829e5..b659c67af1e0 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2672,17 +2672,11 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map, u8 csum[BTRFS_CSUM_SIZE]; u32 blocksize; - /* - * Block size determines how many scrub_block will be allocated. Here - * we use BTRFS_STRIPE_LEN (64KiB) as default limit, so we won't - * allocate too many scrub_block, while still won't cause too large - * bios for large extents. - */ if (flags & BTRFS_EXTENT_FLAG_DATA) { if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) blocksize = map->stripe_len; else - blocksize = BTRFS_STRIPE_LEN; + blocksize = sctx->fs_info->sectorsize; spin_lock(&sctx->stat_lock); sctx->stat.data_extents_scrubbed++; sctx->stat.data_bytes_scrubbed += len; From 21e61ec6d0bb786818490e926aa9aeb4de95ad0d Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 4 Nov 2022 07:12:33 -0700 Subject: [PATCH 274/328] btrfs: zoned: clone zoned device info when cloning a device When cloning a btrfs_device, we're not cloning the associated btrfs_zoned_device_info structure of the device in case of a zoned filesystem. Later on this leads to a NULL pointer dereference when accessing the device's zone_info for instance when setting a zone as active. This was uncovered by fstests' testcase btrfs/161. CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 12 ++++++++++++ fs/btrfs/zoned.c | 40 ++++++++++++++++++++++++++++++++++++++++ fs/btrfs/zoned.h | 11 +++++++++++ 3 files changed, 63 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f09d09c259f5..3cb968ede675 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1011,6 +1011,18 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) rcu_assign_pointer(device->name, name); } + if (orig_dev->zone_info) { + struct btrfs_zoned_device_info *zone_info; + + zone_info = btrfs_clone_dev_zone_info(orig_dev); + if (!zone_info) { + btrfs_free_device(device); + ret = -ENOMEM; + goto error; + } + device->zone_info = zone_info; + } + list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; fs_devices->num_devices++; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index e2d073b08a7d..1912abf6d020 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -639,6 +639,46 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device) device->zone_info = NULL; } +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev) +{ + struct btrfs_zoned_device_info *zone_info; + + zone_info = kmemdup(orig_dev->zone_info, sizeof(*zone_info), GFP_KERNEL); + if (!zone_info) + return NULL; + + zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if (!zone_info->seq_zones) + goto out; + + bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones, + zone_info->nr_zones); + + zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if (!zone_info->empty_zones) + goto out; + + bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones, + zone_info->nr_zones); + + zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); + if (!zone_info->active_zones) + goto out; + + bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones, + zone_info->nr_zones); + zone_info->zone_cache = NULL; + + return zone_info; + +out: + bitmap_free(zone_info->seq_zones); + bitmap_free(zone_info->empty_zones); + bitmap_free(zone_info->active_zones); + kfree(zone_info); + return NULL; +} + int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) { diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index e17462db3a84..8bd16d40b7c6 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -36,6 +36,7 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info); int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache); void btrfs_destroy_dev_zone_info(struct btrfs_device *device); +struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev); int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info); int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info); int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, @@ -103,6 +104,16 @@ static inline int btrfs_get_dev_zone_info(struct btrfs_device *device, static inline void btrfs_destroy_dev_zone_info(struct btrfs_device *device) { } +/* + * In case the kernel is compiled without CONFIG_BLK_DEV_ZONED we'll never call + * into btrfs_clone_dev_zone_info() so it's safe to return NULL here. + */ +static inline struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info( + struct btrfs_device *orig_dev) +{ + return NULL; +} + static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info) { if (!btrfs_is_zoned(fs_info)) From a8d1b1647bf8244a5f270538e9e636e2657fffa3 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 4 Nov 2022 07:12:34 -0700 Subject: [PATCH 275/328] btrfs: zoned: initialize device's zone info for seeding When performing seeding on a zoned filesystem it is necessary to initialize each zoned device's btrfs_zoned_device_info structure, otherwise mounting the filesystem will cause a NULL pointer dereference. This was uncovered by fstests' testcase btrfs/163. CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 +++- fs/btrfs/volumes.c | 11 +++++++++-- fs/btrfs/volumes.h | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4b28263c3d32..d99bf7c64611 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2551,7 +2551,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) fs_info->dev_root = root; } /* Initialize fs_info for all devices in any case */ - btrfs_init_devices_late(fs_info); + ret = btrfs_init_devices_late(fs_info); + if (ret) + goto out; /* * This tree can share blocks with some other fs tree during relocation diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3cb968ede675..635f45f1a2ef 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7756,10 +7756,11 @@ error: return ret; } -void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) +int btrfs_init_devices_late(struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; struct btrfs_device *device; + int ret = 0; fs_devices->fs_info = fs_info; @@ -7768,12 +7769,18 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) device->fs_info = fs_info; list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) { - list_for_each_entry(device, &seed_devs->devices, dev_list) + list_for_each_entry(device, &seed_devs->devices, dev_list) { device->fs_info = fs_info; + ret = btrfs_get_dev_zone_info(device, false); + if (ret) + break; + } seed_devs->fs_info = fs_info; } mutex_unlock(&fs_devices->device_list_mutex); + + return ret; } static u64 btrfs_dev_stats_value(const struct extent_buffer *eb, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f8b668dc8bf8..099def5613b8 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -671,7 +671,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, struct btrfs_ioctl_get_dev_stats *stats); -void btrfs_init_devices_late(struct btrfs_fs_info *fs_info); +int btrfs_init_devices_late(struct btrfs_fs_info *fs_info); int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); int btrfs_run_dev_stats(struct btrfs_trans_handle *trans); void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev); From c62f6bec53e63b11112e1ebce6bbaa39ce6f6706 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Fri, 4 Nov 2022 07:12:35 -0700 Subject: [PATCH 276/328] btrfs: zoned: fix locking imbalance on scrub If we're doing device replace on a zoned filesystem and discover in scrub_enumerate_chunks() that we don't have to copy the block group it is unlocked before it gets skipped. But as the block group hasn't yet been locked before it leads to a locking imbalance. To fix this simply remove the unlock. This was uncovered by fstests' testcase btrfs/163. Fixes: 9283b9e09a6d ("btrfs: remove lock protection for BLOCK_GROUP_FLAG_TO_COPY") Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b659c67af1e0..196c4c6ed1ed 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3911,7 +3911,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) { if (!test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags)) { - spin_unlock(&cache->lock); btrfs_put_block_group(cache); goto skip; } From c18c20f16219516b12a4f2fd29c25e06be97e064 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 7 Nov 2022 17:11:27 +0100 Subject: [PATCH 277/328] mm, slab: remove duplicate kernel-doc comment for ksize() Akira reports: > "make htmldocs" reports duplicate C declaration of ksize() as follows: > /linux/Documentation/core-api/mm-api:43: ./mm/slab_common.c:1428: WARNING: Duplicate C declaration, also defined at core-api/mm-api:212. > Declaration is '.. c:function:: size_t ksize (const void *objp)'. > This is due to the kernel-doc comment for ksize() declaration added in > include/linux/slab.h by commit 05a940656e1e ("slab: Introduce > kmalloc_size_roundup()"). There is an older kernel-doc comment for ksize() definition in mm/slab_common.c, which is not only duplicated, but also contradicts the new one - the additional storage discovered by ksize() should not be used by callers anymore. Delete the old kernel-doc. Reported-by: Akira Yokosawa Link: https://lore.kernel.org/all/d33440f6-40cf-9747-3340-e54ffaf7afb8@gmail.com/ Fixes: 05a940656e1e ("slab: Introduce kmalloc_size_roundup()") Cc: Kees Cook Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index fa784563e1ed..0042fb2730d1 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1409,20 +1409,6 @@ void kfree_sensitive(const void *p) } EXPORT_SYMBOL(kfree_sensitive); -/** - * ksize - get the actual amount of memory allocated for a given object - * @objp: Pointer to the object - * - * kmalloc may internally round up allocations and return more memory - * than requested. ksize() can be used to determine the actual amount of - * memory allocated. The caller may use this additional memory, even though - * a smaller amount of memory was initially specified with the kmalloc call. - * The caller must guarantee that objp points to a valid object previously - * allocated with either kmalloc() or kmem_cache_alloc(). The object - * must not be freed during the duration of the call. - * - * Return: size of the actual memory used by @objp in bytes - */ size_t ksize(const void *objp) { size_t size; From 59f2f4b8a757412fce372f6d0767bdb55da127a8 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 7 Nov 2022 20:11:42 +0000 Subject: [PATCH 278/328] fs/userfaultfd: Fix maple tree iterator in userfaultfd_unregister() When iterating the VMAs, the maple state needs to be invalidated if the tree is modified by a split or merge to ensure the maple tree node contained in the maple state is still valid. These invalidations were missed, so add them to the paths which alter the tree. Reported-by: syzbot+0d2014e4da2ccced5b41@syzkaller.appspotmail.com Fixes: 69dbe6daf104 (userfaultfd: use maple tree iterator to iterate VMAs) Signed-off-by: Liam R. Howlett Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 07c81ab3fd4d..98ac37e34e3d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1630,17 +1630,20 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, NULL_VM_UFFD_CTX, anon_vma_name(vma)); if (prev) { vma = prev; + mas_pause(&mas); goto next; } if (vma->vm_start < start) { ret = split_vma(mm, vma, start, 1); if (ret) break; + mas_pause(&mas); } if (vma->vm_end > end) { ret = split_vma(mm, vma, end, 0); if (ret) break; + mas_pause(&mas); } next: /* From ed4314f7729714d788698ade4f9905ee5378ebc0 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 4 Nov 2022 09:30:04 +0100 Subject: [PATCH 279/328] net: stmmac: dwmac-meson8b: fix meson8b_devm_clk_prepare_enable() There are two problems with meson8b_devm_clk_prepare_enable(), introduced in commit a54dc4a49045 ("net: stmmac: dwmac-meson8b: Make the clock enabling code re-usable"): - It doesn't pass the clk argument, but instead always the rgmii_tx_clk of the device. - It silently ignores the return value of devm_add_action_or_reset(). The former didn't become an actual bug until another user showed up in the next commit 9308c47640d5 ("net: stmmac: dwmac-meson8b: add support for the RX delay configuration"). The latter means the callers could end up with the clock not actually prepared/enabled. Fixes: a54dc4a49045 ("net: stmmac: dwmac-meson8b: Make the clock enabling code re-usable") Signed-off-by: Rasmus Villemoes Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20221104083004.2212520-1-linux@rasmusvillemoes.dk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index c7a6588d9398..e8b507f88fbc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -272,11 +272,9 @@ static int meson8b_devm_clk_prepare_enable(struct meson8b_dwmac *dwmac, if (ret) return ret; - devm_add_action_or_reset(dwmac->dev, - (void(*)(void *))clk_disable_unprepare, - dwmac->rgmii_tx_clk); - - return 0; + return devm_add_action_or_reset(dwmac->dev, + (void(*)(void *))clk_disable_unprepare, + clk); } static int meson8b_init_rgmii_delays(struct meson8b_dwmac *dwmac) From ec683f02a150b9c4428f08accd387c8c216ea0e5 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Fri, 4 Nov 2022 17:21:47 +0100 Subject: [PATCH 280/328] dt-bindings: net: tsnep: Fix typo on generic nvmem property While working on the nvmem description I figured out this file had the "nvmem-cell-names" property name misspelled. Fix the typo, as "nvmem-cells-names" has never existed. Fixes: 603094b2cdb7 ("dt-bindings: net: Add tsnep Ethernet controller") Signed-off-by: Miquel Raynal Reviewed-by: Gerhard Engleder Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221104162147.1288230-1-miquel.raynal@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/engleder,tsnep.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml index 5bd964a46a9d..a6921e805e37 100644 --- a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml +++ b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml @@ -47,7 +47,7 @@ properties: nvmem-cells: true - nvmem-cells-names: true + nvmem-cell-names: true phy-connection-type: enum: From 02f5999e652952d69c341a03d4313310703fd7f1 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 5 Nov 2022 14:34:42 +0800 Subject: [PATCH 281/328] octeontx2-pf: fix build error when CONFIG_OCTEONTX2_PF=y If CONFIG_MACSEC=m and CONFIG_OCTEONTX2_PF=y, it leads a build error: ld: drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.o: in function `otx2_pfaf_mbox_up_handler': otx2_pf.c:(.text+0x181c): undefined reference to `cn10k_handle_mcs_event' ld: drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.o: in function `otx2_probe': otx2_pf.c:(.text+0x437e): undefined reference to `cn10k_mcs_init' ld: drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.o: in function `otx2_remove': otx2_pf.c:(.text+0x5031): undefined reference to `cn10k_mcs_free' ld: drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.o: in function `otx2_mbox_up_handler_mcs_intr_notify': otx2_pf.c:(.text+0x5f11): undefined reference to `cn10k_handle_mcs_event' Make CONFIG_OCTEONTX2_PF depends on CONFIG_MACSEC to fix it. Because it has empty stub functions of cn10k, CONFIG_OCTEONTX2_PF can be enabled if CONFIG_MACSEC is disabled Fixes: c54ffc73601c ("octeontx2-pf: mcs: Introduce MACSEC hardware offloading") Reported-by: kernel test robot Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221105063442.2013981-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 993ac180a5db..6b4f640163f7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -32,6 +32,7 @@ config OCTEONTX2_PF tristate "Marvell OcteonTX2 NIC Physical Function driver" select OCTEONTX2_MBOX select NET_DEVLINK + depends on MACSEC || !MACSEC depends on (64BIT && COMPILE_TEST) || ARM64 select DIMLIB depends on PCI From 8d820bc9d12b8beebca836cceaf2bbe68216c2f8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 5 Nov 2022 17:02:45 +0800 Subject: [PATCH 282/328] net: broadcom: Fix BCMGENET Kconfig While BCMGENET select BROADCOM_PHY as y, but PTP_1588_CLOCK_OPTIONAL is m, kconfig warning and build errors: WARNING: unmet direct dependencies detected for BROADCOM_PHY Depends on [m]: NETDEVICES [=y] && PHYLIB [=y] && PTP_1588_CLOCK_OPTIONAL [=m] Selected by [y]: - BCMGENET [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_BROADCOM [=y] && HAS_IOMEM [=y] && ARCH_BCM2835 [=y] drivers/net/phy/broadcom.o: In function `bcm54xx_suspend': broadcom.c:(.text+0x6ac): undefined reference to `bcm_ptp_stop' drivers/net/phy/broadcom.o: In function `bcm54xx_phy_probe': broadcom.c:(.text+0x784): undefined reference to `bcm_ptp_probe' drivers/net/phy/broadcom.o: In function `bcm54xx_config_init': broadcom.c:(.text+0xd4c): undefined reference to `bcm_ptp_config_init' Fixes: 99addbe31f55 ("net: broadcom: Select BROADCOM_PHY for BCMGENET") Signed-off-by: YueHaibing Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20221105090245.8508-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index f4e1ca68d831..55dfdb34e37b 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -77,7 +77,7 @@ config BCMGENET select BCM7XXX_PHY select MDIO_BCM_UNIMAC select DIMLIB - select BROADCOM_PHY if ARCH_BCM2835 + select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL) help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. From 1c075b192fe41030457cd4a5f7dea730412bca40 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 4 Nov 2022 16:48:53 -0400 Subject: [PATCH 283/328] tipc: fix the msg->req tlv len check in tipc_nl_compat_name_table_dump_header This is a follow-up for commit 974cb0e3e7c9 ("tipc: fix uninit-value in tipc_nl_compat_name_table_dump") where it should have type casted sizeof(..) to int to work when TLV_GET_DATA_LEN() returns a negative value. syzbot reported a call trace because of it: BUG: KMSAN: uninit-value in ... tipc_nl_compat_name_table_dump+0x841/0xea0 net/tipc/netlink_compat.c:934 __tipc_nl_compat_dumpit+0xab2/0x1320 net/tipc/netlink_compat.c:238 tipc_nl_compat_dumpit+0x991/0xb50 net/tipc/netlink_compat.c:321 tipc_nl_compat_recv+0xb6e/0x1640 net/tipc/netlink_compat.c:1324 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] genl_family_rcv_msg net/netlink/genetlink.c:775 [inline] genl_rcv_msg+0x103f/0x1260 net/netlink/genetlink.c:792 netlink_rcv_skb+0x3a5/0x6c0 net/netlink/af_netlink.c:2501 genl_rcv+0x3c/0x50 net/netlink/genetlink.c:803 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0xf3b/0x1270 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x1288/0x1440 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] Reported-by: syzbot+e5dbaaa238680ce206ea@syzkaller.appspotmail.com Fixes: 974cb0e3e7c9 ("tipc: fix uninit-value in tipc_nl_compat_name_table_dump") Signed-off-by: Xin Long Link: https://lore.kernel.org/r/ccd6a7ea801b15aec092c3b532a883b4c5708695.1667594933.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/tipc/netlink_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index fc68733673ba..dfea27a906f2 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -880,7 +880,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) }; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); - if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query)) return -EINVAL; depth = ntohl(ntq->depth); From 9f0b773210c27a8f5d98ddb2fc4ba60a42a3285f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 4 Nov 2022 17:45:15 -0400 Subject: [PATCH 284/328] sctp: remove the unnecessary sinfo_stream check in sctp_prsctp_prune_unsent Since commit 5bbbbe32a431 ("sctp: introduce stream scheduler foundations"), sctp_stream_outq_migrate() has been called in sctp_stream_init/update to removes those chunks to streams higher than the new max. There is no longer need to do such check in sctp_prsctp_prune_unsent(). Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- net/sctp/outqueue.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e213aaf45d67..c99fe3dc19bc 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -384,6 +384,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, { struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; + struct sctp_stream_out *sout; q->sched->unsched_all(&asoc->stream); @@ -398,12 +399,9 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, sctp_sched_dequeue_common(q, chk); asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; - if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { - struct sctp_stream_out *streamout = - SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); - streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; - } + sout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); + sout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); sctp_chunk_free(chk); From 2f201ae14ae0f91dbf1cffea7bb1e29e81d4d108 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 4 Nov 2022 17:45:16 -0400 Subject: [PATCH 285/328] sctp: clear out_curr if all frag chunks of current msg are pruned A crash was reported by Zhen Chen: list_del corruption, ffffa035ddf01c18->next is NULL WARNING: CPU: 1 PID: 250682 at lib/list_debug.c:49 __list_del_entry_valid+0x59/0xe0 RIP: 0010:__list_del_entry_valid+0x59/0xe0 Call Trace: sctp_sched_dequeue_common+0x17/0x70 [sctp] sctp_sched_fcfs_dequeue+0x37/0x50 [sctp] sctp_outq_flush_data+0x85/0x360 [sctp] sctp_outq_uncork+0x77/0xa0 [sctp] sctp_cmd_interpreter.constprop.0+0x164/0x1450 [sctp] sctp_side_effects+0x37/0xe0 [sctp] sctp_do_sm+0xd0/0x230 [sctp] sctp_primitive_SEND+0x2f/0x40 [sctp] sctp_sendmsg_to_asoc+0x3fa/0x5c0 [sctp] sctp_sendmsg+0x3d5/0x440 [sctp] sock_sendmsg+0x5b/0x70 and in sctp_sched_fcfs_dequeue() it dequeued a chunk from stream out_curr outq while this outq was empty. Normally stream->out_curr must be set to NULL once all frag chunks of current msg are dequeued, as we can see in sctp_sched_dequeue_done(). However, in sctp_prsctp_prune_unsent() as it is not a proper dequeue, sctp_sched_dequeue_done() is not called to do this. This patch is to fix it by simply setting out_curr to NULL when the last frag chunk of current msg is dequeued from out_curr stream in sctp_prsctp_prune_unsent(). Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: Zhen Chen Tested-by: Caowangbao Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- net/sctp/outqueue.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c99fe3dc19bc..20831079fb09 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -403,6 +403,11 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, sout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream); sout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + /* clear out_curr if all frag chunks are pruned */ + if (asoc->stream.out_curr == sout && + list_is_last(&chk->frag_list, &chk->msg->chunks)) + asoc->stream.out_curr = NULL; + msg_len -= chk->skb->truesize + sizeof(struct sctp_chunk); sctp_chunk_free(chk); if (msg_len <= 0) From 3faf7e14ec0c3462c2d747fa6793b8645d1391df Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 7 Nov 2022 09:14:45 +0800 Subject: [PATCH 286/328] net: lapbether: fix issue of invalid opcode in lapbeth_open() If lapb_register() failed when lapb device goes to up for the first time, the NAPI is not disabled. As a result, the invalid opcode issue is reported when the lapb device goes to up for the second time. The stack info is as follows: [ 1958.311422][T11356] kernel BUG at net/core/dev.c:6442! [ 1958.312206][T11356] invalid opcode: 0000 [#1] PREEMPT SMP KASAN [ 1958.315979][T11356] RIP: 0010:napi_enable+0x16a/0x1f0 [ 1958.332310][T11356] Call Trace: [ 1958.332817][T11356] [ 1958.336135][T11356] lapbeth_open+0x18/0x90 [ 1958.337446][T11356] __dev_open+0x258/0x490 [ 1958.341672][T11356] __dev_change_flags+0x4d4/0x6a0 [ 1958.345325][T11356] dev_change_flags+0x93/0x160 [ 1958.346027][T11356] devinet_ioctl+0x1276/0x1bf0 [ 1958.346738][T11356] inet_ioctl+0x1c8/0x2d0 [ 1958.349638][T11356] sock_ioctl+0x5d1/0x750 [ 1958.356059][T11356] __x64_sys_ioctl+0x3ec/0x1790 [ 1958.365594][T11356] do_syscall_64+0x35/0x80 [ 1958.366239][T11356] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 1958.377381][T11356] Fixes: 514e1150da9c ("net: x25: Queue received packets in the drivers instead of per-CPU queues") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221107011445.207372-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/wan/lapbether.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index cb360dca3250..d62a904d2e42 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -325,6 +325,7 @@ static int lapbeth_open(struct net_device *dev) err = lapb_register(dev, &lapbeth_callbacks); if (err != LAPB_OK) { + napi_disable(&lapbeth->napi); pr_err("lapb_register error: %d\n", err); return -ENODEV; } From b0c09c7f08c2467b2089bdf4adb2fbbc2464f4a8 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 7 Nov 2022 09:21:59 +0800 Subject: [PATCH 287/328] net: ethernet: mtk-star-emac: disable napi when connect and start PHY failed in mtk_star_enable() When failed to connect to and start PHY in mtk_star_enable() for opening device, napi isn't disabled. When open mtk star device next time, it will reports a invalid opcode issue. Fix it. Only be compiled, not be tested. Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver") Signed-off-by: Zhengchao Shao Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221107012159.211387-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mediatek/mtk_star_emac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index 7e890f81148e..7050351250b7 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1026,6 +1026,8 @@ static int mtk_star_enable(struct net_device *ndev) return 0; err_free_irq: + napi_disable(&priv->rx_napi); + napi_disable(&priv->tx_napi); free_irq(ndev->irq, ndev); err_free_skbs: mtk_star_free_rx_skbs(priv); From f0dfc4c88ef39be0ba736aa0ce6119263fc19aeb Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Mon, 7 Nov 2022 09:05:05 +0530 Subject: [PATCH 288/328] octeontx2-pf: Fix SQE threshold checking Current way of checking available SQE count which is based on HW updated SQB count could result in driver submitting an SQE even before CQE for the previously transmitted SQE at the same index is processed in NAPI resulting losing SKB pointers, hence a leak. Fix this by checking a consumer index which is updated once CQE is processed. Fixes: 3ca6c4c882a7 ("octeontx2-pf: Add packet transmission support") Signed-off-by: Ratheesh Kannoth Reviewed-by: Sunil Kovvuri Goutham Link: https://lore.kernel.org/r/20221107033505.2491464-1-rkannoth@marvell.com Signed-off-by: Paolo Abeni --- .../marvell/octeontx2/nic/otx2_common.c | 1 + .../marvell/octeontx2/nic/otx2_txrx.c | 32 +++++++++++-------- .../marvell/octeontx2/nic/otx2_txrx.h | 1 + 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 9ac9e6615ae7..9e10e7471b88 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -898,6 +898,7 @@ static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) } sq->head = 0; + sq->cons_head = 0; sq->sqe_per_sqb = (pfvf->hw.sqb_size / sq->sqe_size) - 1; sq->num_sqbs = (qset->sqe_cnt + sq->sqe_per_sqb) / sq->sqe_per_sqb; /* Set SQE threshold to 10% of total SQEs */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 5ec11d71bf60..ef10aef3cda0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -441,6 +441,7 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int budget) { int tx_pkts = 0, tx_bytes = 0, qidx; + struct otx2_snd_queue *sq; struct nix_cqe_tx_s *cqe; int processed_cqe = 0; @@ -451,6 +452,9 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf, return 0; process_cqe: + qidx = cq->cq_idx - pfvf->hw.rx_queues; + sq = &pfvf->qset.sq[qidx]; + while (likely(processed_cqe < budget) && cq->pend_cqe) { cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq); if (unlikely(!cqe)) { @@ -458,18 +462,20 @@ process_cqe: return 0; break; } + if (cq->cq_type == CQ_XDP) { - qidx = cq->cq_idx - pfvf->hw.rx_queues; - otx2_xdp_snd_pkt_handler(pfvf, &pfvf->qset.sq[qidx], - cqe); + otx2_xdp_snd_pkt_handler(pfvf, sq, cqe); } else { - otx2_snd_pkt_handler(pfvf, cq, - &pfvf->qset.sq[cq->cint_idx], - cqe, budget, &tx_pkts, &tx_bytes); + otx2_snd_pkt_handler(pfvf, cq, sq, cqe, budget, + &tx_pkts, &tx_bytes); } + cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; processed_cqe++; cq->pend_cqe--; + + sq->cons_head++; + sq->cons_head &= (sq->sqe_cnt - 1); } /* Free CQEs to HW */ @@ -1072,17 +1078,17 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, { struct netdev_queue *txq = netdev_get_tx_queue(netdev, qidx); struct otx2_nic *pfvf = netdev_priv(netdev); - int offset, num_segs, free_sqe; + int offset, num_segs, free_desc; struct nix_sqe_hdr_s *sqe_hdr; - /* Check if there is room for new SQE. - * 'Num of SQBs freed to SQ's pool - SQ's Aura count' - * will give free SQE count. + /* Check if there is enough room between producer + * and consumer index. */ - free_sqe = (sq->num_sqbs - *sq->aura_fc_addr) * sq->sqe_per_sqb; + free_desc = (sq->cons_head - sq->head - 1 + sq->sqe_cnt) & (sq->sqe_cnt - 1); + if (free_desc < sq->sqe_thresh) + return false; - if (free_sqe < sq->sqe_thresh || - free_sqe < otx2_get_sqe_count(pfvf, skb)) + if (free_desc < otx2_get_sqe_count(pfvf, skb)) return false; num_segs = skb_shinfo(skb)->nr_frags + 1; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index fbe62bbfb789..93cac2c2664c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -79,6 +79,7 @@ struct sg_list { struct otx2_snd_queue { u8 aura_id; u16 head; + u16 cons_head; u16 sqe_size; u32 sqe_cnt; u16 num_sqbs; From ce9e57feeed81d17d5e80ed86f516ff0d39c3867 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 7 Nov 2022 12:30:32 +0800 Subject: [PATCH 289/328] drivers: net: xgene: disable napi when register irq failed in xgene_enet_open() When failed to register irq in xgene_enet_open() for opening device, napi isn't disabled. When open xgene device next time, it will reports a invalid opcode issue. Fix it. Only be compiled, not be tested. Fixes: aeb20b6b3f4e ("drivers: net: xgene: fix: ifconfig up/down crash") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221107043032.357673-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d6cfea65a714..390671640388 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1004,8 +1004,10 @@ static int xgene_enet_open(struct net_device *ndev) xgene_enet_napi_enable(pdata); ret = xgene_enet_register_irq(ndev); - if (ret) + if (ret) { + xgene_enet_napi_disable(pdata); return ret; + } if (ndev->phydev) { phy_start(ndev->phydev); From 03832a32bf8ff0a8305d94ddd3979835a807248f Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 3 Nov 2022 09:12:02 +0800 Subject: [PATCH 290/328] netfilter: nfnetlink: fix potential dead lock in nfnetlink_rcv_msg() When type is NFNL_CB_MUTEX and -EAGAIN error occur in nfnetlink_rcv_msg(), it does not execute nfnl_unlock(). That would trigger potential dead lock. Fixes: 50f2db9e368f ("netfilter: nfnetlink: consolidate callback types") Signed-off-by: Ziyang Xuan Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9c44518cb70f..6d18fb346868 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -294,6 +294,7 @@ replay: nfnl_lock(subsys_id); if (nfnl_dereference_protected(subsys_id) != ss || nfnetlink_find_client(type, ss) != nc) { + nfnl_unlock(subsys_id); err = -EAGAIN; break; } From 03c1f1ef1584c981935fab2fa0c45d3e43e2c235 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Thu, 3 Nov 2022 22:08:49 +0900 Subject: [PATCH 291/328] netfilter: Cleanup nft_net->module_list from nf_tables_exit_net() syzbot reported a warning like below [1]: WARNING: CPU: 3 PID: 9 at net/netfilter/nf_tables_api.c:10096 nf_tables_exit_net+0x71c/0x840 Modules linked in: CPU: 2 PID: 9 Comm: kworker/u8:0 Tainted: G W 6.1.0-rc3-00072-g8e5423e991e8 #47 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014 Workqueue: netns cleanup_net RIP: 0010:nf_tables_exit_net+0x71c/0x840 ... Call Trace: ? __nft_release_table+0xfc0/0xfc0 ops_exit_list+0xb5/0x180 cleanup_net+0x506/0xb10 ? unregister_pernet_device+0x80/0x80 process_one_work+0xa38/0x1730 ? pwq_dec_nr_in_flight+0x2b0/0x2b0 ? rwlock_bug.part.0+0x90/0x90 ? _raw_spin_lock_irq+0x46/0x50 worker_thread+0x67e/0x10e0 ? process_one_work+0x1730/0x1730 kthread+0x2e5/0x3a0 ? kthread_complete_and_exit+0x40/0x40 ret_from_fork+0x1f/0x30 In nf_tables_exit_net(), there is a case where nft_net->commit_list is empty but nft_net->module_list is not empty. Such a case occurs with the following scenario: 1. nfnetlink_rcv_batch() is called 2. nf_tables_newset() returns -EAGAIN and NFNL_BATCH_FAILURE bit is set to status 3. nf_tables_abort() is called with NFNL_ABORT_AUTOLOAD (nft_net->commit_list is released, but nft_net->module_list is not because of NFNL_ABORT_AUTOLOAD flag) 4. Jump to replay label 5. netlink_skb_clone() fails and returns from the function (this is caused by fault injection in the reproducer of syzbot) This patch fixes this issue by calling __nf_tables_abort() when nft_net->module_list is not empty in nf_tables_exit_net(). Fixes: eb014de4fd41 ("netfilter: nf_tables: autoload modules from the abort path") Link: https://syzkaller.appspot.com/bug?id=802aba2422de4218ad0c01b46c9525cc9d4e4aa3 [1] Reported-by: syzbot+178efee9e2d7f87f5103@syzkaller.appspotmail.com Signed-off-by: Shigeru Yoshida Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 76bd4d03dbda..e7152d599d73 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10090,7 +10090,8 @@ static void __net_exit nf_tables_exit_net(struct net *net) struct nftables_pernet *nft_net = nft_pernet(net); mutex_lock(&nft_net->commit_mutex); - if (!list_empty(&nft_net->commit_list)) + if (!list_empty(&nft_net->commit_list) || + !list_empty(&nft_net->module_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); __nft_release_tables(net); mutex_unlock(&nft_net->commit_mutex); From 519b58bbfa825f042fcf80261cc18e1e35f85ffd Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 8 Nov 2022 10:56:07 +0800 Subject: [PATCH 292/328] net: marvell: prestera: fix memory leak in prestera_rxtx_switch_init() When prestera_sdma_switch_init() failed, the memory pointed to by sw->rxtx isn't released. Fix it. Only be compiled, not be tested. Fixes: 501ef3066c89 ("net: marvell: prestera: Add driver for Prestera family ASIC devices") Signed-off-by: Zhengchao Shao Reviewed-by: Vadym Kochan Link: https://lore.kernel.org/r/20221108025607.338450-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_rxtx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c index 42ee963e9f75..9277a8fd1339 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c @@ -776,6 +776,7 @@ tx_done: int prestera_rxtx_switch_init(struct prestera_switch *sw) { struct prestera_rxtx *rxtx; + int err; rxtx = kzalloc(sizeof(*rxtx), GFP_KERNEL); if (!rxtx) @@ -783,7 +784,11 @@ int prestera_rxtx_switch_init(struct prestera_switch *sw) sw->rxtx = rxtx; - return prestera_sdma_switch_init(sw); + err = prestera_sdma_switch_init(sw); + if (err) + kfree(rxtx); + + return err; } void prestera_rxtx_switch_fini(struct prestera_switch *sw) From 07d120aa33cc9d9115753d159f64d20c94458781 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Nov 2022 18:00:11 +0000 Subject: [PATCH 293/328] net: tun: call napi_schedule_prep() to ensure we own a napi A recent patch exposed another issue in napi_get_frags() caught by syzbot [1] Before feeding packets to GRO, and calling napi_complete() we must first grab NAPI_STATE_SCHED. [1] WARNING: CPU: 0 PID: 3612 at net/core/dev.c:6076 napi_complete_done+0x45b/0x880 net/core/dev.c:6076 Modules linked in: CPU: 0 PID: 3612 Comm: syz-executor408 Not tainted 6.1.0-rc3-syzkaller-00175-g1118b2049d77 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 RIP: 0010:napi_complete_done+0x45b/0x880 net/core/dev.c:6076 Code: c1 ea 03 0f b6 14 02 4c 89 f0 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 24 04 00 00 41 89 5d 1c e9 73 fc ff ff e8 b5 53 22 fa <0f> 0b e9 82 fe ff ff e8 a9 53 22 fa 48 8b 5c 24 08 31 ff 48 89 de RSP: 0018:ffffc90003c4f920 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000030 RCX: 0000000000000000 RDX: ffff8880251c0000 RSI: ffffffff875a58db RDI: 0000000000000007 RBP: 0000000000000001 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff888072d02628 R13: ffff888072d02618 R14: ffff888072d02634 R15: 0000000000000000 FS: 0000555555f13300(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055c44d3892b8 CR3: 00000000172d2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: napi_complete include/linux/netdevice.h:510 [inline] tun_get_user+0x206d/0x3a60 drivers/net/tun.c:1980 tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2027 call_write_iter include/linux/fs.h:2191 [inline] do_iter_readv_writev+0x20b/0x3b0 fs/read_write.c:735 do_iter_write+0x182/0x700 fs/read_write.c:861 vfs_writev+0x1aa/0x630 fs/read_write.c:934 do_writev+0x133/0x2f0 fs/read_write.c:977 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f37021a3c19 Fixes: 1118b2049d77 ("net: tun: Fix memory leaks of napi_get_frags") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Wang Yufen Link: https://lore.kernel.org/r/20221107180011.188437-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index eb12f3136a54..7a3ab3427369 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1967,18 +1967,25 @@ drop: skb_headlen(skb)); if (unlikely(headlen > skb_headlen(skb))) { + WARN_ON_ONCE(1); + err = -ENOMEM; dev_core_stats_rx_dropped_inc(tun->dev); +napi_busy: napi_free_frags(&tfile->napi); rcu_read_unlock(); mutex_unlock(&tfile->napi_mutex); - WARN_ON(1); - return -ENOMEM; + return err; } - local_bh_disable(); - napi_gro_frags(&tfile->napi); - napi_complete(&tfile->napi); - local_bh_enable(); + if (likely(napi_schedule_prep(&tfile->napi))) { + local_bh_disable(); + napi_gro_frags(&tfile->napi); + napi_complete(&tfile->napi); + local_bh_enable(); + } else { + err = -EBUSY; + goto napi_busy; + } mutex_unlock(&tfile->napi_mutex); } else if (tfile->napi_enabled) { struct sk_buff_head *queue = &tfile->sk.sk_write_queue; From b06334919c7a068d54ba5b219c05e919d89943f7 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 7 Nov 2022 18:14:43 +0800 Subject: [PATCH 294/328] net: nixge: disable napi when enable interrupts failed in nixge_open() When failed to enable interrupts in nixge_open() for opening device, napi isn't disabled. When open nixge device next time, it will reports a invalid opcode issue. Fix it. Only be compiled, not be tested. Fixes: 492caffa8a1a ("net: ethernet: nixge: Add support for National Instruments XGE netdev") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221107101443.120205-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ni/nixge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 3db4a2431741..19d043b593cc 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -900,6 +900,7 @@ static int nixge_open(struct net_device *ndev) err_rx_irq: free_irq(priv->tx_irq, ndev); err_tx_irq: + napi_disable(&priv->napi); phy_stop(phy); phy_disconnect(phy); tasklet_kill(&priv->dma_err_tasklet); From 742c60e1285ca40642e988f7e3db92232171b27d Mon Sep 17 00:00:00 2001 From: Nick Child Date: Mon, 7 Nov 2022 14:32:15 -0600 Subject: [PATCH 295/328] ibmveth: Reduce default tx queues to 8 Previously, the default number of transmit queues was 16. Due to resource concerns, set to 8 queues instead. Still allow the user to set more queues (max 16) if they like. Since the driver is virtualized away from the physical NIC, the purpose of multiple queues is purely to allow for parallel calls to the hypervisor. Therefore, there is no noticeable effect on performance by reducing queue count to 8. Fixes: d926793c1de9 ("ibmveth: Implement multi queue on xmit") Reported-by: Dave Taht Signed-off-by: Nick Child Link: https://lore.kernel.org/r/20221107203215.58206-1-nnac123@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmveth.c | 3 ++- drivers/net/ethernet/ibm/ibmveth.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 3b14dc93f59d..5b96cd94dcd2 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1757,7 +1757,8 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) kobject_uevent(kobj, KOBJ_ADD); } - rc = netif_set_real_num_tx_queues(netdev, ibmveth_real_max_tx_queues()); + rc = netif_set_real_num_tx_queues(netdev, min(num_online_cpus(), + IBMVETH_DEFAULT_QUEUES)); if (rc) { netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n", rc); diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index daf6f615c03f..115d4c45aa77 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -100,6 +100,7 @@ static inline long h_illan_attributes(unsigned long unit_address, #define IBMVETH_MAX_BUF_SIZE (1024 * 128) #define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64) #define IBMVETH_MAX_QUEUES 16U +#define IBMVETH_DEFAULT_QUEUES 8U static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 }; static int pool_count[] = { 256, 512, 256, 256, 256 }; From 58bb78ce02269c0cf5b1f2bd2e4a605500b44c6b Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 9 Nov 2022 10:06:04 +0100 Subject: [PATCH 296/328] selftests: netfilter: Fix and review rpath.sh Address a few problems with the initial test script version: * On systems with ip6tables but no ip6tables-legacy, testing for ip6tables was disabled by accident. * Firewall setup phase did not respect possibly unavailable tools. * Consistently call nft via '$nft'. Fixes: 6e31ce831c63b ("selftests: netfilter: Test reverse path filtering") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/rpath.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/netfilter/rpath.sh index 2d8da7bd8ab7..f7311e66d219 100755 --- a/tools/testing/selftests/netfilter/rpath.sh +++ b/tools/testing/selftests/netfilter/rpath.sh @@ -15,7 +15,7 @@ fi if ip6tables-legacy --version >/dev/null 2>&1; then ip6tables='ip6tables-legacy' -elif ! ip6tables --version >/dev/null 2>&1; then +elif ip6tables --version >/dev/null 2>&1; then ip6tables='ip6tables' else ip6tables='' @@ -62,9 +62,11 @@ ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad # firewall matches to test -ip netns exec "$ns2" "$iptables" -t raw -A PREROUTING -s 192.168.0.0/16 -m rpfilter -ip netns exec "$ns2" "$ip6tables" -t raw -A PREROUTING -s fec0::/16 -m rpfilter -ip netns exec "$ns2" nft -f - < Date: Mon, 7 Nov 2022 13:04:49 +0530 Subject: [PATCH 297/328] net: wwan: iosm: fix memory leak in ipc_pcie_read_bios_cfg ipc_pcie_read_bios_cfg() is using the acpi_evaluate_dsm() to obtain the wwan power state configuration from BIOS but is not freeing the acpi_object. The acpi_evaluate_dsm() returned acpi_object to be freed. Free the acpi_object after use. Fixes: 7e98d785ae61 ("net: iosm: entry point") Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_pcie.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index 31f57b986df2..97cb6846c6ae 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -232,6 +232,7 @@ static void ipc_pcie_config_init(struct iosm_pcie *ipc_pcie) */ static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev) { + enum ipc_pcie_sleep_state sleep_state = IPC_PCIE_D0L12; union acpi_object *object; acpi_handle handle_acpi; @@ -242,12 +243,16 @@ static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev) } object = acpi_evaluate_dsm(handle_acpi, &wwan_acpi_guid, 0, 3, NULL); + if (!object) + goto default_ret; - if (object && object->integer.value == 3) - return IPC_PCIE_D3L2; + if (object->integer.value == 3) + sleep_state = IPC_PCIE_D3L2; + + kfree(object); default_ret: - return IPC_PCIE_D0L12; + return sleep_state; } static int ipc_pcie_probe(struct pci_dev *pci, From 035e3befc191347331dd2530c3686e05a8acfbb2 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Mon, 7 Nov 2022 13:05:02 +0530 Subject: [PATCH 298/328] net: wwan: iosm: fix driver not working with INTEL_IOMMU disabled With INTEL_IOMMU disable config or by forcing intel_iommu=off from grub some of the features of IOSM driver like browsing, flashing & coredump collection is not working. When driver calls DMA API - dma_map_single() for tx transfers. It is resulting in dma mapping error. Set the device DMA addressing capabilities using dma_set_mask() and remove the INTEL_IOMMU dependency in kconfig so that driver follows the platform config either INTEL_IOMMU enable or disable. Fixes: f7af616c632e ("net: iosm: infrastructure") Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/Kconfig | 2 +- drivers/net/wwan/iosm/iosm_ipc_pcie.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig index 3486ffe94ac4..ac4d73b5626f 100644 --- a/drivers/net/wwan/Kconfig +++ b/drivers/net/wwan/Kconfig @@ -94,7 +94,7 @@ config RPMSG_WWAN_CTRL config IOSM tristate "IOSM Driver for Intel M.2 WWAN Device" - depends on INTEL_IOMMU + depends on PCI select NET_DEVLINK select RELAY if WWAN_DEBUGFS help diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index 97cb6846c6ae..d3d34d1c4704 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -259,6 +259,7 @@ static int ipc_pcie_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { struct iosm_pcie *ipc_pcie = kzalloc(sizeof(*ipc_pcie), GFP_KERNEL); + int ret; pr_debug("Probing device 0x%X from the vendor 0x%X", pci_id->device, pci_id->vendor); @@ -291,6 +292,12 @@ static int ipc_pcie_probe(struct pci_dev *pci, goto pci_enable_fail; } + ret = dma_set_mask(ipc_pcie->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(ipc_pcie->dev, "Could not set PCI DMA mask: %d", ret); + return ret; + } + ipc_pcie_config_aspm(ipc_pcie); dev_dbg(ipc_pcie->dev, "PCIe device enabled."); From 02d2d2ea4a3bc2391f6ac31f6854da83e8a63829 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Mon, 7 Nov 2022 13:05:13 +0530 Subject: [PATCH 299/328] net: wwan: iosm: fix invalid mux header type Data stall seen during peak DL throughput test & packets are dropped by mux layer due to invalid header type in datagram. During initlization Mux aggregration protocol is set to default UL/DL size and TD count of Mux lite protocol. This configuration mismatch between device and driver is resulting in data stall/packet drops. Override the UL/DL size and TD count for Mux aggregation protocol. Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support") Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_imem_ops.c | 8 ++++++++ drivers/net/wwan/iosm/iosm_ipc_mux.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c index b7f9237dedf7..66b90cc4c346 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem_ops.c @@ -91,6 +91,14 @@ void ipc_imem_wwan_channel_init(struct iosm_imem *ipc_imem, } ipc_chnl_cfg_get(&chnl_cfg, ipc_imem->nr_of_channels); + + if (ipc_imem->mmio->mux_protocol == MUX_AGGREGATION && + ipc_imem->nr_of_channels == IPC_MEM_IP_CHL_ID_0) { + chnl_cfg.ul_nr_of_entries = IPC_MEM_MAX_TDS_MUX_AGGR_UL; + chnl_cfg.dl_nr_of_entries = IPC_MEM_MAX_TDS_MUX_AGGR_DL; + chnl_cfg.dl_buf_size = IPC_MEM_MAX_ADB_BUF_SIZE; + } + ipc_imem_channel_init(ipc_imem, IPC_CTYPE_WWAN, chnl_cfg, IRQ_MOD_OFF); diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.h b/drivers/net/wwan/iosm/iosm_ipc_mux.h index cd9d74cc097f..9968bb885c1f 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mux.h @@ -10,6 +10,7 @@ #define IPC_MEM_MAX_UL_DG_ENTRIES 100 #define IPC_MEM_MAX_TDS_MUX_AGGR_UL 60 +#define IPC_MEM_MAX_TDS_MUX_AGGR_DL 60 #define IPC_MEM_MAX_ADB_BUF_SIZE (16 * 1024) #define IPC_MEM_MAX_UL_ADB_BUF_SIZE IPC_MEM_MAX_ADB_BUF_SIZE From 980ec04a88c9f0046c1da65833fb77b2ffa34b04 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Mon, 7 Nov 2022 13:05:24 +0530 Subject: [PATCH 300/328] net: wwan: iosm: fix kernel test robot reported errors Include linux/vmalloc.h in iosm_ipc_coredump.c & iosm_ipc_devlink.c to resolve kernel test robot errors. Reported-by: kernel test robot Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_coredump.c | 1 + drivers/net/wwan/iosm/iosm_ipc_devlink.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/wwan/iosm/iosm_ipc_coredump.c b/drivers/net/wwan/iosm/iosm_ipc_coredump.c index 9acd87724c9d..26ca30476f40 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_coredump.c +++ b/drivers/net/wwan/iosm/iosm_ipc_coredump.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2020-2021 Intel Corporation. */ +#include #include "iosm_ipc_coredump.h" diff --git a/drivers/net/wwan/iosm/iosm_ipc_devlink.c b/drivers/net/wwan/iosm/iosm_ipc_devlink.c index 17da85a8f337..2fe724d623c0 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_devlink.c +++ b/drivers/net/wwan/iosm/iosm_ipc_devlink.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2020-2021 Intel Corporation. */ +#include #include "iosm_ipc_chnl_cfg.h" #include "iosm_ipc_coredump.h" From 15f8f168952f54d3c86d734dc764f20844e423ac Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 2 Nov 2022 23:55:37 -0700 Subject: [PATCH 301/328] net/mlx5: Bridge, verify LAG state when adding bond to bridge Mlx5 LAG is initialized asynchronously on a workqueue which means that for a brief moment after setting mlx5 UL representors as lower devices of a bond netdevice the LAG itself is not fully initialized in the driver. When adding such bond device to a bridge mlx5 bridge code will not consider it as offload-capable, skip creating necessary bookkeeping and fail any further bridge offload-related commands with it (setting VLANs, offloading FDBs, etc.). In order to make the error explicit during bridge initialization stage implement the code that detects such condition during NETDEV_PRECHANGEUPPER event and returns an error. Fixes: ff9b7521468b ("net/mlx5: Bridge, support LAG") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/rep/bridge.c | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 39ef2a2561a3..8099a21e674c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -164,6 +164,36 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr return err; } +static int +mlx5_esw_bridge_changeupper_validate_netdev(void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info = ptr; + struct net_device *upper = info->upper_dev; + struct net_device *lower; + struct list_head *iter; + + if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev)) + return 0; + + netdev_for_each_lower_dev(dev, lower, iter) { + struct mlx5_core_dev *mdev; + struct mlx5e_priv *priv; + + if (!mlx5e_eswitch_rep(lower)) + continue; + + priv = netdev_priv(lower); + mdev = priv->mdev; + if (!mlx5_lag_is_active(mdev)) + return -EAGAIN; + if (!mlx5_lag_is_shared_fdb(mdev)) + return -EOPNOTSUPP; + } + + return 0; +} + static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -171,6 +201,7 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, switch (event) { case NETDEV_PRECHANGEUPPER: + err = mlx5_esw_bridge_changeupper_validate_netdev(ptr); break; case NETDEV_CHANGEUPPER: From 2808b37b59288ad8f1897e3546c2296df3384b65 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 2 Nov 2022 23:55:38 -0700 Subject: [PATCH 302/328] net/mlx5: Allow async trigger completion execution on single CPU systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a single CPU system, the kernel thread executing mlx5_cmd_flush() never releases the CPU but calls down_trylock(&cmd→sem) in a busy loop. On a single processor system, this leads to a deadlock as the kernel thread which executes mlx5_cmd_invoke() never gets scheduled. Fix this, by adding the cond_resched() call to the loop, allow the command completion kernel thread to execute. Fixes: 8e715cd613a1 ("net/mlx5: Set command entry semaphore up once got index free") Signed-off-by: Alexander Schmidt Signed-off-by: Roy Novich Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 46ba4c2faad2..2e0d59ca62b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1770,12 +1770,17 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) struct mlx5_cmd *cmd = &dev->cmd; int i; - for (i = 0; i < cmd->max_reg_cmds; i++) - while (down_trylock(&cmd->sem)) + for (i = 0; i < cmd->max_reg_cmds; i++) { + while (down_trylock(&cmd->sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } + } - while (down_trylock(&cmd->pages_sem)) + while (down_trylock(&cmd->pages_sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } /* Unlock cmdif */ up(&cmd->pages_sem); From e12de39c07a7872c1ac7250311bb60b74ff29f25 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 2 Nov 2022 23:55:39 -0700 Subject: [PATCH 303/328] net/mlx5: E-switch, Set to legacy mode if failed to change switchdev mode No need to rollback to the other mode because probably will fail again. Just set to legacy mode and clear fdb table created flag. So that fdb table will not be cleared again. Fixes: f019679ea5f2 ("net/mlx5: E-switch, Remove dependency between sriov and eswitch mode") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 14 ++++++++------ .../mellanox/mlx5/core/eswitch_offloads.c | 18 +++--------------- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c59107fa9e6d..2169486c4bfb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1387,12 +1387,14 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); - esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; - if (esw->mode == MLX5_ESWITCH_OFFLOADS) - esw_offloads_disable(esw); - else if (esw->mode == MLX5_ESWITCH_LEGACY) - esw_legacy_disable(esw); - mlx5_esw_acls_ns_cleanup(esw); + if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) { + esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + esw_offloads_disable(esw); + else if (esw->mode == MLX5_ESWITCH_LEGACY) + esw_legacy_disable(esw); + mlx5_esw_acls_ns_cleanup(esw); + } if (esw->mode == MLX5_ESWITCH_OFFLOADS) devl_rate_nodes_destroy(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4e50df3139c6..728ca9f2bb9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2310,7 +2310,7 @@ out_free: static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1; + int err; esw->mode = MLX5_ESWITCH_OFFLOADS; err = mlx5_eswitch_enable_locked(esw, esw->dev->priv.sriov.num_vfs); @@ -2318,11 +2318,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); esw->mode = MLX5_ESWITCH_LEGACY; - err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); - if (err1) { - NL_SET_ERR_MSG_MOD(extack, - "Failed setting eswitch back to legacy"); - } mlx5_rescan_drivers(esw->dev); } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { @@ -3389,19 +3384,12 @@ err_metadata: static int esw_offloads_stop(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1; + int err; esw->mode = MLX5_ESWITCH_LEGACY; err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); - if (err) { + if (err) NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - esw->mode = MLX5_ESWITCH_OFFLOADS; - err1 = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); - if (err1) { - NL_SET_ERR_MSG_MOD(extack, - "Failed setting eswitch back to offloads"); - } - } return err; } From 7d167b4a4c810919ba1affd02fc6b7f40e7e6eeb Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 2 Nov 2022 23:55:40 -0700 Subject: [PATCH 304/328] net/mlx5: fw_reset: Don't try to load device in case PCI isn't working In case PCI reads fail after unload, there is no use in trying to load the device. Fixes: 5ec697446f46 ("net/mlx5: Add support for devlink reload action fw activate") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 07c583996c29..9d908a0ccfef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -152,7 +152,8 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) mlx5_unload_one(dev); if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); - mlx5_load_one(dev, false); + else + mlx5_load_one(dev, false); devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); From f9c955b4fe5c8626d11b8a9b93ccc0ba77358fec Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 2 Nov 2022 23:55:42 -0700 Subject: [PATCH 305/328] net/mlx5e: Add missing sanity checks for max TX WQE size The commit cited below started using the firmware capability for the maximum TX WQE size. This commit adds an important check to verify that the driver doesn't attempt to exceed this capability, and also restores another check mistakenly removed in the cited commit (a WQE must not exceed the page size). Fixes: c27bd1718c06 ("net/mlx5e: Read max WQEBBs on the SQ from firmware") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 24 ++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++++++ .../net/ethernet/mellanox/mlx5/core/en_tx.c | 5 ++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index cb164b62f543..853f312cd757 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -11,6 +11,27 @@ #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) +/* IPSEC inline data includes: + * 1. ESP trailer: up to 255 bytes of padding, 1 byte for pad length, 1 byte for + * next header. + * 2. ESP authentication data: 16 bytes for ICV. + */ +#define MLX5E_MAX_TX_IPSEC_DS DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + \ + 255 + 1 + 1 + 16, MLX5_SEND_WQE_DS) + +/* 366 should be big enough to cover all L2, L3 and L4 headers with possible + * encapsulations. + */ +#define MLX5E_MAX_TX_INLINE_DS DIV_ROUND_UP(366 - INL_HDR_START_SZ + VLAN_HLEN, \ + MLX5_SEND_WQE_DS) + +/* Sync the calculation with mlx5e_sq_calc_wqe_attr. */ +#define MLX5E_MAX_TX_WQEBBS DIV_ROUND_UP(MLX5E_TX_WQE_EMPTY_DS_COUNT + \ + MLX5E_MAX_TX_INLINE_DS + \ + MLX5E_MAX_TX_IPSEC_DS + \ + MAX_SKB_FRAGS + 1, \ + MLX5_SEND_WQEBB_NUM_DS) + #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) static inline @@ -424,6 +445,8 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) { + WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < mlx5e_get_max_sq_wqebbs(mdev)); + /* A WQE must not cross the page boundary, hence two conditions: * 1. Its size must not exceed the page size. * 2. If the WQE size is X, and the space remaining in a page is less @@ -436,7 +459,6 @@ static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_si "wqe_size %u is greater than max SQ WQEBBs %u", wqe_size, mlx5e_get_max_sq_wqebbs(mdev)); - return MLX5E_STOP_ROOM(wqe_size); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 364f04309149..e3a4f01bcceb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5694,6 +5694,13 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) mlx5e_fs_set_state_destroy(priv->fs, !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + /* Validate the max_wqe_size_sq capability. */ + if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) { + mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %lu\n", + mlx5e_get_max_sq_wqebbs(priv->mdev), MLX5E_MAX_TX_WQEBBS); + return -EIO; + } + /* max number of channels may have changed */ max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile); if (priv->channels.params.num_channels > max_nch) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 6adca01fbdc9..f7897ddb29c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -305,6 +305,8 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at u16 ds_cnt_inl = 0; u16 ds_cnt_ids = 0; + /* Sync the calculation with MLX5E_MAX_TX_WQEBBS. */ + if (attr->insz) ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz, MLX5_SEND_WQE_DS); @@ -317,6 +319,9 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at inl += VLAN_HLEN; ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS); + if (WARN_ON_ONCE(ds_cnt_inl > MLX5E_MAX_TX_INLINE_DS)) + netdev_warn(skb->dev, "ds_cnt_inl = %u > max %u\n", ds_cnt_inl, + (u16)MLX5E_MAX_TX_INLINE_DS); ds_cnt += ds_cnt_inl; } From 8d4b475e9d0f100c3920d8bf7d392a6dac88583e Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 2 Nov 2022 23:55:43 -0700 Subject: [PATCH 306/328] net/mlx5e: Fix usage of DMA sync API DMA sync functions should use the same direction that was used by DMA mapping. Use DMA_BIDIRECTIONAL for XDP_TX from regular RQ, which reuses the same mapping that was used for RX, and DMA_TO_DEVICE for XDP_TX from XSK RQ and XDP_REDIRECT, which establish a new mapping in this direction. On the RX side, use the same direction that was used when setting up the mapping (DMA_BIDIRECTIONAL for XDP, DMA_FROM_DEVICE otherwise). Also don't skip sync for device when establishing a DMA_FROM_DEVICE mapping for RX, as some architectures (ARM) may require invalidating caches before the device can use the mapping. It doesn't break the bugfix made in commit 0b7cfa4082fb ("net/mlx5e: Fix page DMA map/unmap attributes"), since the bug happened on unmap. Fixes: 0b7cfa4082fb ("net/mlx5e: Fix page DMA map/unmap attributes") Fixes: b5503b994ed5 ("net/mlx5e: XDP TX forwarding support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 +-- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 27 ++++++++++--------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 4685c652c97e..20507ef2f956 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -117,7 +117,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, xdpi.page.rq = rq; dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf); - dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_TO_DEVICE); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL); if (unlikely(xdp_frame_has_frags(xdpf))) { sinfo = xdp_get_shared_info_from_frame(xdpf); @@ -131,7 +131,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, skb_frag_off(frag); len = skb_frag_size(frag); dma_sync_single_for_device(sq->pdev, addr, len, - DMA_TO_DEVICE); + DMA_BIDIRECTIONAL); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 58084650151f..a61a43fc8d5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -266,7 +266,7 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_uni addr = page_pool_get_dma_addr(au->page); /* Non-XSK always uses PAGE_SIZE. */ - dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, DMA_FROM_DEVICE); + dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir); return true; } @@ -282,8 +282,7 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_u return -ENOMEM; /* Non-XSK always uses PAGE_SIZE. */ - addr = dma_map_page_attrs(rq->pdev, au->page, 0, PAGE_SIZE, - rq->buff.map_dir, DMA_ATTR_SKIP_CPU_SYNC); + addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, addr))) { page_pool_recycle_direct(rq->page_pool, au->page); au->page = NULL; @@ -427,14 +426,15 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, { dma_addr_t addr = page_pool_get_dma_addr(au->page); - dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, + rq->buff.map_dir); page_ref_inc(au->page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, au->page, frag_offset, len, truesize); } static inline void -mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, +mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, struct page *page, dma_addr_t addr, int offset_from, int dma_offset, u32 headlen) { @@ -442,7 +442,8 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, /* Aligning len to sizeof(long) optimizes memcpy performance */ unsigned int len = ALIGN(headlen, sizeof(long)); - dma_sync_single_for_cpu(pdev, addr + dma_offset, len, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len, + rq->buff.map_dir); skb_copy_to_linear_data(skb, from, len); } @@ -1538,7 +1539,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, addr = page_pool_get_dma_addr(au->page); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, - frag_size, DMA_FROM_DEVICE); + frag_size, rq->buff.map_dir); net_prefetch(data); prog = rcu_dereference(rq->xdp_prog); @@ -1587,7 +1588,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi addr = page_pool_get_dma_addr(au->page); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, - rq->buff.frame0_sz, DMA_FROM_DEVICE); + rq->buff.frame0_sz, rq->buff.map_dir); net_prefetchw(va); /* xdp_frame data area */ net_prefetch(va + rx_headroom); @@ -1608,7 +1609,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi addr = page_pool_get_dma_addr(au->page); dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, - frag_consumed_bytes, DMA_FROM_DEVICE); + frag_consumed_bytes, rq->buff.map_dir); if (!xdp_buff_has_frags(&xdp)) { /* Init on the first fragment to avoid cold cache access @@ -1905,7 +1906,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset); /* copy header */ addr = page_pool_get_dma_addr(head_au->page); - mlx5e_copy_skb_header(rq->pdev, skb, head_au->page, addr, + mlx5e_copy_skb_header(rq, skb, head_au->page, addr, head_offset, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; @@ -1939,7 +1940,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, addr = page_pool_get_dma_addr(au->page); dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, - frag_size, DMA_FROM_DEVICE); + frag_size, rq->buff.map_dir); net_prefetch(data); prog = rcu_dereference(rq->xdp_prog); @@ -1987,7 +1988,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) { /* build SKB around header */ - dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE); + dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir); prefetchw(hdr); prefetch(data); skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0); @@ -2009,7 +2010,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, } prefetchw(skb->data); - mlx5e_copy_skb_header(rq->pdev, skb, head->page, head->addr, + mlx5e_copy_skb_header(rq, skb, head->page, head->addr, head_offset + rx_headroom, rx_headroom, head_size); /* skb linear part was allocated with headlen and aligned to long */ From 08912ea799cd2a43b8999457e316967fe4e2f327 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 2 Nov 2022 23:55:44 -0700 Subject: [PATCH 307/328] net/mlx5e: Fix tc acts array not to be dependent on enum order The tc acts array should not be dependent on kernel internal flow action id enum. Fix the array initialization. Fixes: fad547906980 ("net/mlx5e: Add tc action infrastructure") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc/act/act.c | 92 +++++++------------ 1 file changed, 32 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index 305fde62a78d..3337241cfd84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -6,70 +6,42 @@ #include "en/tc_priv.h" #include "mlx5_core.h" -/* Must be aligned with enum flow_action_id. */ static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = { - &mlx5e_tc_act_accept, - &mlx5e_tc_act_drop, - &mlx5e_tc_act_trap, - &mlx5e_tc_act_goto, - &mlx5e_tc_act_mirred, - &mlx5e_tc_act_mirred, - &mlx5e_tc_act_redirect_ingress, - NULL, /* FLOW_ACTION_MIRRED_INGRESS, */ - &mlx5e_tc_act_vlan, - &mlx5e_tc_act_vlan, - &mlx5e_tc_act_vlan_mangle, - &mlx5e_tc_act_tun_encap, - &mlx5e_tc_act_tun_decap, - &mlx5e_tc_act_pedit, - &mlx5e_tc_act_pedit, - &mlx5e_tc_act_csum, - NULL, /* FLOW_ACTION_MARK, */ - &mlx5e_tc_act_ptype, - NULL, /* FLOW_ACTION_PRIORITY, */ - NULL, /* FLOW_ACTION_WAKE, */ - NULL, /* FLOW_ACTION_QUEUE, */ - &mlx5e_tc_act_sample, - &mlx5e_tc_act_police, - &mlx5e_tc_act_ct, - NULL, /* FLOW_ACTION_CT_METADATA, */ - &mlx5e_tc_act_mpls_push, - &mlx5e_tc_act_mpls_pop, - NULL, /* FLOW_ACTION_MPLS_MANGLE, */ - NULL, /* FLOW_ACTION_GATE, */ - NULL, /* FLOW_ACTION_PPPOE_PUSH, */ - NULL, /* FLOW_ACTION_JUMP, */ - NULL, /* FLOW_ACTION_PIPE, */ - &mlx5e_tc_act_vlan, - &mlx5e_tc_act_vlan, + [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept, + [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop, + [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap, + [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto, + [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred, + [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred, + [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress, + [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_POP] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_MANGLE] = &mlx5e_tc_act_vlan_mangle, + [FLOW_ACTION_TUNNEL_ENCAP] = &mlx5e_tc_act_tun_encap, + [FLOW_ACTION_TUNNEL_DECAP] = &mlx5e_tc_act_tun_decap, + [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum, + [FLOW_ACTION_PTYPE] = &mlx5e_tc_act_ptype, + [FLOW_ACTION_SAMPLE] = &mlx5e_tc_act_sample, + [FLOW_ACTION_POLICE] = &mlx5e_tc_act_police, + [FLOW_ACTION_CT] = &mlx5e_tc_act_ct, + [FLOW_ACTION_MPLS_PUSH] = &mlx5e_tc_act_mpls_push, + [FLOW_ACTION_MPLS_POP] = &mlx5e_tc_act_mpls_pop, + [FLOW_ACTION_VLAN_PUSH_ETH] = &mlx5e_tc_act_vlan, + [FLOW_ACTION_VLAN_POP_ETH] = &mlx5e_tc_act_vlan, }; -/* Must be aligned with enum flow_action_id. */ static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = { - &mlx5e_tc_act_accept, - &mlx5e_tc_act_drop, - NULL, /* FLOW_ACTION_TRAP, */ - &mlx5e_tc_act_goto, - &mlx5e_tc_act_mirred_nic, - NULL, /* FLOW_ACTION_MIRRED, */ - NULL, /* FLOW_ACTION_REDIRECT_INGRESS, */ - NULL, /* FLOW_ACTION_MIRRED_INGRESS, */ - NULL, /* FLOW_ACTION_VLAN_PUSH, */ - NULL, /* FLOW_ACTION_VLAN_POP, */ - NULL, /* FLOW_ACTION_VLAN_MANGLE, */ - NULL, /* FLOW_ACTION_TUNNEL_ENCAP, */ - NULL, /* FLOW_ACTION_TUNNEL_DECAP, */ - &mlx5e_tc_act_pedit, - &mlx5e_tc_act_pedit, - &mlx5e_tc_act_csum, - &mlx5e_tc_act_mark, - NULL, /* FLOW_ACTION_PTYPE, */ - NULL, /* FLOW_ACTION_PRIORITY, */ - NULL, /* FLOW_ACTION_WAKE, */ - NULL, /* FLOW_ACTION_QUEUE, */ - NULL, /* FLOW_ACTION_SAMPLE, */ - NULL, /* FLOW_ACTION_POLICE, */ - &mlx5e_tc_act_ct, + [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept, + [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop, + [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto, + [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred_nic, + [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit, + [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum, + [FLOW_ACTION_MARK] = &mlx5e_tc_act_mark, + [FLOW_ACTION_CT] = &mlx5e_tc_act_ct, }; /** From 9e06430841363a1d2932d546fdce1cc5edb3c2a0 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 2 Nov 2022 23:55:45 -0700 Subject: [PATCH 308/328] net/mlx5e: TC, Fix wrong rejection of packet-per-second policing In the bellow commit, we added support for PPS policing without removing the check which block offload of such cases. Fix it by removing this check. Fixes: a8d52b024d6d ("net/mlx5e: TC, Support offloading police action") Signed-off-by: Jianbo Liu Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index dd6fea9e9a5b..372dfb89e396 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4758,12 +4758,6 @@ int mlx5e_policer_validate(const struct flow_action *action, return -EOPNOTSUPP; } - if (act->police.rate_pkt_ps) { - NL_SET_ERR_MSG_MOD(extack, - "QoS offload not support packets per second"); - return -EOPNOTSUPP; - } - return 0; } From f4f4096b410e8d31c3f07f39de3b17d144edd53d Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 2 Nov 2022 23:55:46 -0700 Subject: [PATCH 309/328] net/mlx5e: E-Switch, Fix comparing termination table instance The pkt_reformat pointer being saved under flow_act and not dest attribute in the termination table instance. Fix the comparison pointers. Also fix returning success if one pkt_reformat pointer is null and the other is not. Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink") Signed-off-by: Roi Dayan Reviewed-by: Chris Mi Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads_termtbl.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index ee568bf34ae2..108a3503f413 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -30,9 +30,9 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, sizeof(dest->vport.num), hash); hash = jhash((const void *)&dest->vport.vhca_id, sizeof(dest->vport.num), hash); - if (dest->vport.pkt_reformat) - hash = jhash(dest->vport.pkt_reformat, - sizeof(*dest->vport.pkt_reformat), + if (flow_act->pkt_reformat) + hash = jhash(flow_act->pkt_reformat, + sizeof(*flow_act->pkt_reformat), hash); return hash; } @@ -53,9 +53,11 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, if (ret) return ret; - return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ? - memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat, - sizeof(*dest1->vport.pkt_reformat)) : 0; + if (flow_act1->pkt_reformat && flow_act2->pkt_reformat) + return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat, + sizeof(*flow_act1->pkt_reformat)); + + return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat); } static int From 7f1a6d4b9e820b08479a07f6e14c36ccfa641751 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 2 Nov 2022 23:55:47 -0700 Subject: [PATCH 310/328] net/mlx5e: TC, Fix slab-out-of-bounds in parse_tc_actions esw_attr is only allocated if namespace is fdb. BUG: KASAN: slab-out-of-bounds in parse_tc_actions+0xdc6/0x10e0 [mlx5_core] Write of size 4 at addr ffff88815f185b04 by task tc/2135 CPU: 5 PID: 2135 Comm: tc Not tainted 6.1.0-rc2+ #2 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d print_report+0x170/0x471 ? parse_tc_actions+0xdc6/0x10e0 [mlx5_core] kasan_report+0xbc/0xf0 ? parse_tc_actions+0xdc6/0x10e0 [mlx5_core] parse_tc_actions+0xdc6/0x10e0 [mlx5_core] Fixes: 94d651739e17 ("net/mlx5e: TC, Fix cloned flow attr instance dests are not zeroed") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 372dfb89e396..5a6aa61ec82a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3633,10 +3633,14 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, attr2->action = 0; attr2->flags = 0; attr2->parse_attr = parse_attr; - attr2->esw_attr->out_count = 0; - attr2->esw_attr->split_count = 0; attr2->dest_chain = 0; attr2->dest_ft = NULL; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { + attr2->esw_attr->out_count = 0; + attr2->esw_attr->split_count = 0; + } + return attr2; } From f23df5220d2bf8d5e639f074b76f206a736d09e1 Mon Sep 17 00:00:00 2001 From: Norbert Zulinski Date: Mon, 10 Oct 2022 10:22:22 -0400 Subject: [PATCH 311/328] ice: Fix spurious interrupt during removal of trusted VF Previously, during removal of trusted VF when VF is down there was number of spurious interrupt equal to number of queues on VF. Add check if VF already has inactive queues. If VF is disabled and has inactive rx queues then do not disable rx queues. Add check in ice_vsi_stop_tx_ring if it's VF's vsi and if VF is disabled. Fixes: efe41860008e ("ice: Fix memory corruption in VF driver") Signed-off-by: Norbert Zulinski Signed-off-by: Mateusz Palczewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_base.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.c | 25 +++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_lib.h | 1 + drivers/net/ethernet/intel/ice/ice_vf_lib.c | 5 ++++- 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 9e36f01dfa4f..e864634d66bc 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -958,7 +958,7 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, * associated to the queue to schedule NAPI handler */ q_vector = ring->q_vector; - if (q_vector) + if (q_vector && !(vsi->vf && ice_is_vf_disabled(vsi->vf))) ice_trigger_sw_intr(hw, q_vector); status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 938ba8c215cb..7276badfa19e 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2239,6 +2239,31 @@ int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi) return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq); } +/** + * ice_vsi_is_rx_queue_active + * @vsi: the VSI being configured + * + * Return true if at least one queue is active. + */ +bool ice_vsi_is_rx_queue_active(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + int i; + + ice_for_each_rxq(vsi, i) { + u32 rx_reg; + int pf_q; + + pf_q = vsi->rxq_map[i]; + rx_reg = rd32(hw, QRX_CTRL(pf_q)); + if (rx_reg & QRX_CTRL_QENA_STAT_M) + return true; + } + + return false; +} + /** * ice_vsi_is_vlan_pruning_ena - check if VLAN pruning is enabled or not * @vsi: VSI to check whether or not VLAN pruning is enabled. diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index ec4bf0c89857..dcdf69a693e9 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -129,4 +129,5 @@ u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi); bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f); void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f); void ice_init_feature_support(struct ice_pf *pf); +bool ice_vsi_is_rx_queue_active(struct ice_vsi *vsi); #endif /* !_ICE_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 0abeed092de1..1c51778db951 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -576,7 +576,10 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) return -EINVAL; } ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); - ice_vsi_stop_all_rx_rings(vsi); + + if (ice_vsi_is_rx_queue_active(vsi)) + ice_vsi_stop_all_rx_rings(vsi); + dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", vf->vf_id); return 0; From 0e710a3ffd0caaf23b8791b041e8792f252f8e4f Mon Sep 17 00:00:00 2001 From: Michal Jaron Date: Fri, 14 Oct 2022 10:45:37 +0200 Subject: [PATCH 312/328] iavf: Fix VF driver counting VLAN 0 filters VF driver mistakenly counts VLAN 0 filters, when no PF driver counts them. Do not count VLAN 0 filters, when VLAN_V2 is engaged. Counting those filters in, will affect filters size by -1, when sending batched VLAN addition message. Fixes: 968996c070ef ("iavf: Fix VLAN_V2 addition/rejection") Signed-off-by: Przemyslaw Patynowski Signed-off-by: Michal Jaron Signed-off-by: Kamil Maziarz Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 5a9e6563923e..24a701fd140e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -2438,6 +2438,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, list_for_each_entry(f, &adapter->vlan_filter_list, list) { if (f->is_new_vlan) { f->is_new_vlan = false; + if (!f->vlan.vid) + continue; if (f->vlan.tpid == ETH_P_8021Q) set_bit(f->vlan.vid, adapter->vsi.active_cvlans); From 6d47b53fb3f363a74538a1dbd09954af3d8d4131 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 09:15:37 +0800 Subject: [PATCH 313/328] net: cpsw: disable napi in cpsw_ndo_open() When failed to create xdp rxqs or fill rx channels in cpsw_ndo_open() for opening device, napi isn't disabled. When open cpsw device next time, it will report a invalid opcode issue. Compiled tested only. Fixes: d354eb85d618 ("drivers: net: cpsw: dual_emac: simplify napi usage") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109011537.96975-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 709ca6dd6ecb..13c9c2d6b79b 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -854,6 +854,8 @@ static int cpsw_ndo_open(struct net_device *ndev) err_cleanup: if (!cpsw->usage_count) { + napi_disable(&cpsw->napi_rx); + napi_disable(&cpsw->napi_tx); cpdma_ctlr_stop(cpsw->dma); cpsw_destroy_xdp_rxqs(cpsw); } From d75aed1428da787cbe42bc073d76f1354f364d92 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 10:14:51 +0800 Subject: [PATCH 314/328] net: cxgb3_main: disable napi when bind qsets failed in cxgb_up() When failed to bind qsets in cxgb_up() for opening device, napi isn't disabled. When open cxgb3 device next time, it will trigger a BUG_ON() in napi_enable(). Compile tested only. Fixes: 48c4b6dbb7e2 ("cxgb3 - fix port up/down error path") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109021451.121490-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index a52e6b6e2876..9b84c8d8d309 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -1301,6 +1301,7 @@ static int cxgb_up(struct adapter *adap) if (ret < 0) { CH_ERR(adap, "failed to bind qsets, err %d\n", ret); t3_intr_disable(adap); + quiesce_rx(adap); free_irq_resources(adap); err = ret; goto out; From dcea1a8107c04b9521dee1dd37971757a22db162 Mon Sep 17 00:00:00 2001 From: "Tan, Tee Min" Date: Mon, 7 Nov 2022 21:08:11 -0500 Subject: [PATCH 315/328] stmmac: intel: Update PCH PTP clock rate from 200MHz to 204.8MHz Current Intel platform has an output of ~976ms interval when probed on 1 Pulse-per-Second(PPS) hardware pin. The correct PTP clock frequency for PCH GbE should be 204.8MHz instead of 200MHz. PSE GbE PTP clock rate remains at 200MHz. Fixes: 58da0cfa6cf1 ("net: stmmac: create dwmac-intel.c to contain all Intel platform") Signed-off-by: Ling Pei Lee Signed-off-by: Tan, Tee Min Signed-off-by: Voon Weifeng Signed-off-by: Gan Yi Fang Link: https://lore.kernel.org/r/20221108020811.12919-1-yi.fang.gan@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 0a2afc1a3124..7deb1f817dac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -629,7 +629,6 @@ static int ehl_common_data(struct pci_dev *pdev, { plat->rx_queues_to_use = 8; plat->tx_queues_to_use = 8; - plat->clk_ptp_rate = 200000000; plat->use_phy_wol = 1; plat->safety_feat_cfg->tsoee = 1; @@ -654,6 +653,8 @@ static int ehl_sgmii_data(struct pci_dev *pdev, plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; + plat->clk_ptp_rate = 204800000; + return ehl_common_data(pdev, plat); } @@ -667,6 +668,8 @@ static int ehl_rgmii_data(struct pci_dev *pdev, plat->bus_id = 1; plat->phy_interface = PHY_INTERFACE_MODE_RGMII; + plat->clk_ptp_rate = 204800000; + return ehl_common_data(pdev, plat); } @@ -683,6 +686,8 @@ static int ehl_pse0_common_data(struct pci_dev *pdev, plat->bus_id = 2; plat->addr64 = 32; + plat->clk_ptp_rate = 200000000; + intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ); return ehl_common_data(pdev, plat); @@ -722,6 +727,8 @@ static int ehl_pse1_common_data(struct pci_dev *pdev, plat->bus_id = 3; plat->addr64 = 32; + plat->clk_ptp_rate = 200000000; + intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ); return ehl_common_data(pdev, plat); @@ -757,7 +764,7 @@ static int tgl_common_data(struct pci_dev *pdev, { plat->rx_queues_to_use = 6; plat->tx_queues_to_use = 4; - plat->clk_ptp_rate = 200000000; + plat->clk_ptp_rate = 204800000; plat->speed_mode_2500 = intel_speed_mode_2500; plat->safety_feat_cfg->tsoee = 1; From d4072058af4fd8fb4658e7452289042a406a9398 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 8 Nov 2022 09:55:17 +0000 Subject: [PATCH 316/328] mctp: Fix an error handling path in mctp_init() If mctp_neigh_init() return error, the routes resources should be released in the error handling path. Otherwise some resources leak. Fixes: 4d8b9319282a ("mctp: Add neighbour implementation") Signed-off-by: Wei Yongjun Acked-by: Matt Johnston Link: https://lore.kernel.org/r/20221108095517.620115-1-weiyongjun@huaweicloud.com Signed-off-by: Jakub Kicinski --- net/mctp/af_mctp.c | 4 +++- net/mctp/route.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index b6b5e496fa40..fc9e728b6333 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -665,12 +665,14 @@ static __init int mctp_init(void) rc = mctp_neigh_init(); if (rc) - goto err_unreg_proto; + goto err_unreg_routes; mctp_device_init(); return 0; +err_unreg_routes: + mctp_routes_exit(); err_unreg_proto: proto_unregister(&mctp_proto); err_unreg_sock: diff --git a/net/mctp/route.c b/net/mctp/route.c index 2155f15a074c..f9a80b82dc51 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1400,7 +1400,7 @@ int __init mctp_routes_init(void) return register_pernet_subsys(&mctp_net_ops); } -void __exit mctp_routes_exit(void) +void mctp_routes_exit(void) { unregister_pernet_subsys(&mctp_net_ops); rtnl_unregister(PF_MCTP, RTM_DELROUTE); From c6092ea1e6d7bd12acd881f6aa2b5054cd70e096 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 09:21:00 +0800 Subject: [PATCH 317/328] cxgb4vf: shut down the adapter when t4vf_update_port_info() failed in cxgb4vf_open() When t4vf_update_port_info() failed in cxgb4vf_open(), resources applied during adapter goes up are not cleared. Fix it. Only be compiled, not be tested. Fixes: 18d79f721e0a ("cxgb4vf: Update port information in cxgb4vf_open()") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109012100.99132-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 54db79f4dcfe..63b2bd084130 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -858,7 +858,7 @@ static int cxgb4vf_open(struct net_device *dev) */ err = t4vf_update_port_info(pi); if (err < 0) - return err; + goto err_unwind; /* * Note that this interface is up and start everything up ... From f2d45fdf9a0ed2c94c01c422a0d0add8ffd42099 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 19:46:45 +0800 Subject: [PATCH 318/328] stmmac: dwmac-loongson: fix missing pci_disable_msi() while module exiting pci_enable_msi() has been called in loongson_dwmac_probe(), so pci_disable_msi() needs be called in remove path and error path of probe(). Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: Yang Yingliang Signed-off-by: Paolo Abeni --- .../net/ethernet/stmicro/stmmac/dwmac-loongson.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 79fa7870563b..16915b4d9505 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -125,6 +125,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (res.irq < 0) { dev_err(&pdev->dev, "IRQ macirq not found\n"); ret = -ENODEV; + goto err_disable_msi; } res.wol_irq = of_irq_get_byname(np, "eth_wake_irq"); @@ -137,9 +138,18 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id if (res.lpi_irq < 0) { dev_err(&pdev->dev, "IRQ eth_lpi not found\n"); ret = -ENODEV; + goto err_disable_msi; } - return stmmac_dvr_probe(&pdev->dev, plat, &res); + ret = stmmac_dvr_probe(&pdev->dev, plat, &res); + if (ret) + goto err_disable_msi; + + return ret; + +err_disable_msi: + pci_disable_msi(pdev); + return ret; } static void loongson_dwmac_remove(struct pci_dev *pdev) @@ -155,6 +165,7 @@ static void loongson_dwmac_remove(struct pci_dev *pdev) break; } + pci_disable_msi(pdev); pci_disable_device(pdev); } From fe5b3ce8b4377e543960220f539b989a927afd8a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 19:46:46 +0800 Subject: [PATCH 319/328] stmmac: dwmac-loongson: fix missing pci_disable_device() in loongson_dwmac_probe() Add missing pci_disable_device() in the error path in loongson_dwmac_probe(). Fixes: 30bba69d7db4 ("stmmac: pci: Add dwmac support for Loongson") Signed-off-by: Yang Yingliang Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 16915b4d9505..2d480bc49c51 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -97,7 +97,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id continue; ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); if (ret) - return ret; + goto err_disable_device; break; } @@ -108,7 +108,8 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id phy_mode = device_get_phy_mode(&pdev->dev); if (phy_mode < 0) { dev_err(&pdev->dev, "phy_mode not found\n"); - return phy_mode; + ret = phy_mode; + goto err_disable_device; } plat->phy_interface = phy_mode; @@ -149,6 +150,8 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id err_disable_msi: pci_disable_msi(pdev); +err_disable_device: + pci_disable_device(pdev); return ret; } From 7f94d0498f9c763f37172c08059ae91804c3075a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 8 Nov 2022 19:46:47 +0800 Subject: [PATCH 320/328] stmmac: dwmac-loongson: fix missing of_node_put() while module exiting The node returned by of_get_child_by_name() with refcount decremented, of_node_put() needs be called when finish using it. So add it in the error path in loongson_dwmac_probe() and in loongson_dwmac_remove(). Fixes: 2ae34111fe4e ("stmmac: dwmac-loongson: fix invalid mdio_node") Signed-off-by: Yang Yingliang Signed-off-by: Paolo Abeni --- .../ethernet/stmicro/stmmac/dwmac-loongson.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index 2d480bc49c51..a25c187d3185 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -75,20 +75,24 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id plat->mdio_bus_data = devm_kzalloc(&pdev->dev, sizeof(*plat->mdio_bus_data), GFP_KERNEL); - if (!plat->mdio_bus_data) - return -ENOMEM; + if (!plat->mdio_bus_data) { + ret = -ENOMEM; + goto err_put_node; + } plat->mdio_bus_data->needs_reset = true; } plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL); - if (!plat->dma_cfg) - return -ENOMEM; + if (!plat->dma_cfg) { + ret = -ENOMEM; + goto err_put_node; + } /* Enable pci device */ ret = pci_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); - return ret; + goto err_put_node; } /* Get the base address of device */ @@ -152,13 +156,18 @@ err_disable_msi: pci_disable_msi(pdev); err_disable_device: pci_disable_device(pdev); +err_put_node: + of_node_put(plat->mdio_node); return ret; } static void loongson_dwmac_remove(struct pci_dev *pdev) { + struct net_device *ndev = dev_get_drvdata(&pdev->dev); + struct stmmac_priv *priv = netdev_priv(ndev); int i; + of_node_put(priv->plat->mdio_node); stmmac_dvr_remove(&pdev->dev); for (i = 0; i < PCI_STD_NUM_BARS; i++) { From 1b16b3fdf675cca15a537572bac50cc5354368fc Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 8 Nov 2022 16:34:58 +0100 Subject: [PATCH 321/328] net: phy: mscc: macsec: clear encryption keys when freeing a flow Commit aaab73f8fba4 ("macsec: clear encryption keys from the stack after setting up offload") made sure to clean encryption keys from the stack after setting up offloading, but the MSCC PHY driver made a copy, kept it in the flow data and did not clear it when freeing a flow. Fix this. Fixes: 28c5107aa904 ("net: phy: mscc: macsec support") Signed-off-by: Antoine Tenart Signed-off-by: Paolo Abeni --- drivers/net/phy/mscc/mscc_macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c index ee5b17edca39..f81b077618f4 100644 --- a/drivers/net/phy/mscc/mscc_macsec.c +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -632,6 +632,7 @@ static void vsc8584_macsec_free_flow(struct vsc8531_private *priv, list_del(&flow->list); clear_bit(flow->index, bitmap); + memzero_explicit(flow->key, sizeof(flow->key)); kfree(flow); } From 879785def0f5e71d54399de0f8a5cb399db14171 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 8 Nov 2022 16:34:59 +0100 Subject: [PATCH 322/328] net: atlantic: macsec: clear encryption keys from the stack Commit aaab73f8fba4 ("macsec: clear encryption keys from the stack after setting up offload") made sure to clean encryption keys from the stack after setting up offloading, but the atlantic driver made a copy and did not clear it. Fix this. [4 Fixes tags below, all part of the same series, no need to split this] Fixes: 9ff40a751a6f ("net: atlantic: MACSec ingress offload implementation") Fixes: b8f8a0b7b5cb ("net: atlantic: MACSec ingress offload HW bindings") Fixes: 27736563ce32 ("net: atlantic: MACSec egress offload implementation") Fixes: 9d106c6dd81b ("net: atlantic: MACSec egress offload HW bindings") Signed-off-by: Antoine Tenart Signed-off-by: Paolo Abeni --- .../net/ethernet/aquantia/atlantic/aq_macsec.c | 2 ++ .../aquantia/atlantic/macsec/macsec_api.c | 18 +++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c index a0180811305d..7eb5851eb95d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c @@ -570,6 +570,7 @@ static int aq_update_txsa(struct aq_nic_s *nic, const unsigned int sc_idx, ret = aq_mss_set_egress_sakey_record(hw, &key_rec, sa_idx); + memzero_explicit(&key_rec, sizeof(key_rec)); return ret; } @@ -899,6 +900,7 @@ static int aq_update_rxsa(struct aq_nic_s *nic, const unsigned int sc_idx, ret = aq_mss_set_ingress_sakey_record(hw, &sa_key_record, sa_idx); + memzero_explicit(&sa_key_record, sizeof(sa_key_record)); return ret; } diff --git a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c index 36c7cf05630a..431924959520 100644 --- a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c +++ b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c @@ -757,6 +757,7 @@ set_ingress_sakey_record(struct aq_hw_s *hw, u16 table_index) { u16 packed_record[18]; + int ret; if (table_index >= NUMROWS_INGRESSSAKEYRECORD) return -EINVAL; @@ -789,9 +790,12 @@ set_ingress_sakey_record(struct aq_hw_s *hw, packed_record[16] = rec->key_len & 0x3; - return set_raw_ingress_record(hw, packed_record, 18, 2, - ROWOFFSET_INGRESSSAKEYRECORD + - table_index); + ret = set_raw_ingress_record(hw, packed_record, 18, 2, + ROWOFFSET_INGRESSSAKEYRECORD + + table_index); + + memzero_explicit(packed_record, sizeof(packed_record)); + return ret; } int aq_mss_set_ingress_sakey_record(struct aq_hw_s *hw, @@ -1739,14 +1743,14 @@ static int set_egress_sakey_record(struct aq_hw_s *hw, ret = set_raw_egress_record(hw, packed_record, 8, 2, ROWOFFSET_EGRESSSAKEYRECORD + table_index); if (unlikely(ret)) - return ret; + goto clear_key; ret = set_raw_egress_record(hw, packed_record + 8, 8, 2, ROWOFFSET_EGRESSSAKEYRECORD + table_index - 32); - if (unlikely(ret)) - return ret; - return 0; +clear_key: + memzero_explicit(packed_record, sizeof(packed_record)); + return ret; } int aq_mss_set_egress_sakey_record(struct aq_hw_s *hw, From 0348c1ab980c1d43fb37b758d4b760990c066cb5 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 10:37:41 +0800 Subject: [PATCH 323/328] ethernet: s2io: disable napi when start nic failed in s2io_card_up() When failed to start nic or add interrupt service routine in s2io_card_up() for opening device, napi isn't disabled. When open s2io device next time, it will trigger a BUG_ON()in napi_enable(). Compile tested only. Fixes: 5f490c968056 ("S2io: Fixed synchronization between scheduling of napi with card reset and close") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109023741.131552-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/neterion/s2io.c | 29 +++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index dcf8212119f9..1d3c4474b7cb 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7128,9 +7128,8 @@ static int s2io_card_up(struct s2io_nic *sp) if (ret) { DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n", dev->name); - s2io_reset(sp); - free_rx_buffers(sp); - return -ENOMEM; + ret = -ENOMEM; + goto err_fill_buff; } DBG_PRINT(INFO_DBG, "Buf in ring:%d is %d:\n", i, ring->rx_bufs_left); @@ -7168,18 +7167,16 @@ static int s2io_card_up(struct s2io_nic *sp) /* Enable Rx Traffic and interrupts on the NIC */ if (start_nic(sp)) { DBG_PRINT(ERR_DBG, "%s: Starting NIC failed\n", dev->name); - s2io_reset(sp); - free_rx_buffers(sp); - return -ENODEV; + ret = -ENODEV; + goto err_out; } /* Add interrupt service routine */ if (s2io_add_isr(sp) != 0) { if (sp->config.intr_type == MSI_X) s2io_rem_isr(sp); - s2io_reset(sp); - free_rx_buffers(sp); - return -ENODEV; + ret = -ENODEV; + goto err_out; } timer_setup(&sp->alarm_timer, s2io_alarm_handle, 0); @@ -7199,6 +7196,20 @@ static int s2io_card_up(struct s2io_nic *sp) } return 0; + +err_out: + if (config->napi) { + if (config->intr_type == MSI_X) { + for (i = 0; i < sp->config.rx_ring_num; i++) + napi_disable(&sp->mac_control.rings[i].napi); + } else { + napi_disable(&sp->napi); + } + } +err_fill_buff: + s2io_reset(sp); + free_rx_buffers(sp); + return ret; } /** From f111606b63ff2282428ffbac0447c871eb957b6c Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 10:54:32 +0800 Subject: [PATCH 324/328] net: mv643xx_eth: disable napi when init rxq or txq failed in mv643xx_eth_open() When failed to init rxq or txq in mv643xx_eth_open() for opening device, napi isn't disabled. When open mv643xx_eth device next time, it will trigger a BUG_ON() in napi_enable(). Compile tested only. Fixes: 2257e05c1705 ("mv643xx_eth: get rid of receive-side locking") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109025432.80900-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/mv643xx_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 707993b445d1..8941f69d93e9 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2481,6 +2481,7 @@ out_free: for (i = 0; i < mp->rxq_count; i++) rxq_deinit(mp->rxq + i); out: + napi_disable(&mp->napi); free_irq(dev->irq, dev); return err; From acce40037041f97baad18142bb253064491ebde3 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 12:40:16 +0800 Subject: [PATCH 325/328] ethernet: tundra: free irq when alloc ring failed in tsi108_open() When alloc tx/rx ring failed in tsi108_open(), it doesn't free irq. Fix it. Fixes: 5e123b844a1c ("[PATCH] Add tsi108/9 On Chip Ethernet device driver support") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109044016.126866-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/tundra/tsi108_eth.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 2cd2afc3fff0..d09d352e1c0a 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1290,12 +1290,15 @@ static int tsi108_open(struct net_device *dev) data->rxring = dma_alloc_coherent(&data->pdev->dev, rxring_size, &data->rxdma, GFP_KERNEL); - if (!data->rxring) + if (!data->rxring) { + free_irq(data->irq_num, dev); return -ENOMEM; + } data->txring = dma_alloc_coherent(&data->pdev->dev, txring_size, &data->txdma, GFP_KERNEL); if (!data->txring) { + free_irq(data->irq_num, dev); dma_free_coherent(&data->pdev->dev, rxring_size, data->rxring, data->rxdma); return -ENOMEM; From 23569b5652ee8e8e55a12f7835f59af6f3cefc30 Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Wed, 9 Nov 2022 17:07:34 +0800 Subject: [PATCH 326/328] net: macvlan: fix memory leaks of macvlan_common_newlink kmemleak reports memory leaks in macvlan_common_newlink, as follows: ip link add link eth0 name .. type macvlan mode source macaddr add kmemleak reports: unreferenced object 0xffff8880109bb140 (size 64): comm "ip", pid 284, jiffies 4294986150 (age 430.108s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 b8 aa 5a 12 80 88 ff ff ..........Z..... 80 1b fa 0d 80 88 ff ff 1e ff ac af c7 c1 6b 6b ..............kk backtrace: [] kmem_cache_alloc_trace+0x1c7/0x300 [] macvlan_hash_add_source+0x45/0xc0 [] macvlan_changelink_sources+0xd7/0x170 [] macvlan_common_newlink+0x38c/0x5a0 [] macvlan_newlink+0xe/0x20 [] __rtnl_newlink+0x7af/0xa50 [] rtnl_newlink+0x48/0x70 ... In the scenario where the macvlan mode is configured as 'source', macvlan_changelink_sources() will be execured to reconfigure list of remote source mac addresses, at the same time, if register_netdevice() return an error, the resource generated by macvlan_changelink_sources() is not cleaned up. Using this patch, in the case of an error, it will execute macvlan_flush_sources() to ensure that the resource is cleaned up. Fixes: aa5fd0fb7748 ("driver: macvlan: Destroy new macvlan port if macvlan_common_newlink failed.") Signed-off-by: Chuang Wang Link: https://lore.kernel.org/r/20221109090735.690500-1-nashuiliang@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/macvlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c5cfe8555199..578897aaada0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1533,8 +1533,10 @@ destroy_macvlan_port: /* the macvlan port may be freed by macvlan_uninit when fail to register. * so we destroy the macvlan port only when it's valid. */ - if (create && macvlan_port_get_rtnl(lowerdev)) + if (create && macvlan_port_get_rtnl(lowerdev)) { + macvlan_flush_sources(port, vlan); macvlan_port_destroy(port->dev); + } return err; } EXPORT_SYMBOL_GPL(macvlan_common_newlink); From 6ce3df596be25d95c2cddc9e1b673394948e3732 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 9 Nov 2022 15:19:07 -0800 Subject: [PATCH 327/328] MAINTAINERS: Move Vivien to CREDITS Last patch from Vivien was nearly 3 years ago and he has not reviewed or responded to DSA patches since then, move to CREDITS. Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221109231907.621678-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- CREDITS | 5 +++++ MAINTAINERS | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 1841184c834d..54672cbcd719 100644 --- a/CREDITS +++ b/CREDITS @@ -918,6 +918,11 @@ S: Ottawa, Ontario S: K1N 6Z9 S: CANADA +N: Vivien Didelot +E: vivien.didelot@gmail.com +D: DSA framework and MV88E6XXX driver +S: Montreal, Quebec, Canada + N: Jeff Dike E: jdike@karaya.com W: http://user-mode-linux.sourceforge.net diff --git a/MAINTAINERS b/MAINTAINERS index e1bc31a6624b..239407627be2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12226,7 +12226,6 @@ F: arch/mips/boot/dts/img/pistachio* MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER M: Andrew Lunn -M: Vivien Didelot L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/dsa/marvell.txt @@ -14324,7 +14323,6 @@ F: drivers/net/wireless/ NETWORKING [DSA] M: Andrew Lunn -M: Vivien Didelot M: Florian Fainelli M: Vladimir Oltean S: Maintained From de91b3197d15172407608b2c357aab7ac1451e2b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 9 Nov 2022 15:01:16 +0000 Subject: [PATCH 328/328] eth: sp7021: drop free_netdev() from spl2sw_init_netdev() It's not necessary to free netdev allocated with devm_alloc_etherdev() and using free_netdev() leads to double free. Fixes: fd3040b9394c ("net: ethernet: Add driver for Sunplus SP7021") Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20221109150116.2988194-1-weiyongjun@huaweicloud.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sunplus/spl2sw_driver.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/sunplus/spl2sw_driver.c b/drivers/net/ethernet/sunplus/spl2sw_driver.c index 9be585237277..c499a14314f1 100644 --- a/drivers/net/ethernet/sunplus/spl2sw_driver.c +++ b/drivers/net/ethernet/sunplus/spl2sw_driver.c @@ -287,7 +287,6 @@ static u32 spl2sw_init_netdev(struct platform_device *pdev, u8 *mac_addr, if (ret) { dev_err(&pdev->dev, "Failed to register net device \"%s\"!\n", ndev->name); - free_netdev(ndev); *r_ndev = NULL; return ret; }