From a61675294735570daca3779bd1dbb3715f7232bd Mon Sep 17 00:00:00 2001 From: Chen Aotian Date: Sun, 9 Apr 2023 10:20:48 +0800 Subject: [PATCH 001/934] ieee802154: hwsim: Fix possible memory leaks After replacing e->info, it is necessary to free the old einfo. Fixes: f25da51fdc38 ("ieee802154: hwsim: add replacement for fakelb") Reviewed-by: Miquel Raynal Reviewed-by: Alexander Aring Signed-off-by: Chen Aotian Link: https://lore.kernel.org/r/20230409022048.61223-1-chenaotian2@163.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mac802154_hwsim.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 8445c2189d11..31cba9aa7636 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -685,7 +685,7 @@ static int hwsim_del_edge_nl(struct sk_buff *msg, struct genl_info *info) static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info) { struct nlattr *edge_attrs[MAC802154_HWSIM_EDGE_ATTR_MAX + 1]; - struct hwsim_edge_info *einfo; + struct hwsim_edge_info *einfo, *einfo_old; struct hwsim_phy *phy_v0; struct hwsim_edge *e; u32 v0, v1; @@ -723,8 +723,10 @@ static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info) list_for_each_entry_rcu(e, &phy_v0->edges, list) { if (e->endpoint->idx == v1) { einfo->lqi = lqi; - rcu_assign_pointer(e->info, einfo); + einfo_old = rcu_replace_pointer(e->info, einfo, + lockdep_is_held(&hwsim_phys_lock)); rcu_read_unlock(); + kfree_rcu(einfo_old, rcu); mutex_unlock(&hwsim_phys_lock); return 0; } From e3a0877e7e66b465588f6262680437024edccb9f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 11 Apr 2023 11:01:21 +0200 Subject: [PATCH 002/934] MAINTAINERS: Update wpan tree The wpan maintainers group is switching from Stefan's tree to a group tree called 'wpan'. We will now maintain: * wpan/wpan.git master: Fixes targeting the 'net' tree * wpan/wpan-next.git master: Features targeting the 'net-next' tree * wpan/wpan-next.git staging: Same as the wpan-next master branch, but we will push there first, expecting robots to parse the tree and report mistakes we would have not catch. This branch can be rebased and force pushed, unlike the others. Signed-off-by: Miquel Raynal Acked-by: Alexander Aring [Fixed two small typos stefan@datenfreihafen.org] Link: https://lore.kernel.org/r/20230411090122.419761-1-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3bc692d65328..5087b97d8c77 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9890,8 +9890,8 @@ M: Miquel Raynal L: linux-wpan@vger.kernel.org S: Maintained W: https://linux-wpan.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan-next.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wpan/wpan.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wpan/wpan-next.git F: Documentation/networking/ieee802154.rst F: drivers/net/ieee802154/ F: include/linux/ieee802154.h From bd4e3d82f4ccb422672029b099e402e5b3acd5ee Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 11 Apr 2023 11:01:22 +0200 Subject: [PATCH 003/934] MAINTAINERS: Add wpan patchwork This patchwork instance is hosted on kernel.org and has been used for a long time already, it was just not mentioned in MAINTAINERS. Signed-off-by: Miquel Raynal Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20230411090122.419761-2-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5087b97d8c77..ac67099d0755 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9890,6 +9890,7 @@ M: Miquel Raynal L: linux-wpan@vger.kernel.org S: Maintained W: https://linux-wpan.org/ +Q: https://patchwork.kernel.org/project/linux-wpan/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wpan/wpan.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/wpan/wpan-next.git F: Documentation/networking/ieee802154.rst From 529de2f1ca3f0898c0d905b7d355a43dce1de7dc Mon Sep 17 00:00:00 2001 From: Maximilian Weigand Date: Mon, 1 May 2023 17:07:49 -0700 Subject: [PATCH 004/934] Input: cyttsp5 - fix array length The cmd array should be initialized with the proper command size and not with the actual command value that is sent to the touchscreen. Signed-off-by: Maximilian Weigand Reviewed-by: Alistair Francis Link: https://lore.kernel.org/r/20230501113010.891786-2-mweigand@mweigand.net Fixes: 5b0c03e24a06 ("Input: Add driver for Cypress Generation 5 touchscreen") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/cyttsp5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/cyttsp5.c b/drivers/input/touchscreen/cyttsp5.c index 30102cb80fac..3c9d07218f48 100644 --- a/drivers/input/touchscreen/cyttsp5.c +++ b/drivers/input/touchscreen/cyttsp5.c @@ -560,7 +560,7 @@ static int cyttsp5_hid_output_get_sysinfo(struct cyttsp5 *ts) static int cyttsp5_hid_output_bl_launch_app(struct cyttsp5 *ts) { int rc; - u8 cmd[HID_OUTPUT_BL_LAUNCH_APP]; + u8 cmd[HID_OUTPUT_BL_LAUNCH_APP_SIZE]; u16 crc; put_unaligned_le16(HID_OUTPUT_BL_LAUNCH_APP_SIZE, cmd); From 978134c4b192ed04ecf699be3e1b4d23b5d20457 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 5 May 2023 11:16:07 -0700 Subject: [PATCH 005/934] Input: fix open count when closing inhibited device Because the kernel increments device's open count in input_open_device() even if device is inhibited, the counter should always be decremented in input_close_device() to keep it balanced. Fixes: a181616487db ("Input: Add "inhibited" property") Reviewed-by: Peter Hutterer Link: https://lore.kernel.org/r/ZFFz0xAdPNSL3PT7@google.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/input.c b/drivers/input/input.c index 37e876d45eb9..641eb86f276e 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -703,7 +703,7 @@ void input_close_device(struct input_handle *handle) __input_release_device(handle); - if (!dev->inhibited && !--dev->users) { + if (!--dev->users && !dev->inhibited) { if (dev->poller) input_dev_poller_stop(dev->poller); if (dev->close) From 03633c4ef1fb5ee119296dfe0c411656a9b5e04f Mon Sep 17 00:00:00 2001 From: Lorenz Brun Date: Fri, 21 Apr 2023 23:38:41 +0200 Subject: [PATCH 006/934] arm64: dts: rockchip: fix USB regulator on ROCK64 Currently the ROCK64 device tree specifies two regulators, vcc_host_5v and vcc_host1_5v for USB VBUS on the device. Both of those are however specified with RK_PA2 as the GPIO enabling them, causing the following error when booting: rockchip-pinctrl pinctrl: pin gpio0-2 already requested by vcc-host-5v-regulator; cannot claim for vcc-host1-5v-regulator rockchip-pinctrl pinctrl: pin-2 (vcc-host1-5v-regulator) status -22 rockchip-pinctrl pinctrl: could not request pin 2 (gpio0-2) from group usb20-host-drv on device rockchip-pinctrl reg-fixed-voltage vcc-host1-5v-regulator: Error applying setting, reverse things back Looking at the schematic, there are in fact three USB regulators, vcc_host_5v, vcc_host1_5v and vcc_otg_v5. But the enable signal for all three is driven by Q2604 which is in turn driven by GPIO_A2/PA2. Since these three regulators are not controllable separately, I removed the second one which was causing the error and added labels for all rails to the single regulator. Signed-off-by: Lorenz Brun Tested-by: Diederik de Haas Link: https://lore.kernel.org/r/20230421213841.3079632-1-lorenz@brun.one Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328-rock64.dts | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index f69a38f42d2d..0a27fa5271f5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -37,7 +37,8 @@ vin-supply = <&vcc_io>; }; - vcc_host_5v: vcc-host-5v-regulator { + /* Common enable line for all of the rails mentioned in the labels */ + vcc_host_5v: vcc_host1_5v: vcc_otg_5v: vcc-host-5v-regulator { compatible = "regulator-fixed"; gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; @@ -48,17 +49,6 @@ vin-supply = <&vcc_sys>; }; - vcc_host1_5v: vcc_otg_5v: vcc-host1-5v-regulator { - compatible = "regulator-fixed"; - gpio = <&gpio0 RK_PA2 GPIO_ACTIVE_LOW>; - pinctrl-names = "default"; - pinctrl-0 = <&usb20_host_drv>; - regulator-name = "vcc_host1_5v"; - regulator-always-on; - regulator-boot-on; - vin-supply = <&vcc_sys>; - }; - vcc_sys: vcc-sys { compatible = "regulator-fixed"; regulator-name = "vcc_sys"; From 42dcd054a6493e1adf292c3e246d1a2a9258942e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 22 Apr 2023 00:31:48 +0200 Subject: [PATCH 007/934] arm64: dts: rockchip: add missing cache properties As all level 2 and level 3 caches are unified, add required cache-unified properties to fix warnings like: rk3588s-khadas-edge2.dtb: l3-cache: 'cache-unified' is a dependency of 'cache-size' Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230421223149.115185-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3308.dtsi | 1 + arch/arm64/boot/dts/rockchip/rk3328.dtsi | 1 + arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi index dd228a256a32..2ae4bb7d5e62 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi @@ -97,6 +97,7 @@ l2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 6d7a7bf72ac7..e729e7a22b23 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -103,6 +103,7 @@ l2: l2-cache0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 657c019d27fa..a3124bd2e092 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -229,6 +229,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -238,6 +239,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -247,6 +249,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -256,6 +259,7 @@ cache-line-size = <64>; cache-sets = <512>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -265,6 +269,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -274,6 +279,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -283,6 +289,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -292,6 +299,7 @@ cache-line-size = <64>; cache-sets = <1024>; cache-level = <2>; + cache-unified; next-level-cache = <&l3_cache>; }; @@ -301,6 +309,7 @@ cache-line-size = <64>; cache-sets = <4096>; cache-level = <3>; + cache-unified; }; }; From cf9ae4a0077496e8224d68fc88e3df13dd7e5f37 Mon Sep 17 00:00:00 2001 From: Nicolas Frattaroli Date: Fri, 21 Apr 2023 17:26:10 +0200 Subject: [PATCH 008/934] arm64: dts: rockchip: fix nEXTRST on SOQuartz In pre-production prototypes (of which I only know one person having one, Peter Geis), GPIO0 pin A5 was tied to the SDMMC power enable pin on the CM4 connector. On all production models, this is not the case; instead, this pin is used for the nEXTRST signal, and the SDMMC power enable pin is always pulled high. Since everyone currently using the SOQuartz device trees will want this change, it is made to the tree without splitting the trees into two separate ones of which users will then inevitably choose the wrong one. This fixes USB and PCIe on a wide variety of CM4IO-compatible boards which use the nEXTRST signal. Fixes: 5859b5a9c3ac ("arm64: dts: rockchip: add SoQuartz CM4IO dts") Signed-off-by: Nicolas Frattaroli Link: https://lore.kernel.org/r/20230421152610.21688-1-frattaroli.nicolas@gmail.com Signed-off-by: Heiko Stuebner --- .../boot/dts/rockchip/rk3566-soquartz-cm4.dts | 18 +++++++----- .../boot/dts/rockchip/rk3566-soquartz.dtsi | 29 +++++++++---------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts index 263ce40770dd..cddf6cd2fecb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz-cm4.dts @@ -28,6 +28,16 @@ regulator-max-microvolt = <5000000>; vin-supply = <&vcc12v_dcin>; }; + + vcc_sd_pwr: vcc-sd-pwr-regulator { + compatible = "regulator-fixed"; + regulator-name = "vcc_sd_pwr"; + regulator-always-on; + regulator-boot-on; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + vin-supply = <&vcc3v3_sys>; + }; }; /* phy for pcie */ @@ -130,13 +140,7 @@ }; &sdmmc0 { - vmmc-supply = <&sdmmc_pwr>; - status = "okay"; -}; - -&sdmmc_pwr { - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; + vmmc-supply = <&vcc_sd_pwr>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi index 102e448bc026..31aa2b8efe39 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi @@ -104,16 +104,6 @@ regulator-max-microvolt = <3300000>; vin-supply = <&vcc5v0_sys>; }; - - sdmmc_pwr: sdmmc-pwr-regulator { - compatible = "regulator-fixed"; - enable-active-high; - gpio = <&gpio0 RK_PA5 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&sdmmc_pwr_h>; - regulator-name = "sdmmc_pwr"; - status = "disabled"; - }; }; &cpu0 { @@ -155,6 +145,19 @@ status = "disabled"; }; +&gpio0 { + nextrst-hog { + gpio-hog; + /* + * GPIO_ACTIVE_LOW + output-low here means that the pin is set + * to high, because output-low decides the value pre-inversion. + */ + gpios = ; + line-name = "nEXTRST"; + output-low; + }; +}; + &gpu { mali-supply = <&vdd_gpu>; status = "okay"; @@ -538,12 +541,6 @@ rockchip,pins = <2 RK_PC2 RK_FUNC_GPIO &pcfg_pull_none>; }; }; - - sdmmc-pwr { - sdmmc_pwr_h: sdmmc-pwr-h { - rockchip,pins = <0 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>; - }; - }; }; &pmu_io_domains { From 70a640c0efa7667453c3911b13335304ce46ad8b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 26 Apr 2023 15:35:45 +0200 Subject: [PATCH 009/934] regmap: REGMAP_KUNIT should not select REGMAP Enabling a (modular) test should not silently enable additional kernel functionality, as that may increase the attack vector of a product. Fix this by: 1. making REGMAP visible if CONFIG_KUNIT_ALL_TESTS is enabled, 2. making REGMAP_KUNIT depend on REGMAP instead of selecting it. After this, one can safely enable CONFIG_KUNIT_ALL_TESTS=m to build modules for all appropriate tests for ones system, without pulling in extra unwanted functionality, while still allowing a tester to manually enable REGMAP and its test suite on a system where REGMAP is not enabled by default. Fixes: 2238959b6ad27040 ("regmap: Add some basic kunit tests") Signed-off-by: Geert Uytterhoeven Date: Fri, 28 Apr 2023 00:16:06 +0300 Subject: [PATCH 010/934] staging: octeon: delete my name from TODO contact I gave up using MIPS OCTEON hardware after the drivers were deleted from staging in 2019. Afterwards, the driver seems to be added back but the TODO contact name was not updated accordingly. Delete my name, as I still get mails from people asking help with the driver and their systems. Fixes: 422d97b8b05e ("Revert "staging: octeon: delete driver"") Signed-off-by: Aaro Koskinen Link: https://lore.kernel.org/r/20230427211606.GD881984@darkstar.musicnaut.iki.fi Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/TODO | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/octeon/TODO b/drivers/staging/octeon/TODO index 67a0a1f6b922..044e48e3d65f 100644 --- a/drivers/staging/octeon/TODO +++ b/drivers/staging/octeon/TODO @@ -6,4 +6,3 @@ TODO: - make driver self-contained instead of being split between staging and arch/mips/cavium-octeon. -Contact: Aaro Koskinen From cb6aeeb69af06b09e687fbef4da36a81c36b4994 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Mon, 24 Apr 2023 03:10:16 -0700 Subject: [PATCH 011/934] x86/hyperv/vtl: Add noop for realmode pointers Assign the realmode pointers to noop, instead of NULL to fix kernel panic. Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1682331016-22561-1-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_vtl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 1ba5d3b99b16..85d38b9f3586 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -20,6 +20,8 @@ void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); + x86_platform.realmode_reserve = x86_init_noop; + x86_platform.realmode_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; From ec97e112985c2581ee61854a4b74f080f6cdfc2c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 4 May 2023 15:41:55 -0700 Subject: [PATCH 012/934] Drivers: hv: vmbus: Call hv_synic_free() if hv_synic_alloc() fails Commit 572086325ce9 ("Drivers: hv: vmbus: Cleanup synic memory free path") says "Any memory allocations that succeeded will be freed when the caller cleans up by calling hv_synic_free()", but if the get_zeroed_page() in hv_synic_alloc() fails, currently hv_synic_free() is not really called in vmbus_bus_init(), consequently there will be a memory leak, e.g. hv_context.hv_numa_map is not freed in the error path. Fix this by updating the goto labels. Cc: stable@kernel.org Signed-off-by: Dexuan Cui Fixes: 4df4cb9e99f8 ("x86/hyperv: Initialize clockevents earlier in CPU onlining") Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20230504224155.10484-1-decui@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 1c65a6dfb9fa..67f95a29aeca 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1372,7 +1372,7 @@ static int vmbus_bus_init(void) ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online", hv_synic_init, hv_synic_cleanup); if (ret < 0) - goto err_cpuhp; + goto err_alloc; hyperv_cpuhp_online = ret; ret = vmbus_connect(); @@ -1392,9 +1392,8 @@ static int vmbus_bus_init(void) err_connect: cpuhp_remove_state(hyperv_cpuhp_online); -err_cpuhp: - hv_synic_free(); err_alloc: + hv_synic_free(); if (vmbus_irq == -1) { hv_remove_vmbus_handler(); } else { From 20a99a291d564a559cc2fd013b4824a3bb3f1db7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 11 May 2023 11:57:04 -0700 Subject: [PATCH 013/934] Input: soc_button_array - add invalid acpi_index DMI quirk handling Some devices have a wrong entry in their button array which points to a GPIO which is required in another driver, so soc_button_array must not claim it. A specific example of this is the Lenovo Yoga Book X90F / X90L, where the PNP0C40 home button entry points to a GPIO which is not a home button and which is required by the lenovo-yogabook driver. Add a DMI quirk table which can specify an ACPI GPIO resource index which should be skipped; and add an entry for the Lenovo Yoga Book X90F / X90L to this new DMI quirk table. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230414072116.4497-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/soc_button_array.c | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index 09489380afda..e79f5497948b 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -108,6 +108,27 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = { {} /* Terminating entry */ }; +/* + * Some devices have a wrong entry which points to a GPIO which is + * required in another driver, so this driver must not claim it. + */ +static const struct dmi_system_id dmi_invalid_acpi_index[] = { + { + /* + * Lenovo Yoga Book X90F / X90L, the PNP0C40 home button entry + * points to a GPIO which is not a home button and which is + * required by the lenovo-yogabook driver. + */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"), + }, + .driver_data = (void *)1l, + }, + {} /* Terminating entry */ +}; + /* * Get the Nth GPIO number from the ACPI object. */ @@ -137,6 +158,8 @@ soc_button_device_create(struct platform_device *pdev, struct platform_device *pd; struct gpio_keys_button *gpio_keys; struct gpio_keys_platform_data *gpio_keys_pdata; + const struct dmi_system_id *dmi_id; + int invalid_acpi_index = -1; int error, gpio, irq; int n_buttons = 0; @@ -154,10 +177,17 @@ soc_button_device_create(struct platform_device *pdev, gpio_keys = (void *)(gpio_keys_pdata + 1); n_buttons = 0; + dmi_id = dmi_first_match(dmi_invalid_acpi_index); + if (dmi_id) + invalid_acpi_index = (long)dmi_id->driver_data; + for (info = button_info; info->name; info++) { if (info->autorepeat != autorepeat) continue; + if (info->acpi_index == invalid_acpi_index) + continue; + error = soc_button_lookup_gpio(&pdev->dev, info->acpi_index, &gpio, &irq); if (error || irq < 0) { /* From 7b63a88bb62ba2ddf5fcd956be85fe46624628b9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 11 May 2023 12:08:37 -0700 Subject: [PATCH 014/934] Input: psmouse - fix OOB access in Elantech protocol The kernel only allocate 5 MT slots; check that transmitted slot ID falls within the acceptable range. Link: https://lore.kernel.org/r/ZFnEL91nrT789dbG@google.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index ece97f8c6a3e..2118b2075f43 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -674,10 +674,11 @@ static void process_packet_head_v4(struct psmouse *psmouse) struct input_dev *dev = psmouse->dev; struct elantech_data *etd = psmouse->private; unsigned char *packet = psmouse->packet; - int id = ((packet[3] & 0xe0) >> 5) - 1; + int id; int pres, traces; - if (id < 0) + id = ((packet[3] & 0xe0) >> 5) - 1; + if (id < 0 || id >= ETP_MAX_FINGERS) return; etd->mt[id].x = ((packet[1] & 0x0f) << 8) | packet[2]; @@ -707,7 +708,7 @@ static void process_packet_motion_v4(struct psmouse *psmouse) int id, sid; id = ((packet[0] & 0xe0) >> 5) - 1; - if (id < 0) + if (id < 0 || id >= ETP_MAX_FINGERS) return; sid = ((packet[3] & 0xe0) >> 5) - 1; @@ -728,7 +729,7 @@ static void process_packet_motion_v4(struct psmouse *psmouse) input_report_abs(dev, ABS_MT_POSITION_X, etd->mt[id].x); input_report_abs(dev, ABS_MT_POSITION_Y, etd->mt[id].y); - if (sid >= 0) { + if (sid >= 0 && sid < ETP_MAX_FINGERS) { etd->mt[sid].x += delta_x2 * weight; etd->mt[sid].y -= delta_y2 * weight; input_mt_slot(dev, sid); From 08c7f09356e45d093d1867c7a3c6ac6526e2f98b Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Sun, 7 May 2023 11:29:29 -0700 Subject: [PATCH 015/934] RDMA/bnxt_re: Fix the page_size used during the MR creation Driver populates the list of pages used for Memory region wrongly when page size is more than system page size. This is causing a failure when some of the applications that creates MR with page size as 2M. Since HW can support multiple page sizes, pass the correct page size while creating the MR. Also, driver need not adjust the number of pages when HW Queues are created with user memory. It should work with the number of dma blocks returned by ib_umem_num_dma_blocks. Fix this calculation also. Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") Fixes: f6919d56388c ("RDMA/bnxt_re: Code refactor while populating user MRs") Link: https://lore.kernel.org/r/1683484169-9539-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kalesh AP Signed-off-by: Kashyap Desai Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 12 ++---------- drivers/infiniband/hw/bnxt_re/qplib_sp.c | 7 +++---- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 126d4f26f75a..81b0c5e879f9 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -215,17 +215,9 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, return -EINVAL; hwq_attr->sginfo->npages = npages; } else { - unsigned long sginfo_num_pages = ib_umem_num_dma_blocks( - hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize); - + npages = ib_umem_num_dma_blocks(hwq_attr->sginfo->umem, + hwq_attr->sginfo->pgsize); hwq->is_user = true; - npages = sginfo_num_pages; - npages = (npages * PAGE_SIZE) / - BIT_ULL(hwq_attr->sginfo->pgshft); - if ((sginfo_num_pages * PAGE_SIZE) % - BIT_ULL(hwq_attr->sginfo->pgshft)) - if (!npages) - npages++; } if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 1714a1e23113..b967a17a44be 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -617,16 +617,15 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, /* Free the hwq if it already exist, must be a rereg */ if (mr->hwq.max_elements) bnxt_qplib_free_hwq(res, &mr->hwq); - /* Use system PAGE_SIZE */ hwq_attr.res = res; hwq_attr.depth = pages; - hwq_attr.stride = buf_pg_size; + hwq_attr.stride = sizeof(dma_addr_t); hwq_attr.type = HWQ_TYPE_MR; hwq_attr.sginfo = &sginfo; hwq_attr.sginfo->umem = umem; hwq_attr.sginfo->npages = pages; - hwq_attr.sginfo->pgsize = PAGE_SIZE; - hwq_attr.sginfo->pgshft = PAGE_SHIFT; + hwq_attr.sginfo->pgsize = buf_pg_size; + hwq_attr.sginfo->pgshft = ilog2(buf_pg_size); rc = bnxt_qplib_alloc_init_hwq(&mr->hwq, &hwq_attr); if (rc) { dev_err(&res->pdev->dev, From 78b6a9a3f445e121ca08195084206cb8faa6999f Mon Sep 17 00:00:00 2001 From: Haoyue Xu Date: Sat, 6 May 2023 15:06:04 +0800 Subject: [PATCH 016/934] MAINTAINERS: Update maintainers of HiSilicon RoCE Wenpeng has moved to other technical areas, and Junxian will take over his responsibilities in maintaining this module. Link: https://lore.kernel.org/r/20230506070604.2982542-1-xuhaoyue1@hisilicon.com Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7e0b87d5aa2e..c56043737848 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9360,7 +9360,7 @@ F: drivers/crypto/hisilicon/zip/ HISILICON ROCE DRIVER M: Haoyue Xu -M: Wenpeng Liang +M: Junxian Huang L: linux-rdma@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt From 5325593377f07de31f7e473a9677a28a04c891f3 Mon Sep 17 00:00:00 2001 From: Tianling Shen Date: Thu, 11 May 2023 00:18:50 +0800 Subject: [PATCH 017/934] arm64: dts: rockchip: fix button reset pin for nanopi r5c The reset pin was wrongly assigned due to a copy/paste error, fix it to match actual gpio pin. While at it, remove a blank line from nanopi r5s dts. Fixes: 05620031408a ("arm64: dts: rockchip: Add FriendlyARM NanoPi R5C") Signed-off-by: Tianling Shen Link: https://lore.kernel.org/r/20230510161850.4866-1-cnsztl@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts index f70ca9f0470a..c718b8dbb9c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5c.dts @@ -106,7 +106,7 @@ rockchip-key { reset_button_pin: reset-button-pin { - rockchip,pins = <4 RK_PA0 RK_FUNC_GPIO &pcfg_pull_up>; + rockchip,pins = <0 RK_PB7 RK_FUNC_GPIO &pcfg_pull_up>; }; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts index 2a1118f15c29..b6ad8328c7eb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dts @@ -134,4 +134,3 @@ }; }; }; - From 20f291b88ecf23f674ee2ed980a4d93b7f16a06f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Feb 2023 18:47:30 +0300 Subject: [PATCH 018/934] iio: adc: imx93: fix a signedness bug in imx93_adc_read_raw() The problem is these lines: ret = vref_uv = regulator_get_voltage(adc->vref); if (ret < 0) The "ret" variable is type long and "vref_uv" is u32 so that means the condition can never be true on a 64bit system. A negative error code from regulator_get_voltage() would be cast to a high positive u32 value and then remain a high positive value when cast to a long. The "ret" variable only ever stores ints so it should be declared as an int. We can delete the "vref_uv" variable and use "ret" directly. Fixes: 7d02296ac8b8 ("iio: adc: add imx93 adc support") Signed-off-by: Dan Carpenter Reviewed-by: Haibo Chen Link: https://lore.kernel.org/r/Y+utEvjfjQRQo2QB@kili Signed-off-by: Jonathan Cameron --- drivers/iio/adc/imx93_adc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/imx93_adc.c b/drivers/iio/adc/imx93_adc.c index a775d2e40567..dce9ec91e4a7 100644 --- a/drivers/iio/adc/imx93_adc.c +++ b/drivers/iio/adc/imx93_adc.c @@ -236,8 +236,7 @@ static int imx93_adc_read_raw(struct iio_dev *indio_dev, { struct imx93_adc *adc = iio_priv(indio_dev); struct device *dev = adc->dev; - long ret; - u32 vref_uv; + int ret; switch (mask) { case IIO_CHAN_INFO_RAW: @@ -253,10 +252,10 @@ static int imx93_adc_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: - ret = vref_uv = regulator_get_voltage(adc->vref); + ret = regulator_get_voltage(adc->vref); if (ret < 0) return ret; - *val = vref_uv / 1000; + *val = ret / 1000; *val2 = 12; return IIO_VAL_FRACTIONAL_LOG2; From 00ffdd6fa90298522d45ca0c348b23485584dcdc Mon Sep 17 00:00:00 2001 From: ChiaEn Wu Date: Mon, 10 Apr 2023 18:34:22 +0800 Subject: [PATCH 019/934] iio: adc: mt6370: Fix ibus and ibat scaling value of some specific vendor ID chips The scale value of ibus and ibat on the datasheet is incorrect due to the customer report after the experimentation with some specific vendor ID chips. Fixes: c1404d1b659f ("iio: adc: mt6370: Add MediaTek MT6370 support") Signed-off-by: ChiaEn Wu Reviewed-by: Alexandre Mergnat Link: https://lore.kernel.org/r/1681122862-1994-1-git-send-email-chiaen_wu@richtek.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mt6370-adc.c | 53 ++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/mt6370-adc.c b/drivers/iio/adc/mt6370-adc.c index bc62e5a9d50d..0bc112135bca 100644 --- a/drivers/iio/adc/mt6370-adc.c +++ b/drivers/iio/adc/mt6370-adc.c @@ -19,6 +19,7 @@ #include +#define MT6370_REG_DEV_INFO 0x100 #define MT6370_REG_CHG_CTRL3 0x113 #define MT6370_REG_CHG_CTRL7 0x117 #define MT6370_REG_CHG_ADC 0x121 @@ -27,6 +28,7 @@ #define MT6370_ADC_START_MASK BIT(0) #define MT6370_ADC_IN_SEL_MASK GENMASK(7, 4) #define MT6370_AICR_ICHG_MASK GENMASK(7, 2) +#define MT6370_VENID_MASK GENMASK(7, 4) #define MT6370_AICR_100_mA 0x0 #define MT6370_AICR_150_mA 0x1 @@ -47,6 +49,10 @@ #define ADC_CONV_TIME_MS 35 #define ADC_CONV_POLLING_TIME_US 1000 +#define MT6370_VID_RT5081 0x8 +#define MT6370_VID_RT5081A 0xA +#define MT6370_VID_MT6370 0xE + struct mt6370_adc_data { struct device *dev; struct regmap *regmap; @@ -55,6 +61,7 @@ struct mt6370_adc_data { * from being read at the same time. */ struct mutex adc_lock; + unsigned int vid; }; static int mt6370_adc_read_channel(struct mt6370_adc_data *priv, int chan, @@ -98,6 +105,30 @@ adc_unlock: return ret; } +static int mt6370_adc_get_ibus_scale(struct mt6370_adc_data *priv) +{ + switch (priv->vid) { + case MT6370_VID_RT5081: + case MT6370_VID_RT5081A: + case MT6370_VID_MT6370: + return 3350; + default: + return 3875; + } +} + +static int mt6370_adc_get_ibat_scale(struct mt6370_adc_data *priv) +{ + switch (priv->vid) { + case MT6370_VID_RT5081: + case MT6370_VID_RT5081A: + case MT6370_VID_MT6370: + return 2680; + default: + return 3870; + } +} + static int mt6370_adc_read_scale(struct mt6370_adc_data *priv, int chan, int *val1, int *val2) { @@ -123,7 +154,7 @@ static int mt6370_adc_read_scale(struct mt6370_adc_data *priv, case MT6370_AICR_250_mA: case MT6370_AICR_300_mA: case MT6370_AICR_350_mA: - *val1 = 3350; + *val1 = mt6370_adc_get_ibus_scale(priv); break; default: *val1 = 5000; @@ -150,7 +181,7 @@ static int mt6370_adc_read_scale(struct mt6370_adc_data *priv, case MT6370_ICHG_600_mA: case MT6370_ICHG_700_mA: case MT6370_ICHG_800_mA: - *val1 = 2680; + *val1 = mt6370_adc_get_ibat_scale(priv); break; default: *val1 = 5000; @@ -251,6 +282,20 @@ static const struct iio_chan_spec mt6370_adc_channels[] = { MT6370_ADC_CHAN(TEMP_JC, IIO_TEMP, 12, BIT(IIO_CHAN_INFO_OFFSET)), }; +static int mt6370_get_vendor_info(struct mt6370_adc_data *priv) +{ + unsigned int dev_info; + int ret; + + ret = regmap_read(priv->regmap, MT6370_REG_DEV_INFO, &dev_info); + if (ret) + return ret; + + priv->vid = FIELD_GET(MT6370_VENID_MASK, dev_info); + + return 0; +} + static int mt6370_adc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -272,6 +317,10 @@ static int mt6370_adc_probe(struct platform_device *pdev) priv->regmap = regmap; mutex_init(&priv->adc_lock); + ret = mt6370_get_vendor_info(priv); + if (ret) + return dev_err_probe(dev, ret, "Failed to get vid\n"); + ret = regmap_write(priv->regmap, MT6370_REG_CHG_ADC, 0); if (ret) return dev_err_probe(dev, ret, "Failed to reset ADC\n"); From 265c82ea8b172129cb6d4eff41af856c3aff6168 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 13 Apr 2023 18:37:52 -0700 Subject: [PATCH 020/934] iio: tmag5273: Fix runtime PM leak on measurement error The tmag5273 gets a runtime PM reference before reading a measurement and releases it when done. But if the measurement fails the tmag5273_read_raw() function exits before releasing the reference. Make sure that this error path also releases the runtime PM reference. Fixes: 866a1389174b ("iio: magnetometer: add ti tmag5273 driver") Signed-off-by: Lars-Peter Clausen Acked-by: Gerald Loacker Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20230414013752.498767-1-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/tmag5273.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/magnetometer/tmag5273.c b/drivers/iio/magnetometer/tmag5273.c index 28bb7efe8df8..e155a75b3cd2 100644 --- a/drivers/iio/magnetometer/tmag5273.c +++ b/drivers/iio/magnetometer/tmag5273.c @@ -296,12 +296,13 @@ static int tmag5273_read_raw(struct iio_dev *indio_dev, return ret; ret = tmag5273_get_measure(data, &t, &x, &y, &z, &angle, &magnitude); - if (ret) - return ret; pm_runtime_mark_last_busy(data->dev); pm_runtime_put_autosuspend(data->dev); + if (ret) + return ret; + switch (chan->address) { case TEMPERATURE: *val = t; From 28f73ded19d403697f87473c9b85a27eb8ed9cf2 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 14 Apr 2023 08:07:02 -0700 Subject: [PATCH 021/934] iio: ad4130: Make sure clock provider gets removed The ad4130 driver registers a clock provider, but never removes it. This leaves a stale clock provider behind that references freed clocks when the device is unbound. Register a managed action to remove the clock provider when the device is removed. Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver") Signed-off-by: Lars-Peter Clausen Link: https://lore.kernel.org/r/20230414150702.518441-1-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad4130.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c index 38394341fd6e..5a5dd5e87ffc 100644 --- a/drivers/iio/adc/ad4130.c +++ b/drivers/iio/adc/ad4130.c @@ -1817,6 +1817,11 @@ static const struct clk_ops ad4130_int_clk_ops = { .unprepare = ad4130_int_clk_unprepare, }; +static void ad4130_clk_del_provider(void *of_node) +{ + of_clk_del_provider(of_node); +} + static int ad4130_setup_int_clk(struct ad4130_state *st) { struct device *dev = &st->spi->dev; @@ -1824,6 +1829,7 @@ static int ad4130_setup_int_clk(struct ad4130_state *st) struct clk_init_data init; const char *clk_name; struct clk *clk; + int ret; if (st->int_pin_sel == AD4130_INT_PIN_CLK || st->mclk_sel != AD4130_MCLK_76_8KHZ) @@ -1843,7 +1849,11 @@ static int ad4130_setup_int_clk(struct ad4130_state *st) if (IS_ERR(clk)) return PTR_ERR(clk); - return of_clk_add_provider(of_node, of_clk_src_simple_get, clk); + ret = of_clk_add_provider(of_node, of_clk_src_simple_get, clk); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, ad4130_clk_del_provider, of_node); } static int ad4130_setup(struct iio_dev *indio_dev) From 27b2ed5b6d53cd62fc61c3f259ae52f5cac23b66 Mon Sep 17 00:00:00 2001 From: Jiakai Luo Date: Sat, 22 Apr 2023 06:34:06 -0700 Subject: [PATCH 022/934] iio: adc: mxs-lradc: fix the order of two cleanup operations Smatch reports: drivers/iio/adc/mxs-lradc-adc.c:766 mxs_lradc_adc_probe() warn: missing unwind goto? the order of three init operation: 1.mxs_lradc_adc_trigger_init 2.iio_triggered_buffer_setup 3.mxs_lradc_adc_hw_init thus, the order of three cleanup operation should be: 1.mxs_lradc_adc_hw_stop 2.iio_triggered_buffer_cleanup 3.mxs_lradc_adc_trigger_remove we exchange the order of two cleanup operations, introducing the following differences: 1.if mxs_lradc_adc_trigger_init fails, returns directly; 2.if trigger_init succeeds but iio_triggered_buffer_setup fails, goto err_trig and remove the trigger. In addition, we also reorder the unwind that goes on in the remove() callback to match the new ordering. Fixes: 6dd112b9f85e ("iio: adc: mxs-lradc: Add support for ADC driver") Signed-off-by: Jiakai Luo Reviewed-by: Dongliang Mu Link: https://lore.kernel.org/r/20230422133407.72908-1-jkluo@hust.edu.cn Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mxs-lradc-adc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/mxs-lradc-adc.c b/drivers/iio/adc/mxs-lradc-adc.c index bca79a93cbe4..a50f39143d3e 100644 --- a/drivers/iio/adc/mxs-lradc-adc.c +++ b/drivers/iio/adc/mxs-lradc-adc.c @@ -757,13 +757,13 @@ static int mxs_lradc_adc_probe(struct platform_device *pdev) ret = mxs_lradc_adc_trigger_init(iio); if (ret) - goto err_trig; + return ret; ret = iio_triggered_buffer_setup(iio, &iio_pollfunc_store_time, &mxs_lradc_adc_trigger_handler, &mxs_lradc_adc_buffer_ops); if (ret) - return ret; + goto err_trig; adc->vref_mv = mxs_lradc_adc_vref_mv[lradc->soc]; @@ -801,9 +801,9 @@ static int mxs_lradc_adc_probe(struct platform_device *pdev) err_dev: mxs_lradc_adc_hw_stop(adc); - mxs_lradc_adc_trigger_remove(iio); -err_trig: iio_triggered_buffer_cleanup(iio); +err_trig: + mxs_lradc_adc_trigger_remove(iio); return ret; } @@ -814,8 +814,8 @@ static int mxs_lradc_adc_remove(struct platform_device *pdev) iio_device_unregister(iio); mxs_lradc_adc_hw_stop(adc); - mxs_lradc_adc_trigger_remove(iio); iio_triggered_buffer_cleanup(iio); + mxs_lradc_adc_trigger_remove(iio); return 0; } From 279c3a2a5eb2c1054fbe38e5c33be08584229047 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Apr 2023 13:41:56 +0300 Subject: [PATCH 023/934] iio: adc: palmas: fix off by one bugs Valid values for "adc_chan" are zero to (PALMAS_ADC_CH_MAX - 1). Smatch detects some buffer overflows caused by this: drivers/iio/adc/palmas_gpadc.c:721 palmas_gpadc_read_event_value() error: buffer overflow 'adc->thresholds' 16 <= 16 drivers/iio/adc/palmas_gpadc.c:758 palmas_gpadc_write_event_value() error: buffer overflow 'adc->thresholds' 16 <= 16 The effect of this bug in other functions is more complicated but obviously we should fix all of them. Fixes: a99544c6c883 ("iio: adc: palmas: add support for iio threshold events") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/14fee94a-7db7-4371-b7d6-e94d86b9561e@kili.mountain Signed-off-by: Jonathan Cameron --- drivers/iio/adc/palmas_gpadc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c index c1c439215aeb..7dfc9c927a23 100644 --- a/drivers/iio/adc/palmas_gpadc.c +++ b/drivers/iio/adc/palmas_gpadc.c @@ -547,7 +547,7 @@ static int palmas_gpadc_read_raw(struct iio_dev *indio_dev, int adc_chan = chan->channel; int ret = 0; - if (adc_chan > PALMAS_ADC_CH_MAX) + if (adc_chan >= PALMAS_ADC_CH_MAX) return -EINVAL; mutex_lock(&adc->lock); @@ -595,7 +595,7 @@ static int palmas_gpadc_read_event_config(struct iio_dev *indio_dev, int adc_chan = chan->channel; int ret = 0; - if (adc_chan > PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH) + if (adc_chan >= PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH) return -EINVAL; mutex_lock(&adc->lock); @@ -684,7 +684,7 @@ static int palmas_gpadc_write_event_config(struct iio_dev *indio_dev, int adc_chan = chan->channel; int ret; - if (adc_chan > PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH) + if (adc_chan >= PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH) return -EINVAL; mutex_lock(&adc->lock); @@ -710,7 +710,7 @@ static int palmas_gpadc_read_event_value(struct iio_dev *indio_dev, int adc_chan = chan->channel; int ret; - if (adc_chan > PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH) + if (adc_chan >= PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH) return -EINVAL; mutex_lock(&adc->lock); @@ -744,7 +744,7 @@ static int palmas_gpadc_write_event_value(struct iio_dev *indio_dev, int old; int ret; - if (adc_chan > PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH) + if (adc_chan >= PALMAS_ADC_CH_MAX || type != IIO_EV_TYPE_THRESH) return -EINVAL; mutex_lock(&adc->lock); From 672cde9ef80ffde9e76d38f7aa2b287c4a18de9a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 21 Apr 2023 08:46:11 +0300 Subject: [PATCH 024/934] iio: fix doc for iio_gts_find_sel_by_int_time The kerneldoc for iio_gts_find_sel_by_int_time() has an error. Documentation states that function is searching a selector for a HW-gain while it is searching a selector for an integration time. Fix the documentation by saying the function is looking for a selector for an integration time. Fixes: 38416c28e168 ("iio: light: Add gain-time-scale helpers") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/ZEIjI4YUzqPZk/9X@fedora Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-gts-helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iio/iio-gts-helper.h b/include/linux/iio/iio-gts-helper.h index dd64e544a3da..9cb6c80dea71 100644 --- a/include/linux/iio/iio-gts-helper.h +++ b/include/linux/iio/iio-gts-helper.h @@ -135,7 +135,7 @@ static inline int iio_gts_find_int_time_by_sel(struct iio_gts *gts, int sel) /** * iio_gts_find_sel_by_int_time - find selector matching integration time * @gts: Gain time scale descriptor - * @gain: HW-gain for which matching selector is searched for + * @time: Integration time for which matching selector is searched for * * Return: a selector matching given integration time or -EINVAL if * selector was not found. From b7b04f393ffa1530213f522115e7d77761eb5c4f Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 17 Apr 2023 12:19:57 +0300 Subject: [PATCH 025/934] iio: bu27034: Fix integration time The bu27034 uses micro seconds for integration time configuration. This is incorrect as the ABI mandates use of seconds. Change BU27034 driver to use seconds for integration time. Fixes: e52afbd61039 ("iio: light: ROHM BU27034 Ambient Light Sensor") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/a05647669af22ba919c7c87dccb43975e3235a87.1681722914.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/light/rohm-bu27034.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/iio/light/rohm-bu27034.c b/drivers/iio/light/rohm-bu27034.c index e486dcf35eba..25c9b79574a5 100644 --- a/drivers/iio/light/rohm-bu27034.c +++ b/drivers/iio/light/rohm-bu27034.c @@ -1167,11 +1167,12 @@ static int bu27034_read_raw(struct iio_dev *idev, switch (mask) { case IIO_CHAN_INFO_INT_TIME: - *val = bu27034_get_int_time(data); - if (*val < 0) - return *val; + *val = 0; + *val2 = bu27034_get_int_time(data); + if (*val2 < 0) + return *val2; - return IIO_VAL_INT; + return IIO_VAL_INT_PLUS_MICRO; case IIO_CHAN_INFO_SCALE: return bu27034_get_scale(data, chan->channel, val, val2); @@ -1229,7 +1230,10 @@ static int bu27034_write_raw(struct iio_dev *idev, ret = bu27034_set_scale(data, chan->channel, val, val2); break; case IIO_CHAN_INFO_INT_TIME: - ret = bu27034_try_set_int_time(data, val); + if (!val) + ret = bu27034_try_set_int_time(data, val2); + else + ret = -EINVAL; break; default: ret = -EINVAL; From e65065132fa6d9e1fd3b7418eae5215e3b7bf6c7 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 17 Apr 2023 12:20:18 +0300 Subject: [PATCH 026/934] iio: gts-helpers: fix integration time units The IIO ABI mandates expressing integration times in seconds. The GTS helper errorneously uses micro seconds in integration_times_available. Fix this by converting the lists to IIO_VAL_INT_PLUS_MICRO. Fixes: 38416c28e168 ("iio: light: Add gain-time-scale helpers") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/eeacd192c259e885850b5a2dd8b776bccfc44fa8.1681722914.git.mazziesaccount@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-gts-helper.c | 42 ++++++++++++++++++++------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/drivers/iio/industrialio-gts-helper.c b/drivers/iio/industrialio-gts-helper.c index 8bb68975b259..7653261d2dc2 100644 --- a/drivers/iio/industrialio-gts-helper.c +++ b/drivers/iio/industrialio-gts-helper.c @@ -337,6 +337,17 @@ free_gains: return ret; } +static void iio_gts_us_to_int_micro(int *time_us, int *int_micro_times, + int num_times) +{ + int i; + + for (i = 0; i < num_times; i++) { + int_micro_times[i * 2] = time_us[i] / 1000000; + int_micro_times[i * 2 + 1] = time_us[i] % 1000000; + } +} + /** * iio_gts_build_avail_time_table - build table of available integration times * @gts: Gain time scale descriptor @@ -351,7 +362,7 @@ free_gains: */ static int iio_gts_build_avail_time_table(struct iio_gts *gts) { - int *times, i, j, idx = 0; + int *times, i, j, idx = 0, *int_micro_times; if (!gts->num_itime) return 0; @@ -378,13 +389,24 @@ static int iio_gts_build_avail_time_table(struct iio_gts *gts) } } } - gts->avail_time_tables = times; - /* - * This is just to survive a unlikely corner-case where times in the - * given time table were not unique. Else we could just trust the - * gts->num_itime. - */ - gts->num_avail_time_tables = idx; + + /* create a list of times formatted as list of IIO_VAL_INT_PLUS_MICRO */ + int_micro_times = kcalloc(idx, sizeof(int) * 2, GFP_KERNEL); + if (int_micro_times) { + /* + * This is just to survive a unlikely corner-case where times in + * the given time table were not unique. Else we could just + * trust the gts->num_itime. + */ + gts->num_avail_time_tables = idx; + iio_gts_us_to_int_micro(times, int_micro_times, idx); + } + + gts->avail_time_tables = int_micro_times; + kfree(times); + + if (!int_micro_times) + return -ENOMEM; return 0; } @@ -683,8 +705,8 @@ int iio_gts_avail_times(struct iio_gts *gts, const int **vals, int *type, return -EINVAL; *vals = gts->avail_time_tables; - *type = IIO_VAL_INT; - *length = gts->num_avail_time_tables; + *type = IIO_VAL_INT_PLUS_MICRO; + *length = gts->num_avail_time_tables * 2; return IIO_AVAIL_LIST; } From 79b8ded9d9c595db9bd5b2f62f5f738b36de1e22 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 16 Apr 2023 23:24:09 +0200 Subject: [PATCH 027/934] iio: accel: st_accel: Fix invalid mount_matrix on devices without ACPI _ONT method When apply_acpi_orientation() fails, st_accel_common_probe() will fall back to iio_read_mount_matrix(), which checks for a mount-matrix device property and if that is not set falls back to the identity matrix. But when a sensor has no ACPI companion fwnode, or when the ACPI fwnode does not have a "_ONT" method apply_acpi_orientation() was returning 0, causing iio_read_mount_matrix() to never get called resulting in an invalid mount_matrix: [root@fedora ~]# cat /sys/bus/iio/devices/iio\:device0/mount_matrix (null), (null), (null); (null), (null), (null); (null), (null), (null) Fix this by making apply_acpi_orientation() always return an error when it did not set the mount_matrix. Fixes: 3d8ad94bb175 ("iio: accel: st_sensors: Support generic mounting matrix") Signed-off-by: Hans de Goede Reviewed-by: Linus Walleij Tested-by: Marius Hoch Link: https://lore.kernel.org/r/20230416212409.310936-1-hdegoede@redhat.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 5f7d81b44b1d..282e539157f8 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -1291,12 +1291,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev) adev = ACPI_COMPANION(indio_dev->dev.parent); if (!adev) - return 0; + return -ENXIO; /* Read _ONT data, which should be a package of 6 integers. */ status = acpi_evaluate_object(adev->handle, "_ONT", NULL, &buffer); if (status == AE_NOT_FOUND) { - return 0; + return -ENXIO; } else if (ACPI_FAILURE(status)) { dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n", status); From e55245d115bb9054cb72cdd5dda5660f4484873a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 30 Mar 2023 12:21:00 +0200 Subject: [PATCH 028/934] iio: adc: ad7192: Change "shorted" channels to differential The AD7192 provides a specific channel configuration where both negative and positive inputs are connected to AIN2. This was represented in the ad7192 driver as a IIO channel with .channel = 2 and .extended_name set to "shorted". The problem with this approach, is that the driver provided two IIO channels with the identifier .channel = 2; one "shorted" and the other not. This goes against the IIO ABI, as a channel identifier should be unique. Address this issue by changing "shorted" channels to being differential instead, with channel 2 vs. itself, as we're actually measuring AIN2 vs. itself. Note that the fix tag is for the commit that moved the driver out of staging. The bug existed before that, but backporting would become very complex further down and unlikely to happen. Fixes: b581f748cce0 ("staging: iio: adc: ad7192: move out of staging") Signed-off-by: Paul Cercueil Co-developed-by: Alisa Roman Signed-off-by: Alisa Roman Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20230330102100.17590-1-paul@crapouillou.net Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7192.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index 55a6ab591016..99bb604b78c8 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -897,10 +897,6 @@ static const struct iio_info ad7195_info = { __AD719x_CHANNEL(_si, _channel1, -1, _address, NULL, IIO_VOLTAGE, \ BIT(IIO_CHAN_INFO_SCALE), ad7192_calibsys_ext_info) -#define AD719x_SHORTED_CHANNEL(_si, _channel1, _address) \ - __AD719x_CHANNEL(_si, _channel1, -1, _address, "shorted", IIO_VOLTAGE, \ - BIT(IIO_CHAN_INFO_SCALE), ad7192_calibsys_ext_info) - #define AD719x_TEMP_CHANNEL(_si, _address) \ __AD719x_CHANNEL(_si, 0, -1, _address, NULL, IIO_TEMP, 0, NULL) @@ -908,7 +904,7 @@ static const struct iio_chan_spec ad7192_channels[] = { AD719x_DIFF_CHANNEL(0, 1, 2, AD7192_CH_AIN1P_AIN2M), AD719x_DIFF_CHANNEL(1, 3, 4, AD7192_CH_AIN3P_AIN4M), AD719x_TEMP_CHANNEL(2, AD7192_CH_TEMP), - AD719x_SHORTED_CHANNEL(3, 2, AD7192_CH_AIN2P_AIN2M), + AD719x_DIFF_CHANNEL(3, 2, 2, AD7192_CH_AIN2P_AIN2M), AD719x_CHANNEL(4, 1, AD7192_CH_AIN1), AD719x_CHANNEL(5, 2, AD7192_CH_AIN2), AD719x_CHANNEL(6, 3, AD7192_CH_AIN3), @@ -922,7 +918,7 @@ static const struct iio_chan_spec ad7193_channels[] = { AD719x_DIFF_CHANNEL(2, 5, 6, AD7193_CH_AIN5P_AIN6M), AD719x_DIFF_CHANNEL(3, 7, 8, AD7193_CH_AIN7P_AIN8M), AD719x_TEMP_CHANNEL(4, AD7193_CH_TEMP), - AD719x_SHORTED_CHANNEL(5, 2, AD7193_CH_AIN2P_AIN2M), + AD719x_DIFF_CHANNEL(5, 2, 2, AD7193_CH_AIN2P_AIN2M), AD719x_CHANNEL(6, 1, AD7193_CH_AIN1), AD719x_CHANNEL(7, 2, AD7193_CH_AIN2), AD719x_CHANNEL(8, 3, AD7193_CH_AIN3), From 9c0d6ccd7d6bbd275e390b55a3390b4274291d95 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Wed, 3 May 2023 18:20:28 +0200 Subject: [PATCH 029/934] iio: adc: stm32-adc: skip adc-diff-channels setup if none is present If no adc differential channels are defined driver will fail with EINVAL: stm32-adc: probe of 48003000.adc:adc@0 failed with error -22 Fix this by skipping the initialization if no channels are defined. This applies only to the legacy way of initializing adc channels. Fixes: d7705f35448a ("iio: adc: stm32-adc: convert to device properties") Signed-off-by: Sean Nyekjaer Link: https://lore.kernel.org/r/20230503162029.3654093-1-sean@geanix.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 1aadb2ad2cab..c105d82c3e45 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -2006,16 +2006,15 @@ static int stm32_adc_get_legacy_chan_count(struct iio_dev *indio_dev, struct stm * to get the *real* number of channels. */ ret = device_property_count_u32(dev, "st,adc-diff-channels"); - if (ret < 0) - return ret; - - ret /= (int)(sizeof(struct stm32_adc_diff_channel) / sizeof(u32)); - if (ret > adc_info->max_channels) { - dev_err(&indio_dev->dev, "Bad st,adc-diff-channels?\n"); - return -EINVAL; - } else if (ret > 0) { - adc->num_diff = ret; - num_channels += ret; + if (ret > 0) { + ret /= (int)(sizeof(struct stm32_adc_diff_channel) / sizeof(u32)); + if (ret > adc_info->max_channels) { + dev_err(&indio_dev->dev, "Bad st,adc-diff-channels?\n"); + return -EINVAL; + } else if (ret > 0) { + adc->num_diff = ret; + num_channels += ret; + } } /* Optional sample time is provided either for each, or all channels */ From 3e27ef0ced49f8ae7883c25fadf76a2086e99025 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Wed, 3 May 2023 18:20:29 +0200 Subject: [PATCH 030/934] iio: adc: stm32-adc: skip adc-channels setup if none is present If only adc differential channels are defined driver will fail with stm32-adc: probe of 48003000.adc:adc@0 failed with error -22 Fix this by skipping the initialization if no channels are defined. This applies only to the legacy way of initializing adc channels. Fixes: d7705f35448a ("iio: adc: stm32-adc: convert to device properties") Signed-off-by: Sean Nyekjaer Reviewed-by: Olivier Moysan Link: https://lore.kernel.org/r/20230503162029.3654093-2-sean@geanix.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc.c | 42 ++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index c105d82c3e45..bd7e2408bf28 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -2036,6 +2036,7 @@ static int stm32_adc_legacy_chan_init(struct iio_dev *indio_dev, struct stm32_adc_diff_channel diff[STM32_ADC_CH_MAX]; struct device *dev = &indio_dev->dev; u32 num_diff = adc->num_diff; + int num_se = nchans - num_diff; int size = num_diff * sizeof(*diff) / sizeof(u32); int scan_index = 0, ret, i, c; u32 smp = 0, smps[STM32_ADC_CH_MAX], chans[STM32_ADC_CH_MAX]; @@ -2062,29 +2063,32 @@ static int stm32_adc_legacy_chan_init(struct iio_dev *indio_dev, scan_index++; } } - - ret = device_property_read_u32_array(dev, "st,adc-channels", chans, - nchans); - if (ret) - return ret; - - for (c = 0; c < nchans; c++) { - if (chans[c] >= adc_info->max_channels) { - dev_err(&indio_dev->dev, "Invalid channel %d\n", - chans[c]); - return -EINVAL; + if (num_se > 0) { + ret = device_property_read_u32_array(dev, "st,adc-channels", chans, num_se); + if (ret) { + dev_err(&indio_dev->dev, "Failed to get st,adc-channels %d\n", ret); + return ret; } - /* Channel can't be configured both as single-ended & diff */ - for (i = 0; i < num_diff; i++) { - if (chans[c] == diff[i].vinp) { - dev_err(&indio_dev->dev, "channel %d misconfigured\n", chans[c]); + for (c = 0; c < num_se; c++) { + if (chans[c] >= adc_info->max_channels) { + dev_err(&indio_dev->dev, "Invalid channel %d\n", + chans[c]); return -EINVAL; } + + /* Channel can't be configured both as single-ended & diff */ + for (i = 0; i < num_diff; i++) { + if (chans[c] == diff[i].vinp) { + dev_err(&indio_dev->dev, "channel %d misconfigured\n", + chans[c]); + return -EINVAL; + } + } + stm32_adc_chan_init_one(indio_dev, &channels[scan_index], + chans[c], 0, scan_index, false); + scan_index++; } - stm32_adc_chan_init_one(indio_dev, &channels[scan_index], - chans[c], 0, scan_index, false); - scan_index++; } if (adc->nsmps > 0) { @@ -2305,7 +2309,7 @@ static int stm32_adc_chan_fw_init(struct iio_dev *indio_dev, bool timestamping) if (legacy) ret = stm32_adc_legacy_chan_init(indio_dev, adc, channels, - num_channels); + timestamping ? num_channels - 1 : num_channels); else ret = stm32_adc_generic_chan_init(indio_dev, adc, channels); if (ret < 0) From 72c1d11074d6160f19760d57fc78b2a7f52bcb1a Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 24 Apr 2023 11:23:12 +0200 Subject: [PATCH 031/934] dt-bindings: iio: imx8qxp-adc: add missing vref-supply Although this property is used right now for IIO_CHAN_INFO_SCALE, this ADC has two internal reference voltages, which the driver currently doesn't make use of. Fixes: db73419d8c06 ("dt-bindings: iio: adc: Add binding documentation for NXP IMX8QXP ADC") Signed-off-by: Alexander Stein Reviewed-by: Haibo Chen Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230424092312.61746-1-alexander.stein@ew.tq-group.com Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml b/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml index 63369ba388e4..0a192ca192c5 100644 --- a/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml @@ -39,6 +39,12 @@ properties: power-domains: maxItems: 1 + vref-supply: + description: | + External ADC reference voltage supply on VREFH pad. If VERID[MVI] is + set, there are additional, internal reference voltages selectable. + VREFH1 is always from VREFH pad. + "#io-channel-cells": const: 1 @@ -72,6 +78,7 @@ examples: assigned-clocks = <&clk IMX_SC_R_ADC_0>; assigned-clock-rates = <24000000>; power-domains = <&pd IMX_SC_R_ADC_0>; + vref-supply = <®_1v8>; #io-channel-cells = <1>; }; }; From a551c26e8e568fad42120843521529241b9bceec Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 1 May 2023 10:36:04 -0400 Subject: [PATCH 032/934] iio: light: vcnl4035: fixed chip ID check VCNL4035 register(0xE) ID_L and ID_M define as: ID_L: 0x80 ID_H: 7:6 (0:0) 5:4 (0:0) slave address = 0x60 (7-bit) (0:1) slave address = 0x51 (7-bit) (1:0) slave address = 0x40 (7-bit) (1:0) slave address = 0x41 (7-bit) 3:0 Version code default (0:0:0:0) So just check ID_L. Fixes: 55707294c4eb ("iio: light: Add support for vishay vcnl4035") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20230501143605.1615549-1-Frank.Li@nxp.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4035.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iio/light/vcnl4035.c b/drivers/iio/light/vcnl4035.c index 14e29330e972..94f5d611e98c 100644 --- a/drivers/iio/light/vcnl4035.c +++ b/drivers/iio/light/vcnl4035.c @@ -8,6 +8,7 @@ * TODO: Proximity */ #include +#include #include #include #include @@ -42,6 +43,7 @@ #define VCNL4035_ALS_PERS_MASK GENMASK(3, 2) #define VCNL4035_INT_ALS_IF_H_MASK BIT(12) #define VCNL4035_INT_ALS_IF_L_MASK BIT(13) +#define VCNL4035_DEV_ID_MASK GENMASK(7, 0) /* Default values */ #define VCNL4035_MODE_ALS_ENABLE BIT(0) @@ -413,6 +415,7 @@ static int vcnl4035_init(struct vcnl4035_data *data) return ret; } + id = FIELD_GET(VCNL4035_DEV_ID_MASK, id); if (id != VCNL4035_DEV_ID_VAL) { dev_err(&data->client->dev, "Wrong id, got %x, expected %x\n", id, VCNL4035_DEV_ID_VAL); From 24febc99ca725dcf42d57168a2f4e8a75a5ade92 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 3 May 2023 11:58:17 +0200 Subject: [PATCH 033/934] iio: addac: ad74413: fix resistance input processing On success, ad74413r_get_single_adc_result() returns IIO_VAL_INT aka 1. So currently, the IIO_CHAN_INFO_PROCESSED case is effectively equivalent to the IIO_CHAN_INFO_RAW case, and we never call ad74413r_adc_to_resistance_result() to convert the adc measurement to ohms. Check ret for being negative rather than non-zero. Fixes: fea251b6a5dbd (iio: addac: add AD74413R driver) Signed-off-by: Rasmus Villemoes Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20230503095817.452551-1-linux@rasmusvillemoes.dk Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/addac/ad74413r.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index 07e9f6ae16a8..e3366cf5eb31 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -1007,7 +1007,7 @@ static int ad74413r_read_raw(struct iio_dev *indio_dev, ret = ad74413r_get_single_adc_result(indio_dev, chan->channel, val); - if (ret) + if (ret < 0) return ret; ad74413r_adc_to_resistance_result(*val, val); From a146eccb68be161ae9eab5f3f68bb0ed7c0fbaa8 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 8 May 2023 06:02:08 +0200 Subject: [PATCH 034/934] iio: dac: build ad5758 driver when AD5758 is selected Commit 28d1a7ac2a0d ("iio: dac: Add AD5758 support") adds the config AD5758 and the corresponding driver ad5758.c. In the Makefile, the ad5758 driver is however included when AD5755 is selected, not when AD5758 is selected. Probably, this was simply a mistake that happened by copy-and-paste and forgetting to adjust the actual line. Surprisingly, no one has ever noticed that this driver is actually only included when AD5755 is selected and that the config AD5758 has actually no effect on the build. Fixes: 28d1a7ac2a0d ("iio: dac: Add AD5758 support") Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20230508040208.12033-1-lukas.bulwahn@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/dac/Makefile b/drivers/iio/dac/Makefile index 6c74fea21736..addd97a78838 100644 --- a/drivers/iio/dac/Makefile +++ b/drivers/iio/dac/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_AD5592R_BASE) += ad5592r-base.o obj-$(CONFIG_AD5592R) += ad5592r.o obj-$(CONFIG_AD5593R) += ad5593r.o obj-$(CONFIG_AD5755) += ad5755.o -obj-$(CONFIG_AD5755) += ad5758.o +obj-$(CONFIG_AD5758) += ad5758.o obj-$(CONFIG_AD5761) += ad5761.o obj-$(CONFIG_AD5764) += ad5764.o obj-$(CONFIG_AD5766) += ad5766.o From d049151adde4ec17392592d145e94f0086f8ee80 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Mon, 8 May 2023 14:01:25 +0300 Subject: [PATCH 035/934] iio: bu27034: Ensure reset is written The reset bit must be always written to the hardware no matter what value is in a cache or register. Ensure this by using regmap_write_bits() instead of the regmap_update_bits(). Furthermore, the SWRESET bit may be self-clearing, so mark the SYSTEM_CONTROL register volatile to guarantee we do also read the right state - should we ever need to read it. Finally, writing the SWRESET bit will restore the default register values. This can cause register cache to be outdated if there are any register values cached. Rebuild register cache after SWRESET and use regmap_update_bits() when performing the reset. Signed-off-by: Matti Vaittinen Fixes: e52afbd61039 ("iio: light: ROHM BU27034 Ambient Light Sensor") Link: https://lore.kernel.org/r/ZFjWhbfuN5XcKty+@fedora Signed-off-by: Jonathan Cameron --- drivers/iio/light/rohm-bu27034.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/iio/light/rohm-bu27034.c b/drivers/iio/light/rohm-bu27034.c index 25c9b79574a5..f85194fda6b0 100644 --- a/drivers/iio/light/rohm-bu27034.c +++ b/drivers/iio/light/rohm-bu27034.c @@ -231,6 +231,9 @@ struct bu27034_result { static const struct regmap_range bu27034_volatile_ranges[] = { { + .range_min = BU27034_REG_SYSTEM_CONTROL, + .range_max = BU27034_REG_SYSTEM_CONTROL, + }, { .range_min = BU27034_REG_MODE_CONTROL4, .range_max = BU27034_REG_MODE_CONTROL4, }, { @@ -1272,12 +1275,19 @@ static int bu27034_chip_init(struct bu27034_data *data) int ret, sel; /* Reset */ - ret = regmap_update_bits(data->regmap, BU27034_REG_SYSTEM_CONTROL, + ret = regmap_write_bits(data->regmap, BU27034_REG_SYSTEM_CONTROL, BU27034_MASK_SW_RESET, BU27034_MASK_SW_RESET); if (ret) return dev_err_probe(data->dev, ret, "Sensor reset failed\n"); msleep(1); + + ret = regmap_reinit_cache(data->regmap, &bu27034_regmap); + if (ret) { + dev_err(data->dev, "Failed to reinit reg cache\n"); + return ret; + } + /* * Read integration time here to ensure it is in regmap cache. We do * this to speed-up the int-time acquisition in the start of the buffer From 56cd3d1c5c5b073a1a444eafdcf97d4d866d351a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 12 May 2023 10:53:41 +0300 Subject: [PATCH 036/934] iio: accel: kx022a fix irq getting The fwnode_irq_get_byname() was returning 0 at device-tree mapping error. If this occurred, the KX022A driver did abort the probe but errorneously directly returned the return value from fwnode_irq_get_byname() from probe. In case of a device-tree mapping error this indicated success. The fwnode_irq_get_byname() has since been fixed to not return zero on error so the check for fwnode_irq_get_byname() can be relaxed to only treat negative values as errors. This will also do decent fix even when backported to branches where fwnode_irq_get_byname() can still return zero on error because KX022A probe should later fail at IRQ requesting and a prober error handling should follow. Relax the return value check for fwnode_irq_get_byname() to only treat negative values as errors. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202305110245.MFxC9bUj-lkp@intel.com/ Link: https://lore.kernel.org/r/202305110245.MFxC9bUj-lkp@intel.com/ Signed-off-by: Matti Vaittinen Fixes: 7c1d1677b322 ("iio: accel: Support Kionix/ROHM KX022A accelerometer") Link: https://lore.kernel.org/r/b45b4b638db109c6078d243252df3a7b0485f7d5.1683875389.git.mazziesaccount@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/kionix-kx022a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c index f98393d74666..b8636fa8eaeb 100644 --- a/drivers/iio/accel/kionix-kx022a.c +++ b/drivers/iio/accel/kionix-kx022a.c @@ -1048,7 +1048,7 @@ int kx022a_probe_internal(struct device *dev) data->ien_reg = KX022A_REG_INC4; } else { irq = fwnode_irq_get_byname(fwnode, "INT2"); - if (irq <= 0) + if (irq < 0) return dev_err_probe(dev, irq, "No suitable IRQ\n"); data->inc_reg = KX022A_REG_INC5; From 09d3bec7009186bdba77039df01e5834788b3f95 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 11 May 2023 02:43:30 +0200 Subject: [PATCH 037/934] iio: dac: mcp4725: Fix i2c_master_send() return value handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The i2c_master_send() returns number of sent bytes on success, or negative on error. The suspend/resume callbacks expect zero on success and non-zero on error. Adapt the return value of the i2c_master_send() to the expectation of the suspend and resume callbacks, including proper validation of the return value. Fixes: cf35ad61aca2 ("iio: add mcp4725 I2C DAC driver") Signed-off-by: Marek Vasut Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230511004330.206942-1-marex@denx.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/mcp4725.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c index 46bf758760f8..3f5661a3718f 100644 --- a/drivers/iio/dac/mcp4725.c +++ b/drivers/iio/dac/mcp4725.c @@ -47,12 +47,18 @@ static int mcp4725_suspend(struct device *dev) struct mcp4725_data *data = iio_priv(i2c_get_clientdata( to_i2c_client(dev))); u8 outbuf[2]; + int ret; outbuf[0] = (data->powerdown_mode + 1) << 4; outbuf[1] = 0; data->powerdown = true; - return i2c_master_send(data->client, outbuf, 2); + ret = i2c_master_send(data->client, outbuf, 2); + if (ret < 0) + return ret; + else if (ret != 2) + return -EIO; + return 0; } static int mcp4725_resume(struct device *dev) @@ -60,13 +66,19 @@ static int mcp4725_resume(struct device *dev) struct mcp4725_data *data = iio_priv(i2c_get_clientdata( to_i2c_client(dev))); u8 outbuf[2]; + int ret; /* restore previous DAC value */ outbuf[0] = (data->dac_value >> 8) & 0xf; outbuf[1] = data->dac_value & 0xff; data->powerdown = false; - return i2c_master_send(data->client, outbuf, 2); + ret = i2c_master_send(data->client, outbuf, 2); + if (ret < 0) + return ret; + else if (ret != 2) + return -EIO; + return 0; } static DEFINE_SIMPLE_DEV_PM_OPS(mcp4725_pm_ops, mcp4725_suspend, mcp4725_resume); From 55720d242052e860b9fde445e302e0425722e7f1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 9 May 2023 14:34:22 +0200 Subject: [PATCH 038/934] dt-bindings: iio: adc: renesas,rcar-gyroadc: Fix adi,ad7476 compatible value The conversion to json-schema accidentally dropped the "ad" part prefix from the compatible value. Fixes: 8c41245872e2 ("dt-bindings:iio:adc:renesas,rcar-gyroadc: txt to yaml conversion.") Signed-off-by: Geert Uytterhoeven Reviewed-by: Marek Vasut Reviewed-by: Krzysztof Kozlowski Reviewed-by: Wolfram Sang Link: https://lore.kernel.org/r/6b328a3f52657c20759f3a5bb2fe033d47644ba8.1683635404.git.geert+renesas@glider.be Cc: Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml b/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml index 1c7aee5ed3e0..36dff3250ea7 100644 --- a/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/renesas,rcar-gyroadc.yaml @@ -90,7 +90,7 @@ patternProperties: of the MAX chips to the GyroADC, while MISO line of each Maxim ADC connects to a shared input pin of the GyroADC. enum: - - adi,7476 + - adi,ad7476 - fujitsu,mb88101a - maxim,max1162 - maxim,max11100 From 3530167c6fe8001de6c026a3058eaca4c8a5329f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 13 May 2023 13:17:47 +0200 Subject: [PATCH 039/934] soc: qcom: icc-bwmon: fix incorrect error code passed to dev_err_probe() Pass to dev_err_probe() PTR_ERR from actual dev_pm_opp_find_bw_floor() call which failed, instead of previous ret which at this point is 0. Failure of dev_pm_opp_find_bw_floor() would result in prematurely ending the probe with success. Fixes smatch warnings: drivers/soc/qcom/icc-bwmon.c:776 bwmon_probe() warn: passing zero to 'dev_err_probe' drivers/soc/qcom/icc-bwmon.c:781 bwmon_probe() warn: passing zero to 'dev_err_probe' Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202305131657.76XeHDjF-lkp@intel.com/ Cc: Fixes: b9c2ae6cac40 ("soc: qcom: icc-bwmon: Add bandwidth monitoring driver") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230513111747.132532-1-krzysztof.kozlowski@linaro.org --- drivers/soc/qcom/icc-bwmon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/qcom/icc-bwmon.c b/drivers/soc/qcom/icc-bwmon.c index fd58c5b69897..f65bfeca7ed6 100644 --- a/drivers/soc/qcom/icc-bwmon.c +++ b/drivers/soc/qcom/icc-bwmon.c @@ -773,12 +773,12 @@ static int bwmon_probe(struct platform_device *pdev) bwmon->max_bw_kbps = UINT_MAX; opp = dev_pm_opp_find_bw_floor(dev, &bwmon->max_bw_kbps, 0); if (IS_ERR(opp)) - return dev_err_probe(dev, ret, "failed to find max peak bandwidth\n"); + return dev_err_probe(dev, PTR_ERR(opp), "failed to find max peak bandwidth\n"); bwmon->min_bw_kbps = 0; opp = dev_pm_opp_find_bw_ceil(dev, &bwmon->min_bw_kbps, 0); if (IS_ERR(opp)) - return dev_err_probe(dev, ret, "failed to find min peak bandwidth\n"); + return dev_err_probe(dev, PTR_ERR(opp), "failed to find min peak bandwidth\n"); bwmon->dev = dev; From 3395d36e6805786c26d13188735bc796b9d7a7c9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 13 May 2023 13:29:13 +0200 Subject: [PATCH 040/934] soc: qcom: rpmh-rsc: drop redundant unsigned >=0 comparision Unsigned int "minor" is always >= 0 as reported by Smatch: drivers/soc/qcom/rpmh-rsc.c:1076 rpmh_rsc_probe() warn: always true condition '(drv->ver.minor >= 0) => (0-u32max >= 0)' Fixes: 88704a0cd719 ("soc: qcom: rpmh-rsc: Support RSC v3 minor versions") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230513112913.176009-1-krzysztof.kozlowski@linaro.org --- drivers/soc/qcom/rpmh-rsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c index f93544f6d796..0dd4363ebac8 100644 --- a/drivers/soc/qcom/rpmh-rsc.c +++ b/drivers/soc/qcom/rpmh-rsc.c @@ -1073,7 +1073,7 @@ static int rpmh_rsc_probe(struct platform_device *pdev) drv->ver.minor = rsc_id & (MINOR_VER_MASK << MINOR_VER_SHIFT); drv->ver.minor >>= MINOR_VER_SHIFT; - if (drv->ver.major == 3 && drv->ver.minor >= 0) + if (drv->ver.major == 3) drv->regs = rpmh_rsc_reg_offset_ver_3_0; else drv->regs = rpmh_rsc_reg_offset_ver_2_7; From 2d80b1e649dd74f5226484e244e57990348a9f18 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 11 Apr 2023 19:09:38 +0200 Subject: [PATCH 041/934] arm64: dts: qcom: sc8280xp: Revert "arm64: dts: qcom: sc8280xp: remove superfluous "input-enable"" This reverts commit 87e1f7b1a041a37390bf4fa7913683fb4f8d00d8 because it removed input-enable from the LPASS TLMM, where it is still correct (both bindings and Linux driver parse it). Reported-by: Johan Hovold Closes: https://lore.kernel.org/lkml/ZDVq1b0TjXH5LTYx@hovoldconsulting.com/ Signed-off-by: Krzysztof Kozlowski Reviewed-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230411170938.1657387-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 8fa9fbfe5d00..9fb0c7bcbfdf 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -2726,6 +2726,7 @@ pins = "gpio7"; function = "dmic1_data"; drive-strength = <8>; + input-enable; }; }; @@ -2743,6 +2744,7 @@ function = "dmic1_data"; drive-strength = <2>; bias-pull-down; + input-enable; }; }; @@ -2758,6 +2760,7 @@ pins = "gpio9"; function = "dmic2_data"; drive-strength = <8>; + input-enable; }; }; @@ -2775,6 +2778,7 @@ function = "dmic2_data"; drive-strength = <2>; bias-pull-down; + input-enable; }; }; From ce7c014937c442be677963848c7db62eccd94eac Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 12 May 2023 08:04:25 -0700 Subject: [PATCH 042/934] arm64: dts: qcom: sc8280xp: Flush RSC sleep & wake votes The rpmh driver will cache sleep and wake votes until the cluster power-domain is about to enter idle, to avoid unnecessary writes. So associate the apps_rsc with the cluster pd, so that it can be notified about this event. Without this, only AMC votes are being commited. Signed-off-by: Bjorn Andersson Reviewed-by: Konrad Dybcio Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230512150425.3171122-1-quic_bjorande@quicinc.com --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 9fb0c7bcbfdf..c2e6345a4041 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -3986,6 +3986,7 @@ qcom,tcs-config = , , , ; label = "apps_rsc"; + power-domains = <&CLUSTER_PD>; apps_bcm_voter: bcm-voter { compatible = "qcom,bcm-voter"; From 436eeae0411acdfc54521ddea80ee76d4ae8a7ea Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Tue, 9 May 2023 13:02:40 +0530 Subject: [PATCH 043/934] tee: amdtee: Add return_origin to 'struct tee_cmd_load_ta' After TEE has completed processing of TEE_CMD_ID_LOAD_TA, set proper value in 'return_origin' argument passed by open_session() call. To do so, add 'return_origin' field to the structure tee_cmd_load_ta. The Trusted OS shall update return_origin as part of TEE processing. This change to 'struct tee_cmd_load_ta' interface requires a similar update in AMD-TEE Trusted OS's TEE_CMD_ID_LOAD_TA interface. This patch has been verified on Phoenix Birman setup. On older APUs, return_origin value will be 0. Cc: stable@vger.kernel.org Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Tested-by: Sourabh Das Signed-off-by: Rijo Thomas Acked-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/amdtee_if.h | 10 ++++++---- drivers/tee/amdtee/call.c | 28 ++++++++++++++++------------ 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/tee/amdtee/amdtee_if.h b/drivers/tee/amdtee/amdtee_if.h index ff48c3e47375..e2014e21530a 100644 --- a/drivers/tee/amdtee/amdtee_if.h +++ b/drivers/tee/amdtee/amdtee_if.h @@ -118,16 +118,18 @@ struct tee_cmd_unmap_shared_mem { /** * struct tee_cmd_load_ta - load Trusted Application (TA) binary into TEE - * @low_addr: [in] bits [31:0] of the physical address of the TA binary - * @hi_addr: [in] bits [63:32] of the physical address of the TA binary - * @size: [in] size of TA binary in bytes - * @ta_handle: [out] return handle of the loaded TA + * @low_addr: [in] bits [31:0] of the physical address of the TA binary + * @hi_addr: [in] bits [63:32] of the physical address of the TA binary + * @size: [in] size of TA binary in bytes + * @ta_handle: [out] return handle of the loaded TA + * @return_origin: [out] origin of return code after TEE processing */ struct tee_cmd_load_ta { u32 low_addr; u32 hi_addr; u32 size; u32 ta_handle; + u32 return_origin; }; /** diff --git a/drivers/tee/amdtee/call.c b/drivers/tee/amdtee/call.c index e8cd9aaa3467..e9b63dcb3194 100644 --- a/drivers/tee/amdtee/call.c +++ b/drivers/tee/amdtee/call.c @@ -423,19 +423,23 @@ int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg) if (ret) { arg->ret_origin = TEEC_ORIGIN_COMMS; arg->ret = TEEC_ERROR_COMMUNICATION; - } else if (arg->ret == TEEC_SUCCESS) { - ret = get_ta_refcount(load_cmd.ta_handle); - if (!ret) { - arg->ret_origin = TEEC_ORIGIN_COMMS; - arg->ret = TEEC_ERROR_OUT_OF_MEMORY; + } else { + arg->ret_origin = load_cmd.return_origin; - /* Unload the TA on error */ - unload_cmd.ta_handle = load_cmd.ta_handle; - psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA, - (void *)&unload_cmd, - sizeof(unload_cmd), &ret); - } else { - set_session_id(load_cmd.ta_handle, 0, &arg->session); + if (arg->ret == TEEC_SUCCESS) { + ret = get_ta_refcount(load_cmd.ta_handle); + if (!ret) { + arg->ret_origin = TEEC_ORIGIN_COMMS; + arg->ret = TEEC_ERROR_OUT_OF_MEMORY; + + /* Unload the TA on error */ + unload_cmd.ta_handle = load_cmd.ta_handle; + psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA, + (void *)&unload_cmd, + sizeof(unload_cmd), &ret); + } else { + set_session_id(load_cmd.ta_handle, 0, &arg->session); + } } } mutex_unlock(&ta_refcount_mutex); From 5b10ff013e8a57f8845615ac2cc37edf7f6eef05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Fri, 12 May 2023 08:49:25 +0200 Subject: [PATCH 044/934] pinctrl: meson-axg: add missing GPIOA_18 gpio group MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this, the gpio cannot be explicitly mux'ed to its gpio function. Fixes: 83c566806a68a ("pinctrl: meson-axg: Add new pinctrl driver for Meson AXG SoC") Cc: stable@vger.kernel.org Signed-off-by: Martin Hundebøll Reviewed-by: Neil Armstrong Reviewed-by: Dmitry Rokosov Link: https://lore.kernel.org/r/20230512064925.133516-1-martin@geanix.com Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-axg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/meson/pinctrl-meson-axg.c b/drivers/pinctrl/meson/pinctrl-meson-axg.c index 7bfecdfba177..d249a035c2b9 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-axg.c +++ b/drivers/pinctrl/meson/pinctrl-meson-axg.c @@ -400,6 +400,7 @@ static struct meson_pmx_group meson_axg_periphs_groups[] = { GPIO_GROUP(GPIOA_15), GPIO_GROUP(GPIOA_16), GPIO_GROUP(GPIOA_17), + GPIO_GROUP(GPIOA_18), GPIO_GROUP(GPIOA_19), GPIO_GROUP(GPIOA_20), From 17eabd6a044b57b2c1b5459eb9d11af3ea909e63 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 15 May 2023 15:10:57 -0500 Subject: [PATCH 045/934] RDMA/rxe: Fix double unlock in rxe_qp.c A recent patch can cause a double spin_unlock_bh() in rxe_qp_to_attr() at line 715 in rxe_qp.c. Move the 2nd unlock into the if statement. Fixes: f605f26ea196 ("RDMA/rxe: Protect QP state with qp->state_lock") Link: https://lore.kernel.org/r/20230515201056.1591140-1-rpearsonhpe@gmail.com Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/27773078-40ce-414f-8b97-781954da9f25@kili.mountain Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index c5451a4488ca..245dd36638c7 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -712,8 +712,9 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) if (qp->attr.sq_draining) { spin_unlock_bh(&qp->state_lock); cond_resched(); + } else { + spin_unlock_bh(&qp->state_lock); } - spin_unlock_bh(&qp->state_lock); return 0; } From b5f3fe27c54b1fe77d4dd834ccd48a6d694f872e Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 10 May 2023 11:50:56 +0800 Subject: [PATCH 046/934] RDMA/rxe: Convert spin_{lock_bh,unlock_bh} to spin_{lock_irqsave,unlock_irqrestore} We need to call spin_lock_irqsave()/spin_unlock_irqrestore() for state_lock in rxe, otherwsie the callchain: ib_post_send_mad -> spin_lock_irqsave -> ib_post_send -> rxe_post_send -> spin_lock_bh -> spin_unlock_bh -> spin_unlock_irqrestore Causes below traces during run block nvmeof-mp/001 test due to mismatched spinlock nesting: WARNING: CPU: 0 PID: 94794 at kernel/softirq.c:376 __local_bh_enable_ip+0xc2/0x140 [ ... ] CPU: 0 PID: 94794 Comm: kworker/u4:1 Tainted: G E 6.4.0-rc1 #9 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014 Workqueue: rdma_cm cma_work_handler [rdma_cm] RIP: 0010:__local_bh_enable_ip+0xc2/0x140 Code: 48 85 c0 74 72 5b 41 5c 5d 31 c0 89 c2 89 c1 89 c6 89 c7 41 89 c0 e9 bd 0e 11 01 65 8b 05 f2 65 72 48 85 c0 0f 85 76 ff ff ff <0f> 0b e9 6f ff ff ff e8 d2 39 1c 00 eb 80 4c 89 e7 e8 68 ad 0a 00 RSP: 0018:ffffb7cf818539f0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000201 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000201 RDI: ffffffffc0f25f79 RBP: ffffb7cf81853a00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffc0f25f79 R13: ffff8db1f0fa6000 R14: ffff8db2c63ff000 R15: 00000000000000e8 FS: 0000000000000000(0000) GS:ffff8db33bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000559758db0f20 CR3: 0000000105124000 CR4: 00000000003506f0 Call Trace: _raw_spin_unlock_bh+0x31/0x40 rxe_post_send+0x59/0x8b0 [rdma_rxe] ib_send_mad+0x26b/0x470 [ib_core] ib_post_send_mad+0x150/0xb40 [ib_core] ? cm_form_tid+0x5b/0x90 [ib_cm] ib_send_cm_req+0x7c8/0xb70 [ib_cm] rdma_connect_locked+0x433/0x940 [rdma_cm] nvme_rdma_cm_handler+0x5d7/0x9c0 [nvme_rdma] cma_cm_event_handler+0x4f/0x170 [rdma_cm] cma_work_handler+0x6a/0xe0 [rdma_cm] process_one_work+0x2a9/0x580 worker_thread+0x52/0x3f0 ? __pfx_worker_thread+0x10/0x10 kthread+0x109/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 raw_local_irq_restore() called with IRQs enabled WARNING: CPU: 0 PID: 94794 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x37/0x60 [ ... ] CPU: 0 PID: 94794 Comm: kworker/u4:1 Tainted: G W E 6.4.0-rc1 #9 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014 Workqueue: rdma_cm cma_work_handler [rdma_cm] RIP: 0010:warn_bogus_irq_restore+0x37/0x60 Code: fb 01 77 36 83 e3 01 74 0e 48 8b 5d f8 c9 31 f6 89 f7 e9 ac ea 01 00 48 c7 c7 e0 52 33 b9 c6 05 bb 1c 69 01 01 e8 39 24 f0 fe <0f> 0b 48 8b 5d f8 c9 31 f6 89 f7 e9 89 ea 01 00 0f b6 f3 48 c7 c7 RSP: 0018:ffffb7cf81853a58 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffb7cf81853a60 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8db2cfb1a9e8 R13: ffff8db2cfb1a9d8 R14: ffff8db2c63ff000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8db33bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000559758db0f20 CR3: 0000000105124000 CR4: 00000000003506f0 Call Trace: _raw_spin_unlock_irqrestore+0x91/0xa0 ib_send_mad+0x1e3/0x470 [ib_core] ib_post_send_mad+0x150/0xb40 [ib_core] ? cm_form_tid+0x5b/0x90 [ib_cm] ib_send_cm_req+0x7c8/0xb70 [ib_cm] rdma_connect_locked+0x433/0x940 [rdma_cm] nvme_rdma_cm_handler+0x5d7/0x9c0 [nvme_rdma] cma_cm_event_handler+0x4f/0x170 [rdma_cm] cma_work_handler+0x6a/0xe0 [rdma_cm] process_one_work+0x2a9/0x580 worker_thread+0x52/0x3f0 ? __pfx_worker_thread+0x10/0x10 kthread+0x109/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 Fixes: f605f26ea196 ("RDMA/rxe: Protect QP state with qp->state_lock") Link: https://lore.kernel.org/r/20230510035056.881196-1-guoqing.jiang@linux.dev Signed-off-by: Guoqing Jiang Reviewed-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 26 +++++++++++-------- drivers/infiniband/sw/rxe/rxe_net.c | 7 +++--- drivers/infiniband/sw/rxe/rxe_qp.c | 36 +++++++++++++++++---------- drivers/infiniband/sw/rxe/rxe_recv.c | 9 ++++--- drivers/infiniband/sw/rxe/rxe_req.c | 30 ++++++++++++---------- drivers/infiniband/sw/rxe/rxe_resp.c | 14 ++++++----- drivers/infiniband/sw/rxe/rxe_verbs.c | 25 ++++++++++--------- 7 files changed, 86 insertions(+), 61 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index db18ace74d2b..f46c5a5fd0ae 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -115,15 +115,16 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) void retransmit_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, retrans_timer); + unsigned long flags; rxe_dbg_qp(qp, "retransmit timer fired\n"); - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (qp->valid) { qp->comp.timeout = 1; rxe_sched_task(&qp->comp.task); } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); } void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) @@ -481,11 +482,13 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) static void comp_check_sq_drain_done(struct rxe_qp *qp) { - spin_lock_bh(&qp->state_lock); + unsigned long flags; + + spin_lock_irqsave(&qp->state_lock, flags); if (unlikely(qp_state(qp) == IB_QPS_SQD)) { if (qp->attr.sq_draining && qp->comp.psn == qp->req.psn) { qp->attr.sq_draining = 0; - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); if (qp->ibqp.event_handler) { struct ib_event ev; @@ -499,7 +502,7 @@ static void comp_check_sq_drain_done(struct rxe_qp *qp) return; } } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); } static inline enum comp_state complete_ack(struct rxe_qp *qp, @@ -625,13 +628,15 @@ static void free_pkt(struct rxe_pkt_info *pkt) */ static void reset_retry_timer(struct rxe_qp *qp) { + unsigned long flags; + if (qp_type(qp) == IB_QPT_RC && qp->qp_timeout_jiffies) { - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (qp_state(qp) >= IB_QPS_RTS && psn_compare(qp->req.psn, qp->comp.psn) > 0) mod_timer(&qp->retrans_timer, jiffies + qp->qp_timeout_jiffies); - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); } } @@ -643,18 +648,19 @@ int rxe_completer(struct rxe_qp *qp) struct rxe_pkt_info *pkt = NULL; enum comp_state state; int ret; + unsigned long flags; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (!qp->valid || qp_state(qp) == IB_QPS_ERR || qp_state(qp) == IB_QPS_RESET) { bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR); drain_resp_pkts(qp); flush_send_queue(qp, notify); - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); goto exit; } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); if (qp->comp.timeout) { qp->comp.timeout_retry = 1; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 2bc7361152ea..a38fab19bed1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -412,15 +412,16 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, int err; int is_request = pkt->mask & RXE_REQ_MASK; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + unsigned long flags; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if ((is_request && (qp_state(qp) < IB_QPS_RTS)) || (!is_request && (qp_state(qp) < IB_QPS_RTR))) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n"); goto drop; } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); rxe_icrc_generate(skb, pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 245dd36638c7..61a2eb77d999 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -300,6 +300,7 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct rxe_cq *rcq = to_rcq(init->recv_cq); struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; + unsigned long flags; rxe_get(pd); rxe_get(rcq); @@ -325,10 +326,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, if (err) goto err2; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); qp->attr.qp_state = IB_QPS_RESET; qp->valid = 1; - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); return 0; @@ -492,24 +493,28 @@ static void rxe_qp_reset(struct rxe_qp *qp) /* move the qp to the error state */ void rxe_qp_error(struct rxe_qp *qp) { - spin_lock_bh(&qp->state_lock); + unsigned long flags; + + spin_lock_irqsave(&qp->state_lock, flags); qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ rxe_sched_task(&qp->resp.task); rxe_sched_task(&qp->comp.task); rxe_sched_task(&qp->req.task); - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); } static void rxe_qp_sqd(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) { - spin_lock_bh(&qp->state_lock); + unsigned long flags; + + spin_lock_irqsave(&qp->state_lock, flags); qp->attr.sq_draining = 1; rxe_sched_task(&qp->comp.task); rxe_sched_task(&qp->req.task); - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); } /* caller should hold qp->state_lock */ @@ -555,14 +560,16 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.cur_qp_state = attr->qp_state; if (mask & IB_QP_STATE) { - spin_lock_bh(&qp->state_lock); + unsigned long flags; + + spin_lock_irqsave(&qp->state_lock, flags); err = __qp_chk_state(qp, attr, mask); if (!err) { qp->attr.qp_state = attr->qp_state; rxe_dbg_qp(qp, "state -> %s\n", qps2str[attr->qp_state]); } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); if (err) return err; @@ -688,6 +695,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, /* called by the query qp verb */ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) { + unsigned long flags; + *attr = qp->attr; attr->rq_psn = qp->resp.psn; @@ -708,12 +717,12 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) /* Applications that get this state typically spin on it. * Yield the processor */ - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (qp->attr.sq_draining) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); cond_resched(); } else { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); } return 0; @@ -737,10 +746,11 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp) static void rxe_qp_do_cleanup(struct work_struct *work) { struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); + unsigned long flags; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); qp->valid = 0; - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); qp->qp_timeout_jiffies = 0; if (qp_type(qp) == IB_QPT_RC) { diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 2f953cc74256..5861e4244049 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -14,6 +14,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct rxe_qp *qp) { unsigned int pkt_type; + unsigned long flags; if (unlikely(!qp->valid)) return -EINVAL; @@ -38,19 +39,19 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return -EINVAL; } - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (pkt->mask & RXE_REQ_MASK) { if (unlikely(qp_state(qp) < IB_QPS_RTR)) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); return -EINVAL; } } else { if (unlikely(qp_state(qp) < IB_QPS_RTS)) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); return -EINVAL; } } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 65134a9aefe7..5fe7cbae3031 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -99,17 +99,18 @@ static void req_retry(struct rxe_qp *qp) void rnr_nak_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); + unsigned long flags; rxe_dbg_qp(qp, "nak timer fired\n"); - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (qp->valid) { /* request a send queue retry */ qp->req.need_retry = 1; qp->req.wait_for_rnr_timer = 0; rxe_sched_task(&qp->req.task); } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); } static void req_check_sq_drain_done(struct rxe_qp *qp) @@ -118,8 +119,9 @@ static void req_check_sq_drain_done(struct rxe_qp *qp) unsigned int index; unsigned int cons; struct rxe_send_wqe *wqe; + unsigned long flags; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (qp_state(qp) == IB_QPS_SQD) { q = qp->sq.queue; index = qp->req.wqe_index; @@ -140,7 +142,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp) break; qp->attr.sq_draining = 0; - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); if (qp->ibqp.event_handler) { struct ib_event ev; @@ -154,7 +156,7 @@ static void req_check_sq_drain_done(struct rxe_qp *qp) return; } while (0); } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); } static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp) @@ -173,6 +175,7 @@ static struct rxe_send_wqe *__req_next_wqe(struct rxe_qp *qp) static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) { struct rxe_send_wqe *wqe; + unsigned long flags; req_check_sq_drain_done(qp); @@ -180,13 +183,13 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) if (wqe == NULL) return NULL; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (unlikely((qp_state(qp) == IB_QPS_SQD) && (wqe->state != wqe_state_processing))) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); return NULL; } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); wqe->mask = wr_opcode_mask(wqe->wr.opcode, qp); return wqe; @@ -676,16 +679,17 @@ int rxe_requester(struct rxe_qp *qp) struct rxe_queue *q = qp->sq.queue; struct rxe_ah *ah; struct rxe_av *av; + unsigned long flags; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (unlikely(!qp->valid)) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); goto exit; } if (unlikely(qp_state(qp) == IB_QPS_ERR)) { wqe = __req_next_wqe(qp); - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); if (wqe) goto err; else @@ -700,10 +704,10 @@ int rxe_requester(struct rxe_qp *qp) qp->req.wait_psn = 0; qp->req.need_retry = 0; qp->req.wait_for_rnr_timer = 0; - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); goto exit; } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); /* we come here if the retransmit timer has fired * or if the rnr timer has fired. If the retransmit diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 68f6cd188d8e..1da044f6b7d4 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1047,6 +1047,7 @@ static enum resp_states do_complete(struct rxe_qp *qp, struct ib_uverbs_wc *uwc = &cqe.uibwc; struct rxe_recv_wqe *wqe = qp->resp.wqe; struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + unsigned long flags; if (!wqe) goto finish; @@ -1137,12 +1138,12 @@ static enum resp_states do_complete(struct rxe_qp *qp, return RESPST_ERR_CQ_OVERFLOW; finish: - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (unlikely(qp_state(qp) == IB_QPS_ERR)) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); return RESPST_CHK_RESOURCE; } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); if (unlikely(!pkt)) return RESPST_DONE; @@ -1468,18 +1469,19 @@ int rxe_responder(struct rxe_qp *qp) enum resp_states state; struct rxe_pkt_info *pkt = NULL; int ret; + unsigned long flags; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (!qp->valid || qp_state(qp) == IB_QPS_ERR || qp_state(qp) == IB_QPS_RESET) { bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR); drain_req_pkts(qp); flush_recv_queue(qp, notify); - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); goto exit; } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index dea605b7f683..4d8f6b8051ff 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -904,10 +904,10 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, if (!err) rxe_sched_task(&qp->req.task); - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (qp_state(qp) == IB_QPS_ERR) rxe_sched_task(&qp->comp.task); - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); return err; } @@ -917,22 +917,23 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, { struct rxe_qp *qp = to_rqp(ibqp); int err; + unsigned long flags; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); /* caller has already called destroy_qp */ if (WARN_ON_ONCE(!qp->valid)) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); rxe_err_qp(qp, "qp has been destroyed"); return -EINVAL; } if (unlikely(qp_state(qp) < IB_QPS_RTS)) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); *bad_wr = wr; rxe_err_qp(qp, "qp not ready to send"); return -EINVAL; } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); if (qp->is_user) { /* Utilize process context to do protocol processing */ @@ -1008,22 +1009,22 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, struct rxe_rq *rq = &qp->rq; unsigned long flags; - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); /* caller has already called destroy_qp */ if (WARN_ON_ONCE(!qp->valid)) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); rxe_err_qp(qp, "qp has been destroyed"); return -EINVAL; } /* see C10-97.2.1 */ if (unlikely((qp_state(qp) < IB_QPS_INIT))) { - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); *bad_wr = wr; rxe_dbg_qp(qp, "qp not ready to post recv"); return -EINVAL; } - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); if (unlikely(qp->srq)) { *bad_wr = wr; @@ -1044,10 +1045,10 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, spin_unlock_irqrestore(&rq->producer_lock, flags); - spin_lock_bh(&qp->state_lock); + spin_lock_irqsave(&qp->state_lock, flags); if (qp_state(qp) == IB_QPS_ERR) rxe_sched_task(&qp->resp.task); - spin_unlock_bh(&qp->state_lock); + spin_unlock_irqrestore(&qp->state_lock, flags); return err; } From 866422cdddcdf59d8c68e9472d49ba1be29b5fcf Mon Sep 17 00:00:00 2001 From: Yonatan Nachum Date: Thu, 11 May 2023 11:51:03 +0000 Subject: [PATCH 047/934] RDMA/efa: Fix unsupported page sizes in device Device uses 4KB size blocks for user pages indirect list while the driver creates those blocks with the size of PAGE_SIZE of the kernel. On kernels with PAGE_SIZE different than 4KB (ARM RHEL), this leads to a failure on register MR with indirect list because of the miss communication between driver and device. Fixes: 40909f664d27 ("RDMA/efa: Add EFA verbs implementation") Link: https://lore.kernel.org/r/20230511115103.13876-1-ynachum@amazon.com Reviewed-by: Firas Jahjah Reviewed-by: Michael Margolin Signed-off-by: Yonatan Nachum Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/efa/efa_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 8eca6c14d0cf..2a195c4b0f17 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1403,7 +1403,7 @@ static int pbl_continuous_initialize(struct efa_dev *dev, */ static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl) { - u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE); + u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE); struct scatterlist *sgl; int sg_dma_cnt, err; From 58caa2a51ad4fd21763696cc6c4defc9fc1b4b4f Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 12 May 2023 17:22:43 +0800 Subject: [PATCH 048/934] RDMA/hns: Fix timeout attr in query qp for HIP08 On HIP08, the queried timeout attr is different from the timeout attr configured by the user. It is found by rdma-core testcase test_rdmacm_async_traffic: ====================================================================== FAIL: test_rdmacm_async_traffic (tests.test_rdmacm.CMTestCase) ---------------------------------------------------------------------- Traceback (most recent call last): File "./tests/test_rdmacm.py", line 33, in test_rdmacm_async_traffic self.two_nodes_rdmacm_traffic(CMAsyncConnection, self.rdmacm_traffic, File "./tests/base.py", line 382, in two_nodes_rdmacm_traffic raise(res) AssertionError Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC") Link: https://lore.kernel.org/r/20230512092245.344442-2-huangjunxian6@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 17 ++++++++++++++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 ++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 84f1167de1d9..3a1c90406ed9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5012,7 +5012,6 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout) { #define QP_ACK_TIMEOUT_MAX_HIP08 20 -#define QP_ACK_TIMEOUT_OFFSET 10 #define QP_ACK_TIMEOUT_MAX 31 if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { @@ -5021,7 +5020,7 @@ static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout) "local ACK timeout shall be 0 to 20.\n"); return false; } - *timeout += QP_ACK_TIMEOUT_OFFSET; + *timeout += HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08; } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) { if (*timeout > QP_ACK_TIMEOUT_MAX) { ibdev_warn(&hr_dev->ib_dev, @@ -5307,6 +5306,18 @@ out: return ret; } +static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev, + struct hns_roce_v2_qp_context *context) +{ + u8 timeout; + + timeout = (u8)hr_reg_read(context, QPC_AT); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + timeout -= HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08; + + return timeout; +} + static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) @@ -5384,7 +5395,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->max_dest_rd_atomic = 1 << hr_reg_read(&context, QPC_RR_MAX); qp_attr->min_rnr_timer = (u8)hr_reg_read(&context, QPC_MIN_RNR_TIME); - qp_attr->timeout = (u8)hr_reg_read(&context, QPC_AT); + qp_attr->timeout = get_qp_timeout_attr(hr_dev, &context); qp_attr->retry_cnt = hr_reg_read(&context, QPC_RETRY_NUM_INIT); qp_attr->rnr_retry = hr_reg_read(&context, QPC_RNR_NUM_INIT); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 1b44d2434ab4..7033eae2407c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -44,6 +44,8 @@ #define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000 #define HNS_ROCE_V2_RSV_XRCD_NUM 0 +#define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10 + #define HNS_ROCE_V3_SCCC_SZ 64 #define HNS_ROCE_V3_GMV_ENTRY_SZ 32 From 7f3969b14f356dd65fa95b3528eb05c32e68bc06 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Fri, 12 May 2023 17:22:44 +0800 Subject: [PATCH 049/934] RDMA/hns: Fix base address table allocation For hns, the specification of an entry like resource (E.g. WQE/CQE/EQE) depends on BT page size, buf page size and hopnum. For user mode, the buf page size depends on UMEM. Therefore, the actual specification is controlled by BT page size and hopnum. The current BT page size and hopnum are obtained from firmware. This makes the driver inflexible and introduces unnecessary constraints. Resource allocation failures occur in many scenarios. This patch will calculate whether the BT page size set by firmware is sufficient before allocating BT, and increase the BT page size if it is insufficient. Fixes: 1133401412a9 ("RDMA/hns: Optimize base address table config flow for qp buffer") Link: https://lore.kernel.org/r/20230512092245.344442-3-huangjunxian6@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_mr.c | 43 +++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 37a5cf62f88b..14376490ac22 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -33,6 +33,7 @@ #include #include +#include #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" @@ -909,6 +910,44 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, return page_cnt; } +static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum) +{ + return int_pow(ba_per_bt, hopnum - 1); +} + +static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr, + unsigned int pg_shift) +{ + unsigned long cap = hr_dev->caps.page_size_cap; + struct hns_roce_buf_region *re; + unsigned int pgs_per_l1ba; + unsigned int ba_per_bt; + unsigned int ba_num; + int i; + + for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) { + if (!(BIT(pg_shift) & cap)) + continue; + + ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN; + ba_num = 0; + for (i = 0; i < mtr->hem_cfg.region_count; i++) { + re = &mtr->hem_cfg.region[i]; + if (re->hopnum == 0) + continue; + + pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum); + ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba); + } + + if (ba_num <= ba_per_bt) + return pg_shift; + } + + return 0; +} + static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, unsigned int ba_page_shift) { @@ -917,6 +956,10 @@ static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, hns_roce_hem_list_init(&mtr->hem_list); if (!cfg->is_direct) { + ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift); + if (!ba_page_shift) + return -ERANGE; + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, cfg->region, cfg->region_count, ba_page_shift); From 56518a603fd2bf74762d176ac980572db84a3e14 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Fri, 12 May 2023 17:22:45 +0800 Subject: [PATCH 050/934] RDMA/hns: Modify the value of long message loopback slice Long message loopback slice is used for achieving traffic balance between QPs. It prevents the problem that QPs with large traffic occupying the hardware pipeline for a long time and QPs with small traffic cannot be scheduled. Currently, its maximum value is set to 16K, which means only after a QP sends 16K will the second QP be scheduled. This value is too large, which will lead to unbalanced traffic scheduling, and thus it needs to be modified. The setting range of the long message loopback slice is modified to be from 1024 (the lower limit supported by hardware) to mtu. Actual testing shows that this value can significantly reduce error in hardware traffic scheduling. This solution is compatible with both HIP08 and HIP09. The modified lp_pktn_ini has a maximum value of 2 (when mtu is 256), so the range checking code for lp_pktn_ini is no longer necessary and needs to be deleted. Fixes: 0e60778efb07 ("RDMA/hns: Modify the value of MAX_LP_MSG_LEN to meet hardware compatibility") Link: https://lore.kernel.org/r/20230512092245.344442-4-huangjunxian6@hisilicon.com Signed-off-by: Yangyang Li Signed-off-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3a1c90406ed9..d4c6b9bc0a4e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4583,11 +4583,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, mtu = ib_mtu_enum_to_int(ib_mtu); if (WARN_ON(mtu <= 0)) return -EINVAL; -#define MAX_LP_MSG_LEN 16384 - /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 16KB */ - lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); - if (WARN_ON(lp_pktn_ini >= 0xF)) - return -EINVAL; +#define MIN_LP_MSG_LEN 1024 + /* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */ + lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu); if (attr_mask & IB_QP_PATH_MTU) { hr_reg_write(context, QPC_MTU, ib_mtu); From 3981514180c987a79ea98f0ae06a7cbf58a9ac0f Mon Sep 17 00:00:00 2001 From: Jim Wylder Date: Wed, 17 May 2023 10:20:11 -0500 Subject: [PATCH 051/934] regmap: Account for register length when chunking Currently, when regmap_raw_write() splits the data, it uses the max_raw_write value defined for the bus. For any bus that includes the target register address in the max_raw_write value, the chunked transmission will always exceed the maximum transmission length. To avoid this problem, subtract the length of the register and the padding from the maximum transmission. Signed-off-by: Jim Wylder max_raw_write - map->format.reg_bytes - + map->format.pad_bytes; int ret, i; if (!val_count) @@ -2089,8 +2091,8 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, if (map->use_single_write) chunk_regs = 1; - else if (map->max_raw_write && val_len > map->max_raw_write) - chunk_regs = map->max_raw_write / val_bytes; + else if (map->max_raw_write && val_len > max_data) + chunk_regs = max_data / val_bytes; chunk_count = val_count / chunk_regs; chunk_bytes = chunk_regs * val_bytes; From ca8fc6814844d8787e7fec61b2544a871ea8b675 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 20 Feb 2023 10:54:00 +0100 Subject: [PATCH 052/934] arm64: dts: qcom: sc7280-idp: drop incorrect dai-cells from WCD938x SDW The WCD938x audio codec Soundwire interface part is not a DAI and does not allow sound-dai-cells: sc7280-idp.dtb: codec@0,4: '#sound-dai-cells' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Douglas Anderson Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230220095401.64196-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sc7280-idp.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi index c6dc200c00ce..21027042cf13 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi @@ -480,7 +480,6 @@ wcd_rx: codec@0,4 { compatible = "sdw20217010d00"; reg = <0 4>; - #sound-dai-cells = <1>; qcom,rx-port-mapping = <1 2 3 4 5>; }; }; @@ -491,7 +490,6 @@ wcd_tx: codec@0,3 { compatible = "sdw20217010d00"; reg = <0 3>; - #sound-dai-cells = <1>; qcom,tx-port-mapping = <1 2 3 4>; }; }; From 16bd455d0897d1b8b7a9aee2ed51d75b14a34563 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 20 Feb 2023 10:54:01 +0100 Subject: [PATCH 053/934] arm64: dts: qcom: sc7280-qcard: drop incorrect dai-cells from WCD938x SDW The WCD938x audio codec Soundwire interface part is not a DAI and does not allow sound-dai-cells: sc7280-herobrine-crd.dtb: codec@0,4: '#sound-dai-cells' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Krzysztof Kozlowski Reviewed-by: Douglas Anderson Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230220095401.64196-2-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi b/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi index 88b3586e389f..9137db066d9e 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi @@ -414,7 +414,6 @@ wcd_rx: codec@0,4 { compatible = "sdw20217010d00"; reg = <0 4>; - #sound-dai-cells = <1>; qcom,rx-port-mapping = <1 2 3 4 5>; }; }; @@ -423,7 +422,6 @@ wcd_tx: codec@0,3 { compatible = "sdw20217010d00"; reg = <0 3>; - #sound-dai-cells = <1>; qcom,tx-port-mapping = <1 2 3 4>; }; }; From 0c7433dd81b5e771ef222f340fef03501131761b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 8 Apr 2023 15:08:08 +0200 Subject: [PATCH 054/934] arm64: dts: qcom: sm8250-xiaomi-elish-boe: fix panel compatible The bindings expect "novatek,nt36523" fallback in the panel compatible: sm8250-xiaomi-elish-boe.dtb: panel@0: compatible: ['xiaomi,elish-boe-nt36523'] is too shor Fixes: 51c4c2bd6f31 ("arm64: dts: qcom: sm8250-xiaomi-elish-boe: Add mdss and dsi panel") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230408130809.52319-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts index 8b2ae39950ff..de6101ddebe7 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts +++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-boe.dts @@ -13,6 +13,6 @@ }; &display_panel { - compatible = "xiaomi,elish-boe-nt36523"; + compatible = "xiaomi,elish-boe-nt36523", "novatek,nt36523"; status = "okay"; }; From fae7d907a75eaad4bfb17e7990e357938c5b1351 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 8 Apr 2023 15:08:09 +0200 Subject: [PATCH 055/934] arm64: dts: qcom: sm8250-xiaomi-elish-csot: fix panel compatible The bindings expect "novatek,nt36523" fallback in the panel compatible: sm8250-xiaomi-elish-csot.dtb: panel@0: compatible: ['xiaomi,elish-csot-nt36523'] is too short Fixes: 8a786036c7b6 ("arm64: dts: qcom: sm8250-xiaomi-elish-csot: Add Xiaomi Mi Pad 5 Pro CSOT variant") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230408130809.52319-2-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts index a4d5341495cf..4cffe9c703df 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts +++ b/arch/arm64/boot/dts/qcom/sm8250-xiaomi-elish-csot.dts @@ -13,6 +13,6 @@ }; &display_panel { - compatible = "xiaomi,elish-csot-nt36523"; + compatible = "xiaomi,elish-csot-nt36523", "novatek,nt36523"; status = "okay"; }; From 18f341061262a7a63bc8b5c9ef94a342ac2673a2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 10 Apr 2023 20:10:03 +0200 Subject: [PATCH 056/934] ARM: dts: qcom: apq8026: remove superfluous "input-enable" Pin configuration property "input-enable" was used with the intention to disable the output, but this is done by default by Linux drivers. Since patch ("dt-bindings: pinctrl: qcom: tlmm should use output-disable, not input-enable") the property is not accepted anymore. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230410181005.25853-1-krzysztof.kozlowski@linaro.org --- arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts | 2 -- arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts | 1 - arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts | 3 --- 3 files changed, 6 deletions(-) diff --git a/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts b/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts index 7a80e1c9f126..aa0e0e8d2a97 100644 --- a/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts +++ b/arch/arm/boot/dts/qcom-apq8026-asus-sparrow.dts @@ -268,7 +268,6 @@ function = "gpio"; drive-strength = <8>; bias-disable; - input-enable; }; wlan_hostwake_default_state: wlan-hostwake-default-state { @@ -276,7 +275,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; wlan_regulator_default_state: wlan-regulator-default-state { diff --git a/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts b/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts index d64096028ab1..5593a3a60d6c 100644 --- a/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts +++ b/arch/arm/boot/dts/qcom-apq8026-huawei-sturgeon.dts @@ -352,7 +352,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; wlan_regulator_default_state: wlan-regulator-default-state { diff --git a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts index b82381229adf..b887e5361ec3 100644 --- a/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts +++ b/arch/arm/boot/dts/qcom-apq8026-lg-lenok.dts @@ -307,7 +307,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; touch_pins: touch-state { @@ -317,7 +316,6 @@ drive-strength = <8>; bias-pull-down; - input-enable; }; reset-pins { @@ -335,7 +333,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; wlan_regulator_default_state: wlan-regulator-default-state { From da50416fb1fa7502a52ad38c0e9ff196d9f56ac9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 10 Apr 2023 20:10:04 +0200 Subject: [PATCH 057/934] ARM: dts: qcom: mdm9615: remove superfluous "input-enable" Pin configuration property "input-enable" was used with the intention to disable the output, but this is done by default by Linux drivers. Since patch ("dt-bindings: pinctrl: qcom: tlmm should use output-disable, not input-enable") the property is not accepted anymore. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230410181005.25853-2-krzysztof.kozlowski@linaro.org --- arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts index a8304769b509..b269fdca1460 100644 --- a/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts +++ b/arch/arm/boot/dts/qcom-mdm9615-wp8548-mangoh-green.dts @@ -49,7 +49,6 @@ gpioext1-pins { pins = "gpio2"; function = "gpio"; - input-enable; bias-disable; }; }; From 033553c04b02b450261f60d5f1b0e5948cd197bb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 10 Apr 2023 20:10:05 +0200 Subject: [PATCH 058/934] ARM: dts: qcom: msm8974: remove superfluous "input-enable" Pin configuration property "input-enable" was used with the intention to disable the output, but this is done by default by Linux drivers. Since patch ("dt-bindings: pinctrl: qcom: tlmm should use output-disable, not input-enable") the property is not accepted anymore. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230410181005.25853-3-krzysztof.kozlowski@linaro.org --- arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts | 2 -- arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi | 1 - arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts | 1 - arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts | 4 ---- .../boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts | 1 - 5 files changed, 9 deletions(-) diff --git a/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts b/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts index ab35f2d644c0..861695cecf84 100644 --- a/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts +++ b/arch/arm/boot/dts/qcom-msm8974-lge-nexus5-hammerhead.dts @@ -592,7 +592,6 @@ pins = "gpio73"; function = "gpio"; bias-disable; - input-enable; }; touch_pin: touch-state { @@ -602,7 +601,6 @@ drive-strength = <2>; bias-disable; - input-enable; }; reset-pins { diff --git a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi index d3bec03b126c..68a2f9094e53 100644 --- a/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974-sony-xperia-rhine.dtsi @@ -433,7 +433,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; sdc1_on: sdc1-on-state { diff --git a/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts b/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts index 8d2a054d8fee..8230d0e1d95d 100644 --- a/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts +++ b/arch/arm/boot/dts/qcom-msm8974pro-oneplus-bacon.dts @@ -461,7 +461,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; reset-pins { diff --git a/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts b/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts index b9698ffb66ca..eb505d6d7f31 100644 --- a/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts +++ b/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts @@ -704,7 +704,6 @@ pins = "gpio75"; function = "gpio"; drive-strength = <16>; - input-enable; }; devwake-pins { @@ -760,14 +759,12 @@ i2c_touchkey_pins: i2c-touchkey-state { pins = "gpio95", "gpio96"; function = "gpio"; - input-enable; bias-pull-up; }; i2c_led_gpioex_pins: i2c-led-gpioex-state { pins = "gpio120", "gpio121"; function = "gpio"; - input-enable; bias-pull-down; }; @@ -781,7 +778,6 @@ wifi_pin: wifi-state { pins = "gpio92"; function = "gpio"; - input-enable; bias-pull-down; }; diff --git a/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts b/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts index 04bc58d87abf..0f650ed31005 100644 --- a/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts +++ b/arch/arm/boot/dts/qcom-msm8974pro-sony-xperia-shinano-castor.dts @@ -631,7 +631,6 @@ function = "gpio"; drive-strength = <2>; bias-disable; - input-enable; }; bt_host_wake_pin: bt-host-wake-state { From f34fbb71ce9eb0936c0b56fe8b3866d76618c433 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 16 Apr 2023 12:11:31 +0200 Subject: [PATCH 059/934] arm64: dts: qcom: fix indentation Correct indentation to use only tabs. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230416101134.95686-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 8 ++-- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 4 +- arch/arm64/boot/dts/qcom/sdm670.dtsi | 2 +- arch/arm64/boot/dts/qcom/sdm845.dtsi | 4 +- arch/arm64/boot/dts/qcom/sm6375.dtsi | 34 ++++++++--------- arch/arm64/boot/dts/qcom/sm8150.dtsi | 4 +- arch/arm64/boot/dts/qcom/sm8350.dtsi | 52 +++++++++++++------------- arch/arm64/boot/dts/qcom/sm8450.dtsi | 52 +++++++++++++------------- 8 files changed, 80 insertions(+), 80 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 2b35cb3f5292..2c5780008c84 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -53,8 +53,8 @@ #cooling-cells = <2>; next-level-cache = <&L2_0>; L2_0: l2-cache { - compatible = "cache"; - cache-level = <2>; + compatible = "cache"; + cache-level = <2>; }; }; @@ -83,8 +83,8 @@ #cooling-cells = <2>; next-level-cache = <&L2_1>; L2_1: l2-cache { - compatible = "cache"; - cache-level = <2>; + compatible = "cache"; + cache-level = <2>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index c2e6345a4041..2fd92b93d356 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -60,8 +60,8 @@ cache-level = <2>; next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; }; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm670.dtsi b/arch/arm64/boot/dts/qcom/sdm670.dtsi index c5f839dd1c6e..49c07cb76b20 100644 --- a/arch/arm64/boot/dts/qcom/sdm670.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm670.dtsi @@ -42,7 +42,7 @@ compatible = "cache"; next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; + compatible = "cache"; }; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 90424442bb4a..ae0510e687b4 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -110,8 +110,8 @@ cache-level = <2>; next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; }; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi index ae9b6bc446cb..4e8b99e7cf66 100644 --- a/arch/arm64/boot/dts/qcom/sm6375.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi @@ -48,10 +48,10 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_0: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; + compatible = "cache"; }; }; }; @@ -68,8 +68,8 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_100: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + next-level-cache = <&L3_0>; }; }; @@ -85,8 +85,8 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_200: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + next-level-cache = <&L3_0>; }; }; @@ -102,8 +102,8 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_300: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + next-level-cache = <&L3_0>; }; }; @@ -119,8 +119,8 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_400: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + next-level-cache = <&L3_0>; }; }; @@ -136,8 +136,8 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_500: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + next-level-cache = <&L3_0>; }; }; @@ -153,8 +153,8 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_600: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + next-level-cache = <&L3_0>; }; }; @@ -170,8 +170,8 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_700: l2-cache { - compatible = "cache"; - next-level-cache = <&L3_0>; + compatible = "cache"; + next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 2273fa571988..1a229caad8aa 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -65,8 +65,8 @@ cache-level = <2>; next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; }; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index ebcb481571c2..ebe59bd7bcc7 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -58,12 +58,12 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_0: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; }; }; }; @@ -80,9 +80,9 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_100: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -98,9 +98,9 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_200: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -116,9 +116,9 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_300: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -134,9 +134,9 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_400: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -152,9 +152,9 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_500: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -170,9 +170,9 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_600: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -188,9 +188,9 @@ power-domain-names = "psci"; #cooling-cells = <2>; L2_700: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 595533aeafc4..b15b585f3548 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -57,12 +57,12 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 0>; L2_0: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; L3_0: l3-cache { - compatible = "cache"; - cache-level = <3>; + compatible = "cache"; + cache-level = <3>; }; }; }; @@ -79,9 +79,9 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 0>; L2_100: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -97,9 +97,9 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 0>; L2_200: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -115,9 +115,9 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 0>; L2_300: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -133,9 +133,9 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 1>; L2_400: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -151,9 +151,9 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 1>; L2_500: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -169,9 +169,9 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 1>; L2_600: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; @@ -187,9 +187,9 @@ #cooling-cells = <2>; clocks = <&cpufreq_hw 2>; L2_700: l2-cache { - compatible = "cache"; - cache-level = <2>; - next-level-cache = <&L3_0>; + compatible = "cache"; + cache-level = <2>; + next-level-cache = <&L3_0>; }; }; From 084657090aacd8a9269931a2879420bf614511fb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 16 Apr 2023 12:11:32 +0200 Subject: [PATCH 060/934] arm64: dts: qcom: use decimal for cache level Cache level is by convention a decimal number, not hex. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230416101134.95686-2-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/ipq6018.dtsi | 2 +- arch/arm64/boot/dts/qcom/ipq8074.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 9ff4e9d45065..ece652a0728a 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -83,7 +83,7 @@ L2_0: l2-cache { compatible = "cache"; - cache-level = <0x2>; + cache-level = <2>; }; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 84e715aa4310..4056ce59d43f 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -66,7 +66,7 @@ L2_0: l2-cache { compatible = "cache"; - cache-level = <0x2>; + cache-level = <2>; }; }; From 9c6e72fb2058dc06da9d278c4bff94430677d48a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 16 Apr 2023 12:11:33 +0200 Subject: [PATCH 061/934] arm64: dts: qcom: add missing cache properties Add required cache-level and cache-unified properties to fix warnings like: qdu1000-idp.dtb: l3-cache: 'cache-unified' is a required property qdu1000-idp.dtb: l2-cache: 'cache-level' is a required property Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230416101134.95686-3-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/ipq5332.dtsi | 1 + arch/arm64/boot/dts/qcom/ipq6018.dtsi | 1 + arch/arm64/boot/dts/qcom/ipq8074.dtsi | 1 + arch/arm64/boot/dts/qcom/ipq9574.dtsi | 1 + arch/arm64/boot/dts/qcom/msm8916.dtsi | 1 + arch/arm64/boot/dts/qcom/msm8953.dtsi | 2 ++ arch/arm64/boot/dts/qcom/msm8976.dtsi | 2 ++ arch/arm64/boot/dts/qcom/msm8994.dtsi | 2 ++ arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 ++ arch/arm64/boot/dts/qcom/msm8998.dtsi | 2 ++ arch/arm64/boot/dts/qcom/qcm2290.dtsi | 1 + arch/arm64/boot/dts/qcom/qcs404.dtsi | 1 + arch/arm64/boot/dts/qcom/qdu1000.dtsi | 10 ++++++++++ arch/arm64/boot/dts/qcom/sa8775p.dtsi | 20 ++++++++++++++++++++ arch/arm64/boot/dts/qcom/sc7180.dtsi | 9 +++++++++ arch/arm64/boot/dts/qcom/sc7280.dtsi | 9 +++++++++ arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 9 +++++++++ arch/arm64/boot/dts/qcom/sdm630.dtsi | 2 ++ arch/arm64/boot/dts/qcom/sdm670.dtsi | 18 ++++++++++++++++++ arch/arm64/boot/dts/qcom/sdm845.dtsi | 9 +++++++++ arch/arm64/boot/dts/qcom/sm6115.dtsi | 2 ++ arch/arm64/boot/dts/qcom/sm6125.dtsi | 2 ++ arch/arm64/boot/dts/qcom/sm6350.dtsi | 9 +++++++++ arch/arm64/boot/dts/qcom/sm6375.dtsi | 18 ++++++++++++++++++ arch/arm64/boot/dts/qcom/sm8150.dtsi | 9 +++++++++ arch/arm64/boot/dts/qcom/sm8350.dtsi | 9 +++++++++ arch/arm64/boot/dts/qcom/sm8450.dtsi | 9 +++++++++ arch/arm64/boot/dts/qcom/sm8550.dtsi | 9 +++++++++ 28 files changed, 170 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/ipq5332.dtsi b/arch/arm64/boot/dts/qcom/ipq5332.dtsi index 12e0e179e139..af4d97143bcf 100644 --- a/arch/arm64/boot/dts/qcom/ipq5332.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq5332.dtsi @@ -73,6 +73,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index ece652a0728a..f531797f2619 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -84,6 +84,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 4056ce59d43f..5b2c1986c8f4 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -67,6 +67,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/ipq9574.dtsi b/arch/arm64/boot/dts/qcom/ipq9574.dtsi index 3bb7435f5e7f..0ed19fbf7d87 100644 --- a/arch/arm64/boot/dts/qcom/ipq9574.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq9574.dtsi @@ -72,6 +72,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 7e0fa37a3adf..834e0b66b7f2 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -180,6 +180,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; idle-states { diff --git a/arch/arm64/boot/dts/qcom/msm8953.dtsi b/arch/arm64/boot/dts/qcom/msm8953.dtsi index 602cb188a635..d44cfa0471e9 100644 --- a/arch/arm64/boot/dts/qcom/msm8953.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8953.dtsi @@ -153,11 +153,13 @@ L2_0: l2-cache-0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; L2_1: l2-cache-1 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8976.dtsi b/arch/arm64/boot/dts/qcom/msm8976.dtsi index 1f0bd24a074a..f47fb8ea71e2 100644 --- a/arch/arm64/boot/dts/qcom/msm8976.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8976.dtsi @@ -193,11 +193,13 @@ l2_0: l2-cache0 { compatible = "cache"; cache-level = <2>; + cache-unified; }; l2_1: l2-cache1 { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 2831966be960..bdc3f2ba1755 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -52,6 +52,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -88,6 +89,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 2c5780008c84..30257c07e127 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -55,6 +55,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -85,6 +86,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index b150437a8355..3ec941fed14f 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -146,6 +146,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -190,6 +191,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/qcm2290.dtsi b/arch/arm64/boot/dts/qcom/qcm2290.dtsi index ae5abc76bcc7..b29bc4e4b837 100644 --- a/arch/arm64/boot/dts/qcom/qcm2290.dtsi +++ b/arch/arm64/boot/dts/qcom/qcm2290.dtsi @@ -51,6 +51,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi index eefed585738c..972f753847e1 100644 --- a/arch/arm64/boot/dts/qcom/qcs404.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi @@ -95,6 +95,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; idle-states { diff --git a/arch/arm64/boot/dts/qcom/qdu1000.dtsi b/arch/arm64/boot/dts/qcom/qdu1000.dtsi index 734438113bba..fb553f0bb17a 100644 --- a/arch/arm64/boot/dts/qcom/qdu1000.dtsi +++ b/arch/arm64/boot/dts/qcom/qdu1000.dtsi @@ -35,9 +35,13 @@ next-level-cache = <&L2_0>; L2_0: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -54,6 +58,8 @@ next-level-cache = <&L2_100>; L2_100: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -70,6 +76,8 @@ next-level-cache = <&L2_200>; L2_200: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -86,6 +94,8 @@ next-level-cache = <&L2_300>; L2_300: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sa8775p.dtsi b/arch/arm64/boot/dts/qcom/sa8775p.dtsi index 2343df7e0ea4..c3310caf9f68 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p.dtsi @@ -42,9 +42,13 @@ next-level-cache = <&L2_0>; L2_0: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -58,6 +62,8 @@ next-level-cache = <&L2_1>; L2_1: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -71,6 +77,8 @@ next-level-cache = <&L2_2>; L2_2: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -84,6 +92,8 @@ next-level-cache = <&L2_3>; L2_3: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -97,9 +107,13 @@ next-level-cache = <&L2_4>; L2_4: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_1>; L3_1: l3-cache { compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; @@ -114,6 +128,8 @@ next-level-cache = <&L2_5>; L2_5: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_1>; }; }; @@ -127,6 +143,8 @@ next-level-cache = <&L2_6>; L2_6: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_1>; }; }; @@ -140,6 +158,8 @@ next-level-cache = <&L2_7>; L2_7: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_1>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index ea1ffade1aa1..f479cab8ab45 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -92,10 +92,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -120,6 +122,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -144,6 +147,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -168,6 +172,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -192,6 +197,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -216,6 +222,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -240,6 +247,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -264,6 +272,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 31728f461422..2fd1d3c0eb34 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -182,10 +182,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -208,6 +210,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -230,6 +233,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -252,6 +256,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -274,6 +279,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -296,6 +302,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -318,6 +325,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -340,6 +348,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 2fd92b93d356..cc4aef21e617 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -58,10 +58,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -83,6 +85,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -104,6 +107,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -125,6 +129,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -146,6 +151,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -167,6 +173,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -188,6 +195,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -209,6 +217,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi index 37e72b1c56dc..eaead2f7beb4 100644 --- a/arch/arm64/boot/dts/qcom/sdm630.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi @@ -63,6 +63,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -127,6 +128,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm670.dtsi b/arch/arm64/boot/dts/qcom/sdm670.dtsi index 49c07cb76b20..b61e13db89bd 100644 --- a/arch/arm64/boot/dts/qcom/sdm670.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm670.dtsi @@ -41,8 +41,12 @@ L2_0: l2-cache { compatible = "cache"; next-level-cache = <&L3_0>; + cache-level = <2>; + cache-unified; L3_0: l3-cache { compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -57,6 +61,8 @@ next-level-cache = <&L2_100>; L2_100: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -71,6 +77,8 @@ next-level-cache = <&L2_200>; L2_200: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -85,6 +93,8 @@ next-level-cache = <&L2_300>; L2_300: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -99,6 +109,8 @@ next-level-cache = <&L2_400>; L2_400: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -113,6 +125,8 @@ next-level-cache = <&L2_500>; L2_500: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -127,6 +141,8 @@ next-level-cache = <&L2_600>; L2_600: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -141,6 +157,8 @@ next-level-cache = <&L2_700>; L2_700: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index ae0510e687b4..cdeb05e95674 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -108,10 +108,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -135,6 +137,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -158,6 +161,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -181,6 +185,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -204,6 +209,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -227,6 +233,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -250,6 +257,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -273,6 +281,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6115.dtsi b/arch/arm64/boot/dts/qcom/sm6115.dtsi index 631ca327e064..43f31c1b9d5a 100644 --- a/arch/arm64/boot/dts/qcom/sm6115.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6115.dtsi @@ -50,6 +50,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -102,6 +103,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6125.dtsi b/arch/arm64/boot/dts/qcom/sm6125.dtsi index 9484752fb850..2aa093d16858 100644 --- a/arch/arm64/boot/dts/qcom/sm6125.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6125.dtsi @@ -47,6 +47,7 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; @@ -87,6 +88,7 @@ L2_1: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6350.dtsi b/arch/arm64/boot/dts/qcom/sm6350.dtsi index 18c4616848ce..ad34301f6cdd 100644 --- a/arch/arm64/boot/dts/qcom/sm6350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6350.dtsi @@ -60,10 +60,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -86,6 +88,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -108,6 +111,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -130,6 +134,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -152,6 +157,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -174,6 +180,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -196,6 +203,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -218,6 +226,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6375.dtsi b/arch/arm64/boot/dts/qcom/sm6375.dtsi index 4e8b99e7cf66..f8d9c34d3b2f 100644 --- a/arch/arm64/boot/dts/qcom/sm6375.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6375.dtsi @@ -49,9 +49,13 @@ #cooling-cells = <2>; L2_0: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; + cache-level = <3>; + cache-unified; }; }; }; @@ -69,6 +73,8 @@ #cooling-cells = <2>; L2_100: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -86,6 +92,8 @@ #cooling-cells = <2>; L2_200: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -103,6 +111,8 @@ #cooling-cells = <2>; L2_300: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -120,6 +130,8 @@ #cooling-cells = <2>; L2_400: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -137,6 +149,8 @@ #cooling-cells = <2>; L2_500: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -154,6 +168,8 @@ #cooling-cells = <2>; L2_600: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -171,6 +187,8 @@ #cooling-cells = <2>; L2_700: l2-cache { compatible = "cache"; + cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 1a229caad8aa..27dcda0d4288 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -63,10 +63,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -90,6 +92,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -113,6 +116,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -136,6 +140,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -159,6 +164,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -182,6 +188,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -205,6 +212,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -228,6 +236,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index ebe59bd7bcc7..3efdc03ed0f1 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -60,10 +60,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -82,6 +84,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -100,6 +103,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -118,6 +122,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -136,6 +141,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -154,6 +160,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -172,6 +179,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -190,6 +198,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index b15b585f3548..d59ea8ee7111 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -59,10 +59,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -81,6 +83,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -99,6 +102,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -117,6 +121,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -135,6 +140,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -153,6 +159,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -171,6 +178,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -189,6 +197,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 6e9bad8f6f33..43192ef21aec 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -80,10 +80,12 @@ L2_0: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; L3_0: l3-cache { compatible = "cache"; cache-level = <3>; + cache-unified; }; }; }; @@ -104,6 +106,7 @@ L2_100: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -124,6 +127,7 @@ L2_200: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -144,6 +148,7 @@ L2_300: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -164,6 +169,7 @@ L2_400: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -184,6 +190,7 @@ L2_500: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -204,6 +211,7 @@ L2_600: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; @@ -224,6 +232,7 @@ L2_700: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; next-level-cache = <&L3_0>; }; }; From 925a80af6106a55fa6bcfedc5bd2f02b0b835495 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 16 Apr 2023 12:11:34 +0200 Subject: [PATCH 062/934] ARM: dts: qcom: add missing cache properties Add required cache-unified properties to fix warnings like: qcom-ipq4019-ap.dk01.1-c1.dtb: l2-cache: 'cache-unified' is a required property Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230416101134.95686-4-krzysztof.kozlowski@linaro.org --- arch/arm/boot/dts/qcom-apq8064.dtsi | 1 + arch/arm/boot/dts/qcom-apq8084.dtsi | 1 + arch/arm/boot/dts/qcom-ipq4019.dtsi | 1 + arch/arm/boot/dts/qcom-ipq8064.dtsi | 1 + arch/arm/boot/dts/qcom-msm8660.dtsi | 1 + arch/arm/boot/dts/qcom-msm8960.dtsi | 1 + arch/arm/boot/dts/qcom-msm8974.dtsi | 1 + 7 files changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi index 672b246afbba..d2289205ff81 100644 --- a/arch/arm/boot/dts/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064.dtsi @@ -83,6 +83,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; idle-states { diff --git a/arch/arm/boot/dts/qcom-apq8084.dtsi b/arch/arm/boot/dts/qcom-apq8084.dtsi index b653ea40c441..83839e1ec4d1 100644 --- a/arch/arm/boot/dts/qcom-apq8084.dtsi +++ b/arch/arm/boot/dts/qcom-apq8084.dtsi @@ -74,6 +74,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; qcom,saw = <&saw_l2>; }; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index dfcfb3339c23..f0ef86fadc9d 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -102,6 +102,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; qcom,saw = <&saw_l2>; }; }; diff --git a/arch/arm/boot/dts/qcom-ipq8064.dtsi b/arch/arm/boot/dts/qcom-ipq8064.dtsi index af6764770fd1..7581845737a8 100644 --- a/arch/arm/boot/dts/qcom-ipq8064.dtsi +++ b/arch/arm/boot/dts/qcom-ipq8064.dtsi @@ -45,6 +45,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm/boot/dts/qcom-msm8660.dtsi b/arch/arm/boot/dts/qcom-msm8660.dtsi index f601b40ebcf4..78023ed2fdf7 100644 --- a/arch/arm/boot/dts/qcom-msm8660.dtsi +++ b/arch/arm/boot/dts/qcom-msm8660.dtsi @@ -36,6 +36,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom-msm8960.dtsi index 2a668cd535cc..616fef2ea682 100644 --- a/arch/arm/boot/dts/qcom-msm8960.dtsi +++ b/arch/arm/boot/dts/qcom-msm8960.dtsi @@ -42,6 +42,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; }; }; diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index 8208012684d4..7ed0d925a4e9 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -80,6 +80,7 @@ L2: l2-cache { compatible = "cache"; cache-level = <2>; + cache-unified; qcom,saw = <&saw_l2>; }; From a14da6144d16ef27e3022835fa282a3740b8ad7b Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 17 May 2023 19:06:13 +0200 Subject: [PATCH 063/934] arm64: dts: qcom: sm6375-pdx225: Fix remoteproc firmware paths They were previously missing the SoC name. Fix it. Fixes: a2ad207c412b ("arm64: dts: qcom: sm6375-pdx225: Enable ADSP & CDSP") Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230517-topic-murray-fwname-v1-1-923e87312249@linaro.org --- arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts b/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts index 8220e6f44117..b2f1bb1d58e9 100644 --- a/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts +++ b/arch/arm64/boot/dts/qcom/sm6375-sony-xperia-murray-pdx225.dts @@ -178,12 +178,12 @@ }; &remoteproc_adsp { - firmware-name = "qcom/Sony/murray/adsp.mbn"; + firmware-name = "qcom/sm6375/Sony/murray/adsp.mbn"; status = "okay"; }; &remoteproc_cdsp { - firmware-name = "qcom/Sony/murray/cdsp.mbn"; + firmware-name = "qcom/sm6375/Sony/murray/cdsp.mbn"; status = "okay"; }; From 59112e9c390be595224e427827475a6cd3726021 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 18 May 2023 11:09:15 +0100 Subject: [PATCH 064/934] KVM: arm64: vgic: Fix a circular locking issue Lockdep reports a circular lock dependency between the srcu and the config_lock: [ 262.179917] -> #1 (&kvm->srcu){.+.+}-{0:0}: [ 262.182010] __synchronize_srcu+0xb0/0x224 [ 262.183422] synchronize_srcu_expedited+0x24/0x34 [ 262.184554] kvm_io_bus_register_dev+0x324/0x50c [ 262.185650] vgic_register_redist_iodev+0x254/0x398 [ 262.186740] vgic_v3_set_redist_base+0x3b0/0x724 [ 262.188087] kvm_vgic_addr+0x364/0x600 [ 262.189189] vgic_set_common_attr+0x90/0x544 [ 262.190278] vgic_v3_set_attr+0x74/0x9c [ 262.191432] kvm_device_ioctl+0x2a0/0x4e4 [ 262.192515] __arm64_sys_ioctl+0x7ac/0x1ba8 [ 262.193612] invoke_syscall.constprop.0+0x70/0x1e0 [ 262.195006] do_el0_svc+0xe4/0x2d4 [ 262.195929] el0_svc+0x44/0x8c [ 262.196917] el0t_64_sync_handler+0xf4/0x120 [ 262.198238] el0t_64_sync+0x190/0x194 [ 262.199224] [ 262.199224] -> #0 (&kvm->arch.config_lock){+.+.}-{3:3}: [ 262.201094] __lock_acquire+0x2b70/0x626c [ 262.202245] lock_acquire+0x454/0x778 [ 262.203132] __mutex_lock+0x190/0x8b4 [ 262.204023] mutex_lock_nested+0x24/0x30 [ 262.205100] vgic_mmio_write_v3_misc+0x5c/0x2a0 [ 262.206178] dispatch_mmio_write+0xd8/0x258 [ 262.207498] __kvm_io_bus_write+0x1e0/0x350 [ 262.208582] kvm_io_bus_write+0xe0/0x1cc [ 262.209653] io_mem_abort+0x2ac/0x6d8 [ 262.210569] kvm_handle_guest_abort+0x9b8/0x1f88 [ 262.211937] handle_exit+0xc4/0x39c [ 262.212971] kvm_arch_vcpu_ioctl_run+0x90c/0x1c04 [ 262.214154] kvm_vcpu_ioctl+0x450/0x12f8 [ 262.215233] __arm64_sys_ioctl+0x7ac/0x1ba8 [ 262.216402] invoke_syscall.constprop.0+0x70/0x1e0 [ 262.217774] do_el0_svc+0xe4/0x2d4 [ 262.218758] el0_svc+0x44/0x8c [ 262.219941] el0t_64_sync_handler+0xf4/0x120 [ 262.221110] el0t_64_sync+0x190/0x194 Note that the current report, which can be triggered by the vgic_irq kselftest, is a triple chain that includes slots_lock, but after inverting the slots_lock/config_lock dependency, the actual problem reported above remains. In several places, the vgic code calls kvm_io_bus_register_dev(), which synchronizes the srcu, while holding config_lock (#1). And the MMIO handler takes the config_lock while holding the srcu read lock (#0). Break dependency #1, by registering the distributor and redistributors without holding config_lock. The ITS also uses kvm_io_bus_register_dev() but already relies on slots_lock to serialize calls. The distributor iodev is created on the first KVM_RUN call. Multiple threads will race for vgic initialization, and only the first one will see !vgic_ready() under the lock. To serialize those threads, rely on slots_lock rather than config_lock. Redistributors are created earlier, through KVM_DEV_ARM_VGIC_GRP_ADDR ioctls and vCPU creation. Similarly, serialize the iodev creation with slots_lock, and the rest with config_lock. Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state") Signed-off-by: Jean-Philippe Brucker Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230518100914.2837292-2-jean-philippe@linaro.org --- arch/arm64/kvm/vgic/vgic-init.c | 25 ++++++++++++++++----- arch/arm64/kvm/vgic/vgic-kvm-device.c | 10 +++++++-- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 31 ++++++++++++++++++--------- arch/arm64/kvm/vgic/vgic-mmio.c | 9 ++------ arch/arm64/kvm/vgic/vgic-v2.c | 6 ------ arch/arm64/kvm/vgic/vgic-v3.c | 7 ------ 6 files changed, 51 insertions(+), 37 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 9d42c7cb2b58..c199ba2f192e 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -235,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * KVM io device for the redistributor that belongs to this VCPU. */ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - mutex_lock(&vcpu->kvm->arch.config_lock); + mutex_lock(&vcpu->kvm->slots_lock); ret = vgic_register_redist_iodev(vcpu); - mutex_unlock(&vcpu->kvm->arch.config_lock); + mutex_unlock(&vcpu->kvm->slots_lock); } return ret; } @@ -446,11 +446,13 @@ int vgic_lazy_init(struct kvm *kvm) int kvm_vgic_map_resources(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + gpa_t dist_base; int ret = 0; if (likely(vgic_ready(kvm))) return 0; + mutex_lock(&kvm->slots_lock); mutex_lock(&kvm->arch.config_lock); if (vgic_ready(kvm)) goto out; @@ -463,13 +465,26 @@ int kvm_vgic_map_resources(struct kvm *kvm) else ret = vgic_v3_map_resources(kvm); - if (ret) + if (ret) { __kvm_vgic_destroy(kvm); - else - dist->ready = true; + goto out; + } + dist->ready = true; + dist_base = dist->vgic_dist_base; + mutex_unlock(&kvm->arch.config_lock); + + ret = vgic_register_dist_iodev(kvm, dist_base, + kvm_vgic_global_state.type); + if (ret) { + kvm_err("Unable to register VGIC dist MMIO regions\n"); + kvm_vgic_destroy(kvm); + } + mutex_unlock(&kvm->slots_lock); + return ret; out: mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 35cfa268fd5d..212b73a715c1 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -102,7 +102,11 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri if (get_user(addr, uaddr)) return -EFAULT; - mutex_lock(&kvm->arch.config_lock); + /* + * Since we can't hold config_lock while registering the redistributor + * iodevs, take the slots_lock immediately. + */ + mutex_lock(&kvm->slots_lock); switch (attr->attr) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); @@ -182,6 +186,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri if (r) goto out; + mutex_lock(&kvm->arch.config_lock); if (write) { r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size); if (!r) @@ -189,9 +194,10 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri } else { addr = *addr_ptr; } + mutex_unlock(&kvm->arch.config_lock); out: - mutex_unlock(&kvm->arch.config_lock); + mutex_unlock(&kvm->slots_lock); if (!r && !write) r = put_user(addr, uaddr); diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 472b18ac92a2..188d2187eede 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -769,10 +769,13 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; struct vgic_redist_region *rdreg; gpa_t rd_base; - int ret; + int ret = 0; + + lockdep_assert_held(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) - return 0; + goto out_unlock; /* * We may be creating VCPUs before having set the base address for the @@ -782,10 +785,12 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) */ rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions); if (!rdreg) - return 0; + goto out_unlock; - if (!vgic_v3_check_base(kvm)) - return -EINVAL; + if (!vgic_v3_check_base(kvm)) { + ret = -EINVAL; + goto out_unlock; + } vgic_cpu->rdreg = rdreg; vgic_cpu->rdreg_index = rdreg->free_index; @@ -799,16 +804,20 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); rd_dev->redist_vcpu = vcpu; - mutex_lock(&kvm->slots_lock); + mutex_unlock(&kvm->arch.config_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, 2 * SZ_64K, &rd_dev->dev); - mutex_unlock(&kvm->slots_lock); - if (ret) return ret; + /* Protected by slots_lock */ rdreg->free_index++; return 0; + +out_unlock: + mutex_unlock(&kvm->arch.config_lock); + return ret; } static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) @@ -834,12 +843,10 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) /* The current c failed, so iterate over the previous ones. */ int i; - mutex_lock(&kvm->slots_lock); for (i = 0; i < c; i++) { vcpu = kvm_get_vcpu(kvm, i); vgic_unregister_redist_iodev(vcpu); } - mutex_unlock(&kvm->slots_lock); } return ret; @@ -938,7 +945,9 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) { int ret; + mutex_lock(&kvm->arch.config_lock); ret = vgic_v3_alloc_redist_region(kvm, index, addr, count); + mutex_unlock(&kvm->arch.config_lock); if (ret) return ret; @@ -950,8 +959,10 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) if (ret) { struct vgic_redist_region *rdreg; + mutex_lock(&kvm->arch.config_lock); rdreg = vgic_v3_rdist_region_from_index(kvm, index); vgic_v3_free_redist_region(rdreg); + mutex_unlock(&kvm->arch.config_lock); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 1939c94e0b24..ff558c05e990 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -1096,7 +1096,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, enum vgic_type type) { struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; - int ret = 0; unsigned int len; switch (type) { @@ -1114,10 +1113,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, io_device->iodev_type = IODEV_DIST; io_device->redist_vcpu = NULL; - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, - len, &io_device->dev); - mutex_unlock(&kvm->slots_lock); - - return ret; + return kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, + len, &io_device->dev); } diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 645648349c99..7e9cdb78f7ce 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -312,12 +312,6 @@ int vgic_v2_map_resources(struct kvm *kvm) return ret; } - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); - if (ret) { - kvm_err("Unable to register VGIC MMIO regions\n"); - return ret; - } - if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, kvm_vgic_global_state.vcpu_base, diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 93a47a515c13..c3b8e132d599 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -539,7 +539,6 @@ int vgic_v3_map_resources(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; - int ret = 0; unsigned long c; kvm_for_each_vcpu(c, vcpu, kvm) { @@ -569,12 +568,6 @@ int vgic_v3_map_resources(struct kvm *kvm) return -EBUSY; } - ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); - if (ret) { - kvm_err("Unable to register VGICv3 dist MMIO regions\n"); - return ret; - } - if (kvm_vgic_global_state.has_gicv4_1) vgic_v4_configure_vsgis(kvm); From 9cf2f840c439b6b23bd99f584f2917ca425ae406 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 18 May 2023 11:09:16 +0100 Subject: [PATCH 065/934] KVM: arm64: vgic: Wrap vgic_its_create() with config_lock vgic_its_create() changes the vgic state without holding the config_lock, which triggers a lockdep warning in vgic_v4_init(): [ 358.667941] WARNING: CPU: 3 PID: 178 at arch/arm64/kvm/vgic/vgic-v4.c:245 vgic_v4_init+0x15c/0x7a8 ... [ 358.707410] vgic_v4_init+0x15c/0x7a8 [ 358.708550] vgic_its_create+0x37c/0x4a4 [ 358.709640] kvm_vm_ioctl+0x1518/0x2d80 [ 358.710688] __arm64_sys_ioctl+0x7ac/0x1ba8 [ 358.711960] invoke_syscall.constprop.0+0x70/0x1e0 [ 358.713245] do_el0_svc+0xe4/0x2d4 [ 358.714289] el0_svc+0x44/0x8c [ 358.715329] el0t_64_sync_handler+0xf4/0x120 [ 358.716615] el0t_64_sync+0x190/0x194 Wrap the whole of vgic_its_create() with config_lock since, in addition to calling vgic_v4_init(), it also modifies the global kvm->arch.vgic state. Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state") Signed-off-by: Jean-Philippe Brucker Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230518100914.2837292-3-jean-philippe@linaro.org --- arch/arm64/kvm/vgic/vgic-its.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 750e51e3779a..5fe2365a629f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -1936,6 +1936,7 @@ void vgic_lpi_translation_cache_destroy(struct kvm *kvm) static int vgic_its_create(struct kvm_device *dev, u32 type) { + int ret; struct vgic_its *its; if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) @@ -1945,9 +1946,12 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) if (!its) return -ENOMEM; + mutex_lock(&dev->kvm->arch.config_lock); + if (vgic_initialized(dev->kvm)) { - int ret = vgic_v4_init(dev->kvm); + ret = vgic_v4_init(dev->kvm); if (ret < 0) { + mutex_unlock(&dev->kvm->arch.config_lock); kfree(its); return ret; } @@ -1960,12 +1964,10 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) /* Yep, even more trickery for lock ordering... */ #ifdef CONFIG_LOCKDEP - mutex_lock(&dev->kvm->arch.config_lock); mutex_lock(&its->cmd_lock); mutex_lock(&its->its_lock); mutex_unlock(&its->its_lock); mutex_unlock(&its->cmd_lock); - mutex_unlock(&dev->kvm->arch.config_lock); #endif its->vgic_its_base = VGIC_ADDR_UNDEF; @@ -1986,7 +1988,11 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) dev->private = its; - return vgic_its_set_abi(its, NR_ITS_ABIS - 1); + ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1); + + mutex_unlock(&dev->kvm->arch.config_lock); + + return ret; } static void vgic_its_destroy(struct kvm_device *kvm_dev) From c38b8400aef99d63be2b1ff131bb993465dcafe1 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 18 May 2023 11:09:17 +0100 Subject: [PATCH 066/934] KVM: arm64: vgic: Fix locking comment It is now config_lock that must be held, not kvm lock. Replace the comment with a lockdep annotation. Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state") Signed-off-by: Jean-Philippe Brucker Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230518100914.2837292-4-jean-philippe@linaro.org --- arch/arm64/kvm/vgic/vgic-v4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 3bb003478060..c1c28fe680ba 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -184,13 +184,14 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) } } -/* Must be called with the kvm lock held */ void vgic_v4_configure_vsgis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; unsigned long i; + lockdep_assert_held(&kvm->arch.config_lock); + kvm_arm_halt_guest(kvm); kvm_for_each_vcpu(i, vcpu, kvm) { From 62548732260976ca88fcb17ef98ab661e7ce7504 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 18 May 2023 11:09:18 +0100 Subject: [PATCH 067/934] KVM: arm64: vgic: Fix a comment It is host userspace, not the guest, that issues KVM_DEV_ARM_VGIC_GRP_CTRL Signed-off-by: Jean-Philippe Brucker Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230518100914.2837292-5-jean-philippe@linaro.org --- arch/arm64/kvm/vgic/vgic-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index c199ba2f192e..6eafc2c45cfc 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -406,7 +406,7 @@ void kvm_vgic_destroy(struct kvm *kvm) /** * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest - * is a GICv2. A GICv3 must be explicitly initialized by the guest using the + * is a GICv2. A GICv3 must be explicitly initialized by userspace using the * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group. * @kvm: kvm struct pointer */ From 09cce60bddd6461a93a5bf434265a47827d1bc6f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 May 2023 10:58:44 +0100 Subject: [PATCH 068/934] KVM: arm64: Prevent unconditional donation of unmapped regions from the host Since host stage-2 mappings are created lazily, we cannot rely solely on the pte in order to recover the target physical address when checking a host-initiated memory transition as this permits donation of unmapped regions corresponding to MMIO or "no-map" memory. Instead of inspecting the pte, move the addr_is_allowed_memory() check into the host callback function where it is passed the physical address directly from the walker. Cc: Quentin Perret Fixes: e82edcc75c4e ("KVM: arm64: Implement do_share() helper for sharing memory") Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230518095844.1178-1-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2e9ec4a2a4a3..a8813b212996 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -575,7 +575,7 @@ struct pkvm_mem_donation { struct check_walk_data { enum pkvm_page_state desired; - enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); + enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr); }; static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, @@ -583,10 +583,7 @@ static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, { struct check_walk_data *d = ctx->arg; - if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old))) - return -EINVAL; - - return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM; + return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM; } static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, @@ -601,8 +598,11 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, return kvm_pgtable_walk(pgt, addr, size, &walker); } -static enum pkvm_page_state host_get_page_state(kvm_pte_t pte) +static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr) { + if (!addr_is_allowed_memory(addr)) + return PKVM_NOPAGE; + if (!kvm_pte_valid(pte) && pte) return PKVM_NOPAGE; @@ -709,7 +709,7 @@ static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx return host_stage2_set_owner_locked(addr, size, host_id); } -static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) +static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr) { if (!kvm_pte_valid(pte)) return PKVM_NOPAGE; From 349e3c0cf239cc01d58a1e6c749e171de014cd6a Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 18 May 2023 01:10:59 -0700 Subject: [PATCH 069/934] RDMA/bnxt_re: Fix a possible memory leak Inside bnxt_qplib_create_cq(), when the check for NULL DPI fails, driver returns directly without freeing the memory allocated inside bnxt_qplib_alloc_init_hwq() routine. Fixed this by moving the check for NULL DPI before invoking bnxt_qplib_alloc_init_hwq(). Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://lore.kernel.org/r/1684397461-23082-2-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Kashyap Desai Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index f139d4cd1712..8974f6235cfa 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2056,6 +2056,12 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) u32 pg_sz_lvl; int rc; + if (!cq->dpi) { + dev_err(&rcfw->pdev->dev, + "FP: CREATE_CQ failed due to NULL DPI\n"); + return -EINVAL; + } + hwq_attr.res = res; hwq_attr.depth = cq->max_wqe; hwq_attr.stride = sizeof(struct cq_base); @@ -2069,11 +2075,6 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) CMDQ_BASE_OPCODE_CREATE_CQ, sizeof(req)); - if (!cq->dpi) { - dev_err(&rcfw->pdev->dev, - "FP: CREATE_CQ failed due to NULL DPI\n"); - return -EINVAL; - } req.dpi = cpu_to_le32(cq->dpi->dpi); req.cq_handle = cpu_to_le64(cq->cq_handle); req.cq_size = cpu_to_le32(cq->hwq.max_elements); From 0fa0d520e2a878cb4c94c4dc84395905d3f14f54 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 18 May 2023 01:11:00 -0700 Subject: [PATCH 070/934] RDMA/bnxt_re: Fix return value of bnxt_re_process_raw_qp_pkt_rx bnxt_re_process_raw_qp_pkt_rx() always return 0 and ignores the return value of bnxt_re_post_send_shadow_qp(). Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://lore.kernel.org/r/1684397461-23082-3-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Hongguang Gao Reviewed-by: Ajit Khaparde Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e86afecfbe46..b1c36412025f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3341,9 +3341,7 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp, udwr.remote_qkey = gsi_sqp->qplib_qp.qkey; /* post data received in the send queue */ - rc = bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr); - - return 0; + return bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr); } static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc, From dd5fb04857d7346c9ea4901ec3ebe5b90b0e8868 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 18 May 2023 01:11:01 -0700 Subject: [PATCH 071/934] RDMA/bnxt_re: Do not enable congestion control on VFs Congestion control needs to be enabled only on the PFs. FW fails the command if issued on VFs. Avoid sending the command on VFs. Fixes: f13bcef04ba0 ("RDMA/bnxt_re: Enable congestion control by default") Link: https://lore.kernel.org/r/1684397461-23082-4-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kalesh AP Reviewed-by: Selvin Thyparampil Xavier Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index b9e2f89337e8..e34eccd178d0 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1336,6 +1336,10 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) { struct bnxt_qplib_cc_param cc_param = {}; + /* Do not enable congestion control on VFs */ + if (rdev->is_virtfn) + return; + /* Currently enabling only for GenP5 adapters */ if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) return; From 626d312028bec44209d0ecd5beaa9b1aa8945f7d Mon Sep 17 00:00:00 2001 From: Masahiro Honda Date: Thu, 18 May 2023 20:08:16 +0900 Subject: [PATCH 072/934] iio: adc: ad_sigma_delta: Fix IRQ issue by setting IRQ_DISABLE_UNLAZY flag The Sigma-Delta ADCs supported by this driver can use SDO as an interrupt line to indicate the completion of a conversion. However, some devices cannot properly detect the completion of a conversion by an interrupt. This is for the reason mentioned in the following commit. commit e9849777d0e2 ("genirq: Add flag to force mask in disable_irq[_nosync]()") A read operation is performed by an extra interrupt before the completion of a conversion. At this time, the value read from the ADC data register is the same as the previous conversion result. This patch fixes the issue by setting IRQ_DISABLE_UNLAZY flag. Fixes: 0c6ef985a1fd ("iio: adc: ad7791: fix IRQ flags") Fixes: 1a913270e57a ("iio: adc: ad7793: Fix IRQ flag") Fixes: e081102f3077 ("iio: adc: ad7780: Fix IRQ flag") Fixes: 89a86da5cb8e ("iio: adc: ad7192: Add IRQ flag") Fixes: 79ef91493f54 ("iio: adc: ad7124: Set IRQ type to falling") Signed-off-by: Masahiro Honda Link: https://lore.kernel.org/r/20230518110816.248-1-honda@mechatrax.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index d8570f620785..7e2192870743 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -584,6 +584,10 @@ static int devm_ad_sd_probe_trigger(struct device *dev, struct iio_dev *indio_de init_completion(&sigma_delta->completion); sigma_delta->irq_dis = true; + + /* the IRQ core clears IRQ_DISABLE_UNLAZY flag when freeing an IRQ */ + irq_set_status_flags(sigma_delta->spi->irq, IRQ_DISABLE_UNLAZY); + ret = devm_request_irq(dev, sigma_delta->spi->irq, ad_sd_data_rdy_trig_poll, sigma_delta->info->irq_flags | IRQF_NO_AUTOEN, From bbaae0c79ebd49f61ad942a8bf9e12bfc7f821bb Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Tue, 9 May 2023 15:22:02 +0000 Subject: [PATCH 073/934] iio: imu: inv_icm42600: fix timestamp reset Timestamp reset is not done in the correct place. It must be done before enabling buffer. The reason is that interrupt timestamping is always happening when the chip is on, even if the corresponding sensor is off. When the sensor restarts, timestamp is wrong if you don't do a reset first. Fixes: ec74ae9fd37c ("iio: imu: inv_icm42600: add accurate timestamping") Signed-off-by: Jean-Baptiste Maneyrol Cc: Link: https://lore.kernel.org/r/20230509152202.245444-1-inv.git-commit@tdk.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c index 99576b2c171f..32d7f8364230 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.c @@ -275,9 +275,14 @@ static int inv_icm42600_buffer_preenable(struct iio_dev *indio_dev) { struct inv_icm42600_state *st = iio_device_get_drvdata(indio_dev); struct device *dev = regmap_get_device(st->map); + struct inv_icm42600_timestamp *ts = iio_priv(indio_dev); pm_runtime_get_sync(dev); + mutex_lock(&st->lock); + inv_icm42600_timestamp_reset(ts); + mutex_unlock(&st->lock); + return 0; } @@ -375,7 +380,6 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev) struct device *dev = regmap_get_device(st->map); unsigned int sensor; unsigned int *watermark; - struct inv_icm42600_timestamp *ts; struct inv_icm42600_sensor_conf conf = INV_ICM42600_SENSOR_CONF_INIT; unsigned int sleep_temp = 0; unsigned int sleep_sensor = 0; @@ -385,11 +389,9 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev) if (indio_dev == st->indio_gyro) { sensor = INV_ICM42600_SENSOR_GYRO; watermark = &st->fifo.watermark.gyro; - ts = iio_priv(st->indio_gyro); } else if (indio_dev == st->indio_accel) { sensor = INV_ICM42600_SENSOR_ACCEL; watermark = &st->fifo.watermark.accel; - ts = iio_priv(st->indio_accel); } else { return -EINVAL; } @@ -417,8 +419,6 @@ static int inv_icm42600_buffer_postdisable(struct iio_dev *indio_dev) if (!st->fifo.on) ret = inv_icm42600_set_temp_conf(st, false, &sleep_temp); - inv_icm42600_timestamp_reset(ts); - out_unlock: mutex_unlock(&st->lock); From 1f8b6df6a997a430b0c48b504638154b520781ad Mon Sep 17 00:00:00 2001 From: Benedict Wong Date: Wed, 10 May 2023 01:30:21 +0000 Subject: [PATCH 074/934] xfrm: Treat already-verified secpath entries as optional This change allows inbound traffic through nested IPsec tunnels to successfully match policies and templates, while retaining the secpath stack trace as necessary for netfilter policies. Specifically, this patch marks secpath entries that have already matched against a relevant policy as having been verified, allowing it to be treated as optional and skipped after a tunnel decapsulation (during which the src/dst/proto/etc may have changed, and the correct policy chain no long be resolvable). This approach is taken as opposed to the iteration in b0355dbbf13c, where the secpath was cleared, since that breaks subsequent validations that rely on the existence of the secpath entries (netfilter policies, or transport-in-tunnel mode, where policies remain resolvable). Fixes: b0355dbbf13c ("Fix XFRM-I support for nested ESP tunnels") Test: Tested against Android Kernel Unit Tests Test: Tested against Android CTS Signed-off-by: Benedict Wong Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/xfrm/xfrm_input.c | 1 + net/xfrm/xfrm_policy.c | 12 ++++++++++++ 3 files changed, 14 insertions(+) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 33ee3f5936e6..151ca95dd08d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1054,6 +1054,7 @@ struct xfrm_offload { struct sec_path { int len; int olen; + int verified_cnt; struct xfrm_state *xvec[XFRM_MAX_DEPTH]; struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 39fb91ff23d9..b731687b5f3e 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -131,6 +131,7 @@ struct sec_path *secpath_set(struct sk_buff *skb) memset(sp->ovec, 0, sizeof(sp->ovec)); sp->olen = 0; sp->len = 0; + sp->verified_cnt = 0; return sp; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6d15788b5123..ff58ce6c030c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3349,6 +3349,13 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id)) return ++idx; if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { + if (idx < sp->verified_cnt) { + /* Secpath entry previously verified, consider optional and + * continue searching + */ + continue; + } + if (start == -1) start = -2-idx; break; @@ -3723,6 +3730,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. + * Upon success, marks secpath entries as having been + * verified to allow them to be skipped in future policy + * checks (e.g. nested tunnels). */ for (i = xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family, if_id); @@ -3741,6 +3751,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } xfrm_pols_put(pols, npols); + sp->verified_cnt = k; + return 1; } XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); From a287f5b0cfc6804c5b12a4be13c7c9fe27869e90 Mon Sep 17 00:00:00 2001 From: Benedict Wong Date: Wed, 10 May 2023 01:30:22 +0000 Subject: [PATCH 075/934] xfrm: Ensure policies always checked on XFRM-I input path This change adds methods in the XFRM-I input path that ensures that policies are checked prior to processing of the subsequent decapsulated packet, after which the relevant policies may no longer be resolvable (due to changing src/dst/proto/etc). Notably, raw ESP/AH packets did not perform policy checks inherently, whereas all other encapsulated packets (UDP, TCP encapsulated) do policy checks after calling xfrm_input handling in the respective encapsulation layer. Fixes: b0355dbbf13c ("Fix XFRM-I support for nested ESP tunnels") Test: Verified with additional Android Kernel Unit tests Test: Verified against Android CTS Signed-off-by: Benedict Wong Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface_core.c | 54 +++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 1f99dc469027..35279c220bd7 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -310,6 +310,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) skb->mark = 0; } +static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type, unsigned short family) +{ + struct sec_path *sp; + + sp = skb_sec_path(skb); + if (sp && (sp->len || sp->olen) && + !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + goto discard; + + XFRM_SPI_SKB_CB(skb)->family = family; + if (family == AF_INET) { + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + } else { + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + } + + return xfrm_input(skb, nexthdr, spi, encap_type); +discard: + kfree_skb(skb); + return 0; +} + +static int xfrmi4_rcv(struct sk_buff *skb) +{ + return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET); +} + +static int xfrmi6_rcv(struct sk_buff *skb) +{ + return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], + 0, 0, AF_INET6); +} + +static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) +{ + return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET); +} + +static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) +{ + return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6); +} + static int xfrmi_rcv_cb(struct sk_buff *skb, int err) { const struct xfrm_mode *inner_mode; @@ -945,8 +991,8 @@ static struct pernet_operations xfrmi_net_ops = { }; static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { - .handler = xfrm6_rcv, - .input_handler = xfrm_input, + .handler = xfrmi6_rcv, + .input_handler = xfrmi6_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, @@ -996,8 +1042,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = { #endif static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { - .handler = xfrm4_rcv, - .input_handler = xfrm_input, + .handler = xfrmi4_rcv, + .input_handler = xfrmi4_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, .priority = 10, From 70c2e03e9aaf17496c63f6e42333c012f5ae5307 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 29 Mar 2023 13:23:04 +0300 Subject: [PATCH 076/934] thunderbolt: dma_test: Use correct value for absent rings when creating paths Both tb_xdomain_enable_paths() and tb_xdomain_disable_paths() expect -1, not 0, if the corresponding ring is not needed. For this reason change the driver to use correct value for the rings that are not needed. Fixes: 180b0689425c ("thunderbolt: Allow multiple DMA tunnels over a single XDomain connection") Cc: stable@vger.kernel.org Reported-by: Pengfei Xu Tested-by: Pengfei Xu Signed-off-by: Mika Westerberg --- drivers/thunderbolt/dma_test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/dma_test.c b/drivers/thunderbolt/dma_test.c index 3bedecb236e0..14bb6dec6c4b 100644 --- a/drivers/thunderbolt/dma_test.c +++ b/drivers/thunderbolt/dma_test.c @@ -192,9 +192,9 @@ static int dma_test_start_rings(struct dma_test *dt) } ret = tb_xdomain_enable_paths(dt->xd, dt->tx_hopid, - dt->tx_ring ? dt->tx_ring->hop : 0, + dt->tx_ring ? dt->tx_ring->hop : -1, dt->rx_hopid, - dt->rx_ring ? dt->rx_ring->hop : 0); + dt->rx_ring ? dt->rx_ring->hop : -1); if (ret) { dma_test_free_rings(dt); return ret; @@ -218,9 +218,9 @@ static void dma_test_stop_rings(struct dma_test *dt) tb_ring_stop(dt->tx_ring); ret = tb_xdomain_disable_paths(dt->xd, dt->tx_hopid, - dt->tx_ring ? dt->tx_ring->hop : 0, + dt->tx_ring ? dt->tx_ring->hop : -1, dt->rx_hopid, - dt->rx_ring ? dt->rx_ring->hop : 0); + dt->rx_ring ? dt->rx_ring->hop : -1); if (ret) dev_warn(&dt->svc->dev, "failed to disable DMA paths\n"); From 89c0c62e947a01e7a36b54582fd9c9e346170255 Mon Sep 17 00:00:00 2001 From: Vineeth Vijayan Date: Thu, 4 May 2023 20:53:20 +0200 Subject: [PATCH 077/934] s390/cio: unregister device when the only path is gone Currently, if the device is offline and all the channel paths are either configured or varied offline, the associated subchannel gets unregistered. Don't unregister the subchannel, instead unregister offline device. Signed-off-by: Vineeth Vijayan Reviewed-by: Peter Oberparleiter Signed-off-by: Alexander Gordeev --- drivers/s390/cio/device.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index d5c43e9b5128..c0d620ffea61 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1376,6 +1376,7 @@ void ccw_device_set_notoper(struct ccw_device *cdev) enum io_sch_action { IO_SCH_UNREG, IO_SCH_ORPH_UNREG, + IO_SCH_UNREG_CDEV, IO_SCH_ATTACH, IO_SCH_UNREG_ATTACH, IO_SCH_ORPH_ATTACH, @@ -1408,7 +1409,7 @@ static enum io_sch_action sch_get_action(struct subchannel *sch) } if ((sch->schib.pmcw.pam & sch->opm) == 0) { if (ccw_device_notify(cdev, CIO_NO_PATH) != NOTIFY_OK) - return IO_SCH_UNREG; + return IO_SCH_UNREG_CDEV; return IO_SCH_DISC; } if (device_is_disconnected(cdev)) @@ -1470,6 +1471,7 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) case IO_SCH_ORPH_ATTACH: ccw_device_set_disconnected(cdev); break; + case IO_SCH_UNREG_CDEV: case IO_SCH_UNREG_ATTACH: case IO_SCH_UNREG: if (!cdev) @@ -1503,6 +1505,7 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) if (rc) goto out; break; + case IO_SCH_UNREG_CDEV: case IO_SCH_UNREG_ATTACH: spin_lock_irqsave(sch->lock, flags); sch_set_cdev(sch, NULL); From e332003bb216a9f91e08004b9e2de0745f321290 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 30 Mar 2023 09:58:17 -0700 Subject: [PATCH 078/934] iommu: Make IPMMU_VMSA dependencies more strict On riscv64, linux-next-20233030 (and for several days earlier), there is a kconfig warning: WARNING: unmet direct dependencies detected for IOMMU_IO_PGTABLE_LPAE Depends on [n]: IOMMU_SUPPORT [=y] && (ARM || ARM64 || COMPILE_TEST [=n]) && !GENERIC_ATOMIC64 [=n] Selected by [y]: - IPMMU_VMSA [=y] && IOMMU_SUPPORT [=y] && (ARCH_RENESAS [=y] || COMPILE_TEST [=n]) && !GENERIC_ATOMIC64 [=n] and build errors: riscv64-linux-ld: drivers/iommu/io-pgtable-arm.o: in function `.L140': io-pgtable-arm.c:(.init.text+0x1e8): undefined reference to `alloc_io_pgtable_ops' riscv64-linux-ld: drivers/iommu/io-pgtable-arm.o: in function `.L168': io-pgtable-arm.c:(.init.text+0xab0): undefined reference to `free_io_pgtable_ops' riscv64-linux-ld: drivers/iommu/ipmmu-vmsa.o: in function `.L140': ipmmu-vmsa.c:(.text+0xbc4): undefined reference to `free_io_pgtable_ops' riscv64-linux-ld: drivers/iommu/ipmmu-vmsa.o: in function `.L0 ': ipmmu-vmsa.c:(.text+0x145e): undefined reference to `alloc_io_pgtable_ops' Add ARM || ARM64 || COMPILE_TEST dependencies to IPMMU_VMSA to prevent these issues, i.e., so that ARCH_RENESAS on RISC-V is not allowed. This makes the ARCH dependencies become: depends on (ARCH_RENESAS && (ARM || ARM64)) || COMPILE_TEST but that can be a bit hard to read. Fixes: 8292493c22c8 ("riscv: Kconfig.socs: Add ARCH_RENESAS kconfig option") Signed-off-by: Randy Dunlap Suggested-by: Geert Uytterhoeven Cc: Joerg Roedel Cc: Will Deacon Cc: Robin Murphy Cc: iommu@lists.linux.dev Cc: Conor Dooley Cc: Lad Prabhakar Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20230330165817.21920-1-rdunlap@infradead.org Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 6de900776e24..4d800601e8ec 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -282,6 +282,7 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" depends on ARCH_RENESAS || COMPILE_TEST + depends on ARM || ARM64 || COMPILE_TEST depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE select IOMMU_API select IOMMU_IO_PGTABLE_LPAE From ec014683c564fb74fc68e8f5e84691d3b3839d24 Mon Sep 17 00:00:00 2001 From: Chao Wang Date: Mon, 17 Apr 2023 03:04:21 +0000 Subject: [PATCH 079/934] iommu/rockchip: Fix unwind goto issue Smatch complains that drivers/iommu/rockchip-iommu.c:1306 rk_iommu_probe() warn: missing unwind goto? The rk_iommu_probe function, after obtaining the irq value through platform_get_irq, directly returns an error if the returned value is negative, without releasing any resources. Fix this by adding a new error handling label "err_pm_disable" and use a goto statement to redirect to the error handling process. In order to preserve the original semantics, set err to the value of irq. Fixes: 1aa55ca9b14a ("iommu/rockchip: Move irq request past pm_runtime_enable") Signed-off-by: Chao Wang Reviewed-by: Dongliang Mu Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230417030421.2777-1-D202280639@hust.edu.cn Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index ea5a3088bb7e..4054030c3237 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1335,20 +1335,22 @@ static int rk_iommu_probe(struct platform_device *pdev) for (i = 0; i < iommu->num_irq; i++) { int irq = platform_get_irq(pdev, i); - if (irq < 0) - return irq; + if (irq < 0) { + err = irq; + goto err_pm_disable; + } err = devm_request_irq(iommu->dev, irq, rk_iommu_irq, IRQF_SHARED, dev_name(dev), iommu); - if (err) { - pm_runtime_disable(dev); - goto err_remove_sysfs; - } + if (err) + goto err_pm_disable; } dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask); return 0; +err_pm_disable: + pm_runtime_disable(dev); err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); err_put_group: From ed8a2f4ddef2eaaf864ab1efbbca9788187036ab Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 19 Apr 2023 21:11:53 +0100 Subject: [PATCH 080/934] iommu/amd: Don't block updates to GATag if guest mode is on On KVM GSI routing table updates, specially those where they have vIOMMUs with interrupt remapping enabled (to boot >255vcpus setups without relying on KVM_FEATURE_MSI_EXT_DEST_ID), a VMM may update the backing VF MSIs with a new VCPU affinity. On AMD with AVIC enabled, the new vcpu affinity info is updated via: avic_pi_update_irte() irq_set_vcpu_affinity() amd_ir_set_vcpu_affinity() amd_iommu_{de}activate_guest_mode() Where the IRTE[GATag] is updated with the new vcpu affinity. The GATag contains VM ID and VCPU ID, and is used by IOMMU hardware to signal KVM (via GALog) when interrupt cannot be delivered due to vCPU is in blocking state. The issue is that amd_iommu_activate_guest_mode() will essentially only change IRTE fields on transitions from non-guest-mode to guest-mode and otherwise returns *with no changes to IRTE* on already configured guest-mode interrupts. To the guest this means that the VF interrupts remain affined to the first vCPU they were first configured, and guest will be unable to issue VF interrupts and receive messages like this from spurious interrupts (e.g. from waking the wrong vCPU in GALog): [ 167.759472] __common_interrupt: 3.34 No irq handler for vector [ 230.680927] mlx5_core 0000:00:02.0: mlx5_cmd_eq_recover:247:(pid 3122): Recovered 1 EQEs on cmd_eq [ 230.681799] mlx5_core 0000:00:02.0: wait_func_handle_exec_timeout:1113:(pid 3122): cmd[0]: CREATE_CQ(0x400) recovered after timeout [ 230.683266] __common_interrupt: 3.34 No irq handler for vector Given the fact that amd_ir_set_vcpu_affinity() uses amd_iommu_activate_guest_mode() underneath it essentially means that VCPU affinity changes of IRTEs are nops. Fix it by dropping the check for guest-mode at amd_iommu_activate_guest_mode(). Same thing is applicable to amd_iommu_deactivate_guest_mode() although, even if the IRTE doesn't change underlying DestID on the host, the VFIO IRQ handler will still be able to poke at the right guest-vCPU. Fixes: b9c6ff94e43a ("iommu/amd: Re-factor guest virtual APIC (de-)activation code") Signed-off-by: Joao Martins Reviewed-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20230419201154.83880-2-joao.m.martins@oracle.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4a314647d1f7..72845541ef2e 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3493,8 +3493,7 @@ int amd_iommu_activate_guest_mode(void *data) struct irte_ga *entry = (struct irte_ga *) ir_data->entry; u64 valid; - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || - !entry || entry->lo.fields_vapic.guest_mode) + if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry) return 0; valid = entry->lo.fields_vapic.valid; From af47b0a24058e56e983881993752f88288ca6511 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 19 Apr 2023 21:11:54 +0100 Subject: [PATCH 081/934] iommu/amd: Handle GALog overflows GALog exists to propagate interrupts into all vCPUs in the system when interrupts are marked as non running (e.g. when vCPUs aren't running). A GALog overflow happens when there's in no space in the log to record the GATag of the interrupt. So when the GALOverflow condition happens, the GALog queue is processed and the GALog is restarted, as the IOMMU manual indicates in section "2.7.4 Guest Virtual APIC Log Restart Procedure": | * Wait until MMIO Offset 2020h[GALogRun]=0b so that all request | entries are completed as circumstances allow. GALogRun must be 0b to | modify the guest virtual APIC log registers safely. | * Write MMIO Offset 0018h[GALogEn]=0b. | * As necessary, change the following values (e.g., to relocate or | resize the guest virtual APIC event log): | - the Guest Virtual APIC Log Base Address Register | [MMIO Offset 00E0h], | - the Guest Virtual APIC Log Head Pointer Register | [MMIO Offset 2040h][GALogHead], and | - the Guest Virtual APIC Log Tail Pointer Register | [MMIO Offset 2048h][GALogTail]. | * Write MMIO Offset 2020h[GALOverflow] = 1b to clear the bit (W1C). | * Write MMIO Offset 0018h[GALogEn] = 1b, and either set | MMIO Offset 0018h[GAIntEn] to enable the GA log interrupt or clear | the bit to disable it. Failing to handle the GALog overflow means that none of the VFs (in any guest) will work with IOMMU AVIC forcing the user to power cycle the host. When handling the event it resumes the GALog without resizing much like how it is done in the event handler overflow. The [MMIO Offset 2020h][GALOverflow] bit might be set in status register without the [MMIO Offset 2020h][GAInt] bit, so when deciding to poll for GA events (to clear space in the galog), also check the overflow bit. [suravee: Check for GAOverflow without GAInt, toggle CONTROL_GAINT_EN] Co-developed-by: Suravee Suthikulpanit Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joao Martins Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20230419201154.83880-3-joao.m.martins@oracle.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 1 + drivers/iommu/amd/init.c | 24 ++++++++++++++++++++++++ drivers/iommu/amd/iommu.c | 9 ++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index e98f20a9bdd8..e7ee2577f98d 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -15,6 +15,7 @@ extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu); +extern void amd_iommu_restart_ga_log(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); extern void amd_iommu_init_notifier(void); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 329a406cc37d..c2d80a4e5fb0 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -758,6 +758,30 @@ void amd_iommu_restart_event_logging(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); } +/* + * This function restarts event logging in case the IOMMU experienced + * an GA log overflow. + */ +void amd_iommu_restart_ga_log(struct amd_iommu *iommu) +{ + u32 status; + + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (status & MMIO_STATUS_GALOG_RUN_MASK) + return; + + pr_info_ratelimited("IOMMU GA Log restarting\n"); + + iommu_feature_disable(iommu, CONTROL_GALOG_EN); + iommu_feature_disable(iommu, CONTROL_GAINT_EN); + + writel(MMIO_STATUS_GALOG_OVERFLOW_MASK, + iommu->mmio_base + MMIO_STATUS_OFFSET); + + iommu_feature_enable(iommu, CONTROL_GAINT_EN); + iommu_feature_enable(iommu, CONTROL_GALOG_EN); +} + /* * This function resets the command buffer if the IOMMU stopped fetching * commands from it. diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 72845541ef2e..8c08d8273326 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -845,6 +845,7 @@ amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { } (MMIO_STATUS_EVT_OVERFLOW_INT_MASK | \ MMIO_STATUS_EVT_INT_MASK | \ MMIO_STATUS_PPR_INT_MASK | \ + MMIO_STATUS_GALOG_OVERFLOW_MASK | \ MMIO_STATUS_GALOG_INT_MASK) irqreturn_t amd_iommu_int_thread(int irq, void *data) @@ -868,10 +869,16 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data) } #ifdef CONFIG_IRQ_REMAP - if (status & MMIO_STATUS_GALOG_INT_MASK) { + if (status & (MMIO_STATUS_GALOG_INT_MASK | + MMIO_STATUS_GALOG_OVERFLOW_MASK)) { pr_devel("Processing IOMMU GA Log\n"); iommu_poll_ga_log(iommu); } + + if (status & MMIO_STATUS_GALOG_OVERFLOW_MASK) { + pr_info_ratelimited("IOMMU GA Log overflow\n"); + amd_iommu_restart_ga_log(iommu); + } #endif if (status & MMIO_STATUS_EVT_OVERFLOW_INT_MASK) { From 8ec4e2befef10c7679cd59251956a428e783c0b5 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Thu, 20 Apr 2023 12:20:13 -0700 Subject: [PATCH 082/934] iommu/amd: Fix up merge conflict resolution Merge commit e17c6debd4b2 ("Merge branches 'arm/mediatek', 'arm/msm', 'arm/renesas', 'arm/rockchip', 'arm/smmu', 'x86/vt-d' and 'x86/amd' into next") added amd_iommu_init_devices, amd_iommu_uninit_devices, and amd_iommu_init_notifier back to drivers/iommu/amd/amd_iommu.h. The only references to them are here, so clean them up. Fixes: e17c6debd4b2 ("Merge branches 'arm/mediatek', 'arm/msm', 'arm/renesas', 'arm/rockchip', 'arm/smmu', 'x86/vt-d' and 'x86/amd' into next") Cc: Joerg Roedel Cc: Suravee Suthikulpanit Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Jerry Snitselaar Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20230420192013.733331-1-jsnitsel@redhat.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index e7ee2577f98d..9beeceb9d825 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -16,9 +16,6 @@ extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu); extern void amd_iommu_restart_ga_log(struct amd_iommu *iommu); -extern int amd_iommu_init_devices(void); -extern void amd_iommu_uninit_devices(void); -extern void amd_iommu_init_notifier(void); extern void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); #ifdef CONFIG_AMD_IOMMU_DEBUGFS From 29f54745f24547a84b18582e054df9bea1a7bf3e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Apr 2023 16:04:15 -0300 Subject: [PATCH 083/934] iommu/amd: Add missing domain type checks Drivers are supposed to list the domain types they support in their domain_alloc() ops so when we add new domain types, like BLOCKING or SVA, they don't start breaking. This ended up providing an empty UNMANAGED domain when the core code asked for a BLOCKING domain, which happens to be the fallback for drivers that don't support it, but this is completely wrong for SVA. Check for the DMA types AMD supports and reject every other kind. Fixes: 136467962e49 ("iommu: Add IOMMU SVA domain support") Signed-off-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/0-v1-2ac37b893728+da-amd_check_types_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 8c08d8273326..3797e35df035 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2074,7 +2074,7 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) { struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; - int pgtable = amd_iommu_pgtable; + int pgtable; int mode = DEFAULT_PGTABLE_LEVEL; int ret; @@ -2091,6 +2091,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) mode = PAGE_MODE_NONE; } else if (type == IOMMU_DOMAIN_UNMANAGED) { pgtable = AMD_IOMMU_V1; + } else if (type == IOMMU_DOMAIN_DMA || type == IOMMU_DOMAIN_DMA_FQ) { + pgtable = amd_iommu_pgtable; + } else { + return NULL; } switch (pgtable) { From 2212fc2acf3f6ee690ea36506fb882a19d1bfcab Mon Sep 17 00:00:00 2001 From: Jon Pan-Doh Date: Wed, 26 Apr 2023 13:32:56 -0700 Subject: [PATCH 084/934] iommu/amd: Fix domain flush size when syncing iotlb When running on an AMD vIOMMU, we observed multiple invalidations (of decreasing power of 2 aligned sizes) when unmapping a single page. Domain flush takes gather bounds (end-start) as size param. However, gather->end is defined as the last inclusive address (start + size - 1). This leads to an off by 1 error. With this patch, verified that 1 invalidation occurs when unmapping a single page. Fixes: a270be1b3fdf ("iommu/amd: Use only natural aligned flushes in a VM") Cc: stable@vger.kernel.org # >= 5.15 Signed-off-by: Jon Pan-Doh Tested-by: Sudheer Dantuluri Suggested-by: Gary Zibrat Reviewed-by: Vasant Hegde Acked-by: Nadav Amit Link: https://lore.kernel.org/r/20230426203256.237116-1-pandoh@google.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3797e35df035..0f3ac4b489d6 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2398,7 +2398,7 @@ static void amd_iommu_iotlb_sync(struct iommu_domain *domain, unsigned long flags; spin_lock_irqsave(&dom->lock, flags); - domain_flush_pages(dom, gather->start, gather->end - gather->start, 1); + domain_flush_pages(dom, gather->start, gather->end - gather->start + 1, 1); amd_iommu_domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } From ab1de7ead871ebe6d12a774c3c25de0388cde082 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Wed, 17 May 2023 07:45:45 +0000 Subject: [PATCH 085/934] cgroup: fix missing cpus_read_{lock,unlock}() in cgroup_transfer_tasks() The commit 4f7e7236435c ("cgroup: Fix threadgroup_rwsem <-> cpus_read_lock() deadlock") fixed the deadlock between cgroup_threadgroup_rwsem and cpus_read_lock() by introducing cgroup_attach_{lock,unlock}() and removing cpus_read_{lock,unlock}() from cpuset_attach(). But cgroup_transfer_tasks() was missed and not handled, which will cause th following warning: WARNING: CPU: 0 PID: 589 at kernel/cpu.c:526 lockdep_assert_cpus_held+0x32/0x40 CPU: 0 PID: 589 Comm: kworker/1:4 Not tainted 6.4.0-rc2-next-20230517 #50 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 Workqueue: events cpuset_hotplug_workfn RIP: 0010:lockdep_assert_cpus_held+0x32/0x40 <...> Call Trace: cpuset_attach+0x40/0x240 cgroup_migrate_execute+0x452/0x5e0 ? _raw_spin_unlock_irq+0x28/0x40 cgroup_transfer_tasks+0x1f3/0x360 ? find_held_lock+0x32/0x90 ? cpuset_hotplug_workfn+0xc81/0xed0 cpuset_hotplug_workfn+0xcb1/0xed0 ? process_one_work+0x248/0x5b0 process_one_work+0x2b9/0x5b0 worker_thread+0x56/0x3b0 ? process_one_work+0x5b0/0x5b0 kthread+0xf1/0x120 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 So just use the cgroup_attach_{lock,unlock}() helper to fix it. Reported-by: Zhao Gongyi Signed-off-by: Qi Zheng Acked-by: Muchun Song Fixes: 05c7b7a92cc8 ("cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug") Cc: stable@vger.kernel.org # v5.17+ Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index aeef06c465ef..5407241dbb45 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -108,7 +108,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) cgroup_lock(); - percpu_down_write(&cgroup_threadgroup_rwsem); + cgroup_attach_lock(true); /* all tasks in @from are being moved, all csets are source */ spin_lock_irq(&css_set_lock); @@ -144,7 +144,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) } while (task && !ret); out_err: cgroup_migrate_finish(&mgctx); - percpu_up_write(&cgroup_threadgroup_rwsem); + cgroup_attach_unlock(true); cgroup_unlock(); return ret; } From 13247018d68f21e7132924b9853f7e2c423588b6 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 8 May 2023 18:22:17 +0200 Subject: [PATCH 086/934] scsi: target: iscsi: Fix hang in the iSCSI login code If the initiator suddenly stops sending data during a login while keeping the TCP connection open, the login_work won't be scheduled and will never release the login semaphore; concurrent login operations will therefore get stuck and fail. The bug is due to the inability of the login timeout code to properly handle this particular case. Fix the problem by replacing the old per-NP login timer with a new per-connection timer. The timer is started when an initiator connects to the target; if it expires, it sends a SIGINT signal to the thread pointed at by the conn->login_kworker pointer. conn->login_kworker is set by calling the iscsit_set_login_timer_kworker() helper, initially it will point to the np thread; When the login operation's control is in the process of being passed from the NP-thread to login_work, the conn->login_worker pointer is set to NULL. Finally, login_kworker will be changed to point to the worker thread executing the login_work job. If conn->login_kworker is NULL when the timer expires, it means that the login operation hasn't been completed yet but login_work isn't running, in this case the timer will mark the login process as failed and will schedule login_work so the latter will be forced to free the resources it holds. Signed-off-by: Maurizio Lombardi Link: https://lore.kernel.org/r/20230508162219.1731964-2-mlombard@redhat.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target.c | 2 - drivers/target/iscsi/iscsi_target_login.c | 63 ++------------------ drivers/target/iscsi/iscsi_target_nego.c | 70 +++++++++++++---------- drivers/target/iscsi/iscsi_target_util.c | 51 +++++++++++++++++ drivers/target/iscsi/iscsi_target_util.h | 4 ++ include/target/iscsi/iscsi_target_core.h | 6 +- 6 files changed, 103 insertions(+), 93 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 834cce50f9b0..b516c2893420 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -364,8 +364,6 @@ struct iscsi_np *iscsit_add_np( init_completion(&np->np_restart_comp); INIT_LIST_HEAD(&np->np_list); - timer_setup(&np->np_login_timer, iscsi_handle_login_thread_timeout, 0); - ret = iscsi_target_setup_login_socket(np, sockaddr); if (ret != 0) { kfree(np); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 274bdd7845ca..90b870f234f0 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -811,59 +811,6 @@ void iscsi_post_login_handler( iscsit_dec_conn_usage_count(conn); } -void iscsi_handle_login_thread_timeout(struct timer_list *t) -{ - struct iscsi_np *np = from_timer(np, t, np_login_timer); - - spin_lock_bh(&np->np_thread_lock); - pr_err("iSCSI Login timeout on Network Portal %pISpc\n", - &np->np_sockaddr); - - if (np->np_login_timer_flags & ISCSI_TF_STOP) { - spin_unlock_bh(&np->np_thread_lock); - return; - } - - if (np->np_thread) - send_sig(SIGINT, np->np_thread, 1); - - np->np_login_timer_flags &= ~ISCSI_TF_RUNNING; - spin_unlock_bh(&np->np_thread_lock); -} - -static void iscsi_start_login_thread_timer(struct iscsi_np *np) -{ - /* - * This used the TA_LOGIN_TIMEOUT constant because at this - * point we do not have access to ISCSI_TPG_ATTRIB(tpg)->login_timeout - */ - spin_lock_bh(&np->np_thread_lock); - np->np_login_timer_flags &= ~ISCSI_TF_STOP; - np->np_login_timer_flags |= ISCSI_TF_RUNNING; - mod_timer(&np->np_login_timer, jiffies + TA_LOGIN_TIMEOUT * HZ); - - pr_debug("Added timeout timer to iSCSI login request for" - " %u seconds.\n", TA_LOGIN_TIMEOUT); - spin_unlock_bh(&np->np_thread_lock); -} - -static void iscsi_stop_login_thread_timer(struct iscsi_np *np) -{ - spin_lock_bh(&np->np_thread_lock); - if (!(np->np_login_timer_flags & ISCSI_TF_RUNNING)) { - spin_unlock_bh(&np->np_thread_lock); - return; - } - np->np_login_timer_flags |= ISCSI_TF_STOP; - spin_unlock_bh(&np->np_thread_lock); - - del_timer_sync(&np->np_login_timer); - - spin_lock_bh(&np->np_thread_lock); - np->np_login_timer_flags &= ~ISCSI_TF_RUNNING; - spin_unlock_bh(&np->np_thread_lock); -} - int iscsit_setup_np( struct iscsi_np *np, struct sockaddr_storage *sockaddr) @@ -1123,10 +1070,13 @@ static struct iscsit_conn *iscsit_alloc_conn(struct iscsi_np *np) spin_lock_init(&conn->nopin_timer_lock); spin_lock_init(&conn->response_queue_lock); spin_lock_init(&conn->state_lock); + spin_lock_init(&conn->login_worker_lock); + spin_lock_init(&conn->login_timer_lock); timer_setup(&conn->nopin_response_timer, iscsit_handle_nopin_response_timeout, 0); timer_setup(&conn->nopin_timer, iscsit_handle_nopin_timeout, 0); + timer_setup(&conn->login_timer, iscsit_login_timeout, 0); if (iscsit_conn_set_transport(conn, np->np_transport) < 0) goto free_conn; @@ -1304,7 +1254,7 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto new_sess_out; } - iscsi_start_login_thread_timer(np); + iscsit_start_login_timer(conn, current); pr_debug("Moving to TARG_CONN_STATE_XPT_UP.\n"); conn->conn_state = TARG_CONN_STATE_XPT_UP; @@ -1417,8 +1367,6 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) if (ret < 0) goto new_sess_out; - iscsi_stop_login_thread_timer(np); - if (ret == 1) { tpg_np = conn->tpg_np; @@ -1434,7 +1382,7 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) new_sess_out: new_sess = true; old_sess_out: - iscsi_stop_login_thread_timer(np); + iscsit_stop_login_timer(conn); tpg_np = conn->tpg_np; iscsi_target_login_sess_out(conn, zero_tsih, new_sess); new_sess = false; @@ -1448,7 +1396,6 @@ old_sess_out: return 1; exit: - iscsi_stop_login_thread_timer(np); spin_lock_bh(&np->np_thread_lock); np->np_thread_state = ISCSI_NP_THREAD_EXIT; spin_unlock_bh(&np->np_thread_lock); diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 24040c118e49..e3a5644a70b3 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -535,25 +535,6 @@ static void iscsi_target_login_drop(struct iscsit_conn *conn, struct iscsi_login iscsi_target_login_sess_out(conn, zero_tsih, true); } -struct conn_timeout { - struct timer_list timer; - struct iscsit_conn *conn; -}; - -static void iscsi_target_login_timeout(struct timer_list *t) -{ - struct conn_timeout *timeout = from_timer(timeout, t, timer); - struct iscsit_conn *conn = timeout->conn; - - pr_debug("Entering iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n"); - - if (conn->login_kworker) { - pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n", - conn->login_kworker->comm, conn->login_kworker->pid); - send_sig(SIGINT, conn->login_kworker, 1); - } -} - static void iscsi_target_do_login_rx(struct work_struct *work) { struct iscsit_conn *conn = container_of(work, @@ -562,12 +543,15 @@ static void iscsi_target_do_login_rx(struct work_struct *work) struct iscsi_np *np = login->np; struct iscsi_portal_group *tpg = conn->tpg; struct iscsi_tpg_np *tpg_np = conn->tpg_np; - struct conn_timeout timeout; int rc, zero_tsih = login->zero_tsih; bool state; pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n", conn, current->comm, current->pid); + + spin_lock(&conn->login_worker_lock); + set_bit(LOGIN_FLAGS_WORKER_RUNNING, &conn->login_flags); + spin_unlock(&conn->login_worker_lock); /* * If iscsi_target_do_login_rx() has been invoked by ->sk_data_ready() * before initial PDU processing in iscsi_target_start_negotiation() @@ -597,19 +581,16 @@ static void iscsi_target_do_login_rx(struct work_struct *work) goto err; } - conn->login_kworker = current; allow_signal(SIGINT); - - timeout.conn = conn; - timer_setup_on_stack(&timeout.timer, iscsi_target_login_timeout, 0); - mod_timer(&timeout.timer, jiffies + TA_LOGIN_TIMEOUT * HZ); - pr_debug("Starting login timer for %s/%d\n", current->comm, current->pid); + rc = iscsit_set_login_timer_kworker(conn, current); + if (rc < 0) { + /* The login timer has already expired */ + pr_debug("iscsi_target_do_login_rx, login failed\n"); + goto err; + } rc = conn->conn_transport->iscsit_get_login_rx(conn, login); - del_timer_sync(&timeout.timer); - destroy_timer_on_stack(&timeout.timer); flush_signals(current); - conn->login_kworker = NULL; if (rc < 0) goto err; @@ -646,7 +627,17 @@ static void iscsi_target_do_login_rx(struct work_struct *work) if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_WRITE_ACTIVE)) goto err; + + /* + * Set the login timer thread pointer to NULL to prevent the + * login process from getting stuck if the initiator + * stops sending data. + */ + rc = iscsit_set_login_timer_kworker(conn, NULL); + if (rc < 0) + goto err; } else if (rc == 1) { + iscsit_stop_login_timer(conn); cancel_delayed_work(&conn->login_work); iscsi_target_nego_release(conn); iscsi_post_login_handler(np, conn, zero_tsih); @@ -656,6 +647,7 @@ static void iscsi_target_do_login_rx(struct work_struct *work) err: iscsi_target_restore_sock_callbacks(conn); + iscsit_stop_login_timer(conn); cancel_delayed_work(&conn->login_work); iscsi_target_login_drop(conn, login); iscsit_deaccess_np(np, tpg, tpg_np); @@ -1368,14 +1360,30 @@ int iscsi_target_start_negotiation( * and perform connection cleanup now. */ ret = iscsi_target_do_login(conn, login); - if (!ret && iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU)) - ret = -1; + if (!ret) { + spin_lock(&conn->login_worker_lock); + + if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU)) + ret = -1; + else if (!test_bit(LOGIN_FLAGS_WORKER_RUNNING, &conn->login_flags)) { + if (iscsit_set_login_timer_kworker(conn, NULL) < 0) { + /* + * The timeout has expired already. + * Schedule login_work to perform the cleanup. + */ + schedule_delayed_work(&conn->login_work, 0); + } + } + + spin_unlock(&conn->login_worker_lock); + } if (ret < 0) { iscsi_target_restore_sock_callbacks(conn); iscsi_remove_failed_auth_entry(conn); } if (ret != 0) { + iscsit_stop_login_timer(conn); cancel_delayed_work_sync(&conn->login_work); iscsi_target_nego_release(conn); } diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 26dc8ed3045b..b14835fcb033 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1040,6 +1040,57 @@ void iscsit_stop_nopin_timer(struct iscsit_conn *conn) spin_unlock_bh(&conn->nopin_timer_lock); } +void iscsit_login_timeout(struct timer_list *t) +{ + struct iscsit_conn *conn = from_timer(conn, t, login_timer); + struct iscsi_login *login = conn->login; + + pr_debug("Entering iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n"); + + spin_lock_bh(&conn->login_timer_lock); + login->login_failed = 1; + + if (conn->login_kworker) { + pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n", + conn->login_kworker->comm, conn->login_kworker->pid); + send_sig(SIGINT, conn->login_kworker, 1); + } else { + schedule_delayed_work(&conn->login_work, 0); + } + spin_unlock_bh(&conn->login_timer_lock); +} + +void iscsit_start_login_timer(struct iscsit_conn *conn, struct task_struct *kthr) +{ + pr_debug("Login timer started\n"); + + conn->login_kworker = kthr; + mod_timer(&conn->login_timer, jiffies + TA_LOGIN_TIMEOUT * HZ); +} + +int iscsit_set_login_timer_kworker(struct iscsit_conn *conn, struct task_struct *kthr) +{ + struct iscsi_login *login = conn->login; + int ret = 0; + + spin_lock_bh(&conn->login_timer_lock); + if (login->login_failed) { + /* The timer has already expired */ + ret = -1; + } else { + conn->login_kworker = kthr; + } + spin_unlock_bh(&conn->login_timer_lock); + + return ret; +} + +void iscsit_stop_login_timer(struct iscsit_conn *conn) +{ + pr_debug("Login timer stopped\n"); + timer_delete_sync(&conn->login_timer); +} + int iscsit_send_tx_data( struct iscsit_cmd *cmd, struct iscsit_conn *conn, diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h index 33ea799a0850..24b8e577575a 100644 --- a/drivers/target/iscsi/iscsi_target_util.h +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -56,6 +56,10 @@ extern void iscsit_handle_nopin_timeout(struct timer_list *t); extern void __iscsit_start_nopin_timer(struct iscsit_conn *); extern void iscsit_start_nopin_timer(struct iscsit_conn *); extern void iscsit_stop_nopin_timer(struct iscsit_conn *); +extern void iscsit_login_timeout(struct timer_list *t); +extern void iscsit_start_login_timer(struct iscsit_conn *, struct task_struct *kthr); +extern void iscsit_stop_login_timer(struct iscsit_conn *); +extern int iscsit_set_login_timer_kworker(struct iscsit_conn *, struct task_struct *kthr); extern int iscsit_send_tx_data(struct iscsit_cmd *, struct iscsit_conn *, int); extern int iscsit_fe_sendpage_sg(struct iscsit_cmd *, struct iscsit_conn *); extern int iscsit_tx_login_rsp(struct iscsit_conn *, u8, u8); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 229118156a1f..42f4a4c0c100 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -562,12 +562,14 @@ struct iscsit_conn { #define LOGIN_FLAGS_READ_ACTIVE 2 #define LOGIN_FLAGS_WRITE_ACTIVE 3 #define LOGIN_FLAGS_CLOSED 4 +#define LOGIN_FLAGS_WORKER_RUNNING 5 unsigned long login_flags; struct delayed_work login_work; struct iscsi_login *login; struct timer_list nopin_timer; struct timer_list nopin_response_timer; struct timer_list transport_timer; + struct timer_list login_timer; struct task_struct *login_kworker; /* Spinlock used for add/deleting cmd's from conn_cmd_list */ spinlock_t cmd_lock; @@ -576,6 +578,8 @@ struct iscsit_conn { spinlock_t nopin_timer_lock; spinlock_t response_queue_lock; spinlock_t state_lock; + spinlock_t login_timer_lock; + spinlock_t login_worker_lock; /* libcrypto RX and TX contexts for crc32c */ struct ahash_request *conn_rx_hash; struct ahash_request *conn_tx_hash; @@ -792,7 +796,6 @@ struct iscsi_np { enum np_thread_state_table np_thread_state; bool enabled; atomic_t np_reset_count; - enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; spinlock_t np_thread_lock; @@ -800,7 +803,6 @@ struct iscsi_np { struct socket *np_socket; struct sockaddr_storage np_sockaddr; struct task_struct *np_thread; - struct timer_list np_login_timer; void *np_context; struct iscsit_transport *np_transport; struct list_head np_list; From 98a8c2bf938a5973716f280da618077a3d255976 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 8 May 2023 18:22:18 +0200 Subject: [PATCH 087/934] scsi: target: iscsi: Remove unused transport_timer Signed-off-by: Maurizio Lombardi Link: https://lore.kernel.org/r/20230508162219.1731964-3-mlombard@redhat.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- include/target/iscsi/iscsi_target_core.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 42f4a4c0c100..4c15420e8965 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -568,7 +568,6 @@ struct iscsit_conn { struct iscsi_login *login; struct timer_list nopin_timer; struct timer_list nopin_response_timer; - struct timer_list transport_timer; struct timer_list login_timer; struct task_struct *login_kworker; /* Spinlock used for add/deleting cmd's from conn_cmd_list */ From 2a737d3b8c792400118d6cf94958f559de9c5e59 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 8 May 2023 18:22:19 +0200 Subject: [PATCH 088/934] scsi: target: iscsi: Prevent login threads from racing between each other The tpg->np_login_sem is a semaphore that is used to serialize the login process when multiple login threads run concurrently against the same target portal group. The iscsi_target_locate_portal() function finds the tpg, calls iscsit_access_np() against the np_login_sem semaphore and saves the tpg pointer in conn->tpg; If iscsi_target_locate_portal() fails, the caller will check for the conn->tpg pointer and, if it's not NULL, then it will assume that iscsi_target_locate_portal() called iscsit_access_np() on the semaphore. Make sure that conn->tpg gets initialized only if iscsit_access_np() was successful, otherwise iscsit_deaccess_np() may end up being called against a semaphore we never took, allowing more than one thread to access the same tpg. Signed-off-by: Maurizio Lombardi Link: https://lore.kernel.org/r/20230508162219.1731964-4-mlombard@redhat.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_nego.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index e3a5644a70b3..fa3fb5f4e6bc 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1122,6 +1122,7 @@ int iscsi_target_locate_portal( iscsi_target_set_sock_callbacks(conn); login->np = np; + conn->tpg = NULL; login_req = (struct iscsi_login_req *) login->req; payload_length = ntoh24(login_req->dlength); @@ -1189,7 +1190,6 @@ int iscsi_target_locate_portal( */ sessiontype = strncmp(s_buf, DISCOVERY, 9); if (!sessiontype) { - conn->tpg = iscsit_global->discovery_tpg; if (!login->leading_connection) goto get_target; @@ -1206,9 +1206,11 @@ int iscsi_target_locate_portal( * Serialize access across the discovery struct iscsi_portal_group to * process login attempt. */ + conn->tpg = iscsit_global->discovery_tpg; if (iscsit_access_np(np, conn->tpg) < 0) { iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); + conn->tpg = NULL; ret = -1; goto out; } From d54820b22e404b06b2b65877ff802cc7b31688bc Mon Sep 17 00:00:00 2001 From: Gleb Chesnokov Date: Wed, 17 May 2023 11:22:35 +0300 Subject: [PATCH 089/934] scsi: qla2xxx: Fix NULL pointer dereference in target mode When target mode is enabled, the pci_irq_get_affinity() function may return a NULL value in qla_mapq_init_qp_cpu_map() due to the qla24xx_enable_msix() code that handles IRQ settings for target mode. This leads to a crash due to a NULL pointer dereference. This patch fixes the issue by adding a check for the NULL value returned by pci_irq_get_affinity() and introducing a 'cpu_mapped' boolean flag to the qla_qpair structure, ensuring that the qpair's CPU affinity is updated when it has not been mapped to a CPU. Fixes: 1d201c81d4cc ("scsi: qla2xxx: Select qpair depending on which CPU post_cmd() gets called") Signed-off-by: Gleb Chesnokov Link: https://lore.kernel.org/r/56b416f2-4e0f-b6cf-d6d5-b7c372e3c6a2@scst.dev Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_def.h | 1 + drivers/scsi/qla2xxx/qla_init.c | 3 +++ drivers/scsi/qla2xxx/qla_inline.h | 3 +++ drivers/scsi/qla2xxx/qla_isr.c | 3 +++ 4 files changed, 10 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index df5e5b7fdcfe..84aa3571be6d 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3796,6 +3796,7 @@ struct qla_qpair { uint64_t retry_term_jiff; struct qla_tgt_counters tgt_counters; uint16_t cpuid; + bool cpu_mapped; struct qla_fw_resources fwres ____cacheline_aligned; struct qla_buf_pool buf_pool; u32 cmd_cnt; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index ec0423ec6681..1a955c3ff3d6 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -9426,6 +9426,9 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos, qpair->rsp->req = qpair->req; qpair->rsp->qpair = qpair; + if (!qpair->cpu_mapped) + qla_cpu_update(qpair, raw_smp_processor_id()); + if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif) { if (ha->fw_attributes & BIT_4) qpair->difdix_supported = 1; diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index cce6e425c121..7b42558a8839 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -539,11 +539,14 @@ qla_mapq_init_qp_cpu_map(struct qla_hw_data *ha, if (!ha->qp_cpu_map) return; mask = pci_irq_get_affinity(ha->pdev, msix->vector_base0); + if (!mask) + return; qpair->cpuid = cpumask_first(mask); for_each_cpu(cpu, mask) { ha->qp_cpu_map[cpu] = qpair; } msix->cpuid = qpair->cpuid; + qpair->cpu_mapped = true; } static inline void diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 71feda2cdb63..245e3a5d81fd 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3770,6 +3770,9 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, if (rsp->qpair->cpuid != smp_processor_id() || !rsp->qpair->rcv_intr) { rsp->qpair->rcv_intr = 1; + + if (!rsp->qpair->cpu_mapped) + qla_cpu_update(rsp->qpair, raw_smp_processor_id()); } #define __update_rsp_in(_is_shadow_hba, _rsp, _rsp_in) \ From 2bd110339288c18823dcace602b63b0d8627e520 Mon Sep 17 00:00:00 2001 From: John Sperbeck Date: Sun, 21 May 2023 19:29:53 +0000 Subject: [PATCH 090/934] cgroup: always put cset in cgroup_css_set_put_fork A successful call to cgroup_css_set_fork() will always have taken a ref on kargs->cset (regardless of CLONE_INTO_CGROUP), so always do a corresponding put in cgroup_css_set_put_fork(). Without this, a cset and its contained css structures will be leaked for some fork failures. The following script reproduces the leak for a fork failure due to exceeding pids.max in the pids controller. A similar thing can happen if we jump to the bad_fork_cancel_cgroup label in copy_process(). [ -z "$1" ] && echo "Usage $0 pids-root" && exit 1 PID_ROOT=$1 CGROUP=$PID_ROOT/foo [ -e $CGROUP ] && rmdir -f $CGROUP mkdir $CGROUP echo 5 > $CGROUP/pids.max echo $$ > $CGROUP/cgroup.procs fork_bomb() { set -e for i in $(seq 10); do /bin/sleep 3600 & done } (fork_bomb) & wait echo $$ > $PID_ROOT/cgroup.procs kill $(cat $CGROUP/cgroup.procs) rmdir $CGROUP Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: John Sperbeck Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 625d7483951c..245cf62ce85a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6486,19 +6486,18 @@ err: static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs) __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex) { + struct cgroup *cgrp = kargs->cgrp; + struct css_set *cset = kargs->cset; + cgroup_threadgroup_change_end(current); + if (cset) { + put_css_set(cset); + kargs->cset = NULL; + } + if (kargs->flags & CLONE_INTO_CGROUP) { - struct cgroup *cgrp = kargs->cgrp; - struct css_set *cset = kargs->cset; - cgroup_unlock(); - - if (cset) { - put_css_set(cset); - kargs->cset = NULL; - } - if (cgrp) { cgroup_put(cgrp); kargs->cgrp = NULL; From 11c439a19466e7feaccdbce148a75372fddaf4e9 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 18 May 2023 05:43:51 +0000 Subject: [PATCH 091/934] iommu/amd/pgtbl_v2: Fix domain max address IOMMU v2 page table supports 4 level (47 bit) or 5 level (56 bit) virtual address space. Current code assumes it can support 64bit IOVA address space. If IOVA allocator allocates virtual address > 47/56 bit (depending on page table level) then it will do wrong mapping and cause invalid translation. Hence adjust aperture size to use max address supported by the page table. Reported-by: Jerry Snitselaar Fixes: aaac38f61487 ("iommu/amd: Initial support for AMD IOMMU v2 page table") Cc: # v6.0+ Cc: Suravee Suthikulpanit Signed-off-by: Vasant Hegde Reviewed-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20230518054351.9626-1-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 0f3ac4b489d6..dc1ec6849775 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2129,6 +2129,15 @@ out_err: return NULL; } +static inline u64 dma_max_address(void) +{ + if (amd_iommu_pgtable == AMD_IOMMU_V1) + return ~0ULL; + + /* V2 with 4/5 level page table */ + return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); +} + static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *domain; @@ -2145,7 +2154,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) return NULL; domain->domain.geometry.aperture_start = 0; - domain->domain.geometry.aperture_end = ~0ULL; + domain->domain.geometry.aperture_end = dma_max_address(); domain->domain.geometry.force_aperture = true; return &domain->domain; From d9eef346b601afb0bd74b49e0db06f6a5cebd030 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Fri, 14 Apr 2023 11:22:10 -0700 Subject: [PATCH 092/934] HID: wacom: Check for string overflow from strscpy calls The strscpy function is able to return an error code when a copy would overflow the size of the destination. The copy is stopped and the buffer terminated before overflow actually occurs so it is safe to continue execution, but we should still produce a warning should this occur. Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Reviewed-by: Peter Hutterer Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 8214896adada..7192970d199a 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2224,7 +2224,9 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) } else if (strstr(product_name, "Wacom") || strstr(product_name, "wacom") || strstr(product_name, "WACOM")) { - strscpy(name, product_name, sizeof(name)); + if (strscpy(name, product_name, sizeof(name)) < 0) { + hid_warn(wacom->hdev, "String overflow while assembling device name"); + } } else { snprintf(name, sizeof(name), "Wacom %s", product_name); } @@ -2242,7 +2244,9 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) if (name[strlen(name)-1] == ' ') name[strlen(name)-1] = '\0'; } else { - strscpy(name, features->name, sizeof(name)); + if (strscpy(name, features->name, sizeof(name)) < 0) { + hid_warn(wacom->hdev, "String overflow while assembling device name"); + } } snprintf(wacom_wac->name, sizeof(wacom_wac->name), "%s%s", @@ -2500,8 +2504,10 @@ static void wacom_wireless_work(struct work_struct *work) goto fail; } - strscpy(wacom_wac->name, wacom_wac1->name, - sizeof(wacom_wac->name)); + if (strscpy(wacom_wac->name, wacom_wac1->name, + sizeof(wacom_wac->name)) < 0) { + hid_warn(wacom->hdev, "String overflow while assembling device name"); + } } return; From bd249b91977b768ea02bf84d04625d2690ad2b98 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Mon, 17 Apr 2023 09:01:48 -0700 Subject: [PATCH 093/934] HID: wacom: avoid integer overflow in wacom_intuos_inout() If high bit is set to 1 in ((data[3] & 0x0f << 28), after all arithmetic operations and integer promotions are done, high bits in wacom->serial[idx] will be filled with 1s as well. Avoid this, albeit unlikely, issue by specifying left operand's __u64 type for the right operand. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 3bea733ab212 ("USB: wacom tablet driver reorganization") Signed-off-by: Nikita Zhandarovich Reviewed-by: Ping Cheng Cc: stable@vger.kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index dc0f7d9a992c..2ccf83837134 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -831,7 +831,7 @@ static int wacom_intuos_inout(struct wacom_wac *wacom) /* Enter report */ if ((data[1] & 0xfc) == 0xc0) { /* serial number of the tool */ - wacom->serial[idx] = ((data[3] & 0x0f) << 28) + + wacom->serial[idx] = ((__u64)(data[3] & 0x0f) << 28) + (data[4] << 20) + (data[5] << 12) + (data[6] << 4) + (data[7] >> 4); From ed84c4517a5bc536e8572a01dfa11bc22a280d06 Mon Sep 17 00:00:00 2001 From: Sung-Chi Li Date: Mon, 24 Apr 2023 10:37:36 +0800 Subject: [PATCH 094/934] HID: google: add jewel USB id Add 1 additional hammer-like device. Signed-off-by: Sung-Chi Li Signed-off-by: Jiri Kosina --- drivers/hid/hid-google-hammer.c | 2 ++ drivers/hid/hid-ids.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 7ae5f27df54d..c6bdb9c4ef3e 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -586,6 +586,8 @@ static const struct hid_device_id hammer_devices[] = { USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_EEL) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_JEWEL) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MAGNEMITE) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d79e946acdcb..5d29abac2300 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -529,6 +529,7 @@ #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 #define USB_DEVICE_ID_GOOGLE_DON 0x5050 #define USB_DEVICE_ID_GOOGLE_EEL 0x5057 +#define USB_DEVICE_ID_GOOGLE_JEWEL 0x5061 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f From 16a9c24f24fbe4564284eb575b18cc20586b9270 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Thu, 27 Apr 2023 14:47:45 +0300 Subject: [PATCH 095/934] HID: wacom: Add error check to wacom_parse_and_register() Added a variable check and transition in case of an error Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Denis Arefev Reviewed-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 7192970d199a..76e5353aca0c 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2414,8 +2414,13 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail_quirks; } - if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) + if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) { error = hid_hw_open(hdev); + if (error) { + hid_err(hdev, "hw open failed\n"); + goto fail_quirks; + } + } wacom_set_shared_values(wacom_wac); devres_close_group(&hdev->dev, wacom); From d53d70084d27f56bcdf5074328f2c9ec861be596 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 17 May 2023 12:26:44 -0400 Subject: [PATCH 096/934] nfsd: make a copy of struct iattr before calling notify_change notify_change can modify the iattr structure. In particular it can end up setting ATTR_MODE when ATTR_KILL_SUID is already set, causing a BUG() if the same iattr is passed to notify_change more than once. Make a copy of the struct iattr before calling notify_change. Reported-by: Zhi Li Link: https://bugzilla.redhat.com/show_bug.cgi?id=2207969 Tested-by: Zhi Li Fixes: 34b91dda7124 ("NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY") Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index bb9d47172162..db67f8e19344 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -536,7 +536,15 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, inode_lock(inode); for (retries = 1;;) { - host_err = __nfsd_setattr(dentry, iap); + struct iattr attrs; + + /* + * notify_change() can alter its iattr argument, making + * @iap unsuitable for submission multiple times. Make a + * copy for every loop iteration. + */ + attrs = *iap; + host_err = __nfsd_setattr(dentry, &attrs); if (host_err != -EAGAIN || !retries--) break; if (!nfsd_wait_for_delegreturn(rqstp, inode)) From 9728fb3ce11729aa8c276825ddf504edeb00611d Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Fri, 5 May 2023 14:35:57 +0800 Subject: [PATCH 097/934] spi: lpspi: disable lpspi module irq in DMA mode When all bits of IER are set to 0, we still can observe the lpspi irq events when using DMA mode to transfer data. So disable irq to avoid the too much irq events. Signed-off-by: Clark Wang Link: https://lore.kernel.org/r/20230505063557.3962220-1-xiaoning.wang@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-lpspi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index f2341ab99556..4b70038ceb6b 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -910,9 +910,14 @@ static int fsl_lpspi_probe(struct platform_device *pdev) ret = fsl_lpspi_dma_init(&pdev->dev, fsl_lpspi, controller); if (ret == -EPROBE_DEFER) goto out_pm_get; - if (ret < 0) dev_err(&pdev->dev, "dma setup error %d, use pio\n", ret); + else + /* + * disable LPSPI module IRQ when enable DMA mode successfully, + * to prevent the unexpected LPSPI module IRQ events. + */ + disable_irq(irq); ret = devm_spi_register_controller(&pdev->dev, controller); if (ret < 0) { From 320805ab61e5f1e2a5729ae266e16bec2904050c Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 18 May 2023 08:13:52 -0700 Subject: [PATCH 098/934] Drivers: hv: vmbus: Fix vmbus_wait_for_unload() to scan present CPUs vmbus_wait_for_unload() may be called in the panic path after other CPUs are stopped. vmbus_wait_for_unload() currently loops through online CPUs looking for the UNLOAD response message. But the values of CONFIG_KEXEC_CORE and crash_kexec_post_notifiers affect the path used to stop the other CPUs, and in one of the paths the stopped CPUs are removed from cpu_online_mask. This removal happens in both x86/x64 and arm64 architectures. In such a case, vmbus_wait_for_unload() only checks the panic'ing CPU, and misses the UNLOAD response message except when the panic'ing CPU is CPU 0. vmbus_wait_for_unload() eventually times out, but only after waiting 100 seconds. Fix this by looping through *present* CPUs in vmbus_wait_for_unload(). The cpu_present_mask is not modified by stopping the other CPUs in the panic path, nor should it be. Also, in a CoCo VM the synic_message_page is not allocated in hv_synic_alloc(), but is set and cleared in hv_synic_enable_regs() and hv_synic_disable_regs() such that it is set only when the CPU is online. If not all present CPUs are online when vmbus_wait_for_unload() is called, the synic_message_page might be NULL. Add a check for this. Fixes: cd95aad55793 ("Drivers: hv: vmbus: handle various crash scenarios") Cc: stable@vger.kernel.org Reported-by: John Starks Signed-off-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/1684422832-38476-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 007f26d5f1a4..2f4d09ce027a 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -829,11 +829,22 @@ static void vmbus_wait_for_unload(void) if (completion_done(&vmbus_connection.unload_event)) goto completed; - for_each_online_cpu(cpu) { + for_each_present_cpu(cpu) { struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); + /* + * In a CoCo VM the synic_message_page is not allocated + * in hv_synic_alloc(). Instead it is set/cleared in + * hv_synic_enable_regs() and hv_synic_disable_regs() + * such that it is set only when the CPU is online. If + * not all present CPUs are online, the message page + * might be NULL, so skip such CPUs. + */ page_addr = hv_cpu->synic_message_page; + if (!page_addr) + continue; + msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; @@ -867,11 +878,14 @@ completed: * maybe-pending messages on all CPUs to be able to receive new * messages after we reconnect. */ - for_each_online_cpu(cpu) { + for_each_present_cpu(cpu) { struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); page_addr = hv_cpu->synic_message_page; + if (!page_addr) + continue; + msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; msg->header.message_type = HVMSG_NONE; } From feee70f4568650cf44c573488798ffc0a2faeea3 Mon Sep 17 00:00:00 2001 From: Ismael Ferreras Morezuelas Date: Tue, 23 May 2023 14:45:10 -0700 Subject: [PATCH 099/934] Input: xpad - delete a Razer DeathAdder mouse VID/PID entry While doing my research to improve the xpad device names I noticed that the 1532:0037 VID/PID seems to be used by the DeathAdder 2013, so that Razer Sabertooth instance looked wrong and very suspect to me. I didn't see any mention in the official drivers, either. After doing more research, it turns out that the xpad list is used by many other projects (like Steam) as-is [1], this issue was reported [2] and Valve/Sam Lantinga fixed it [3]: [1]: https://github.com/libsdl-org/SDL/blob/dcc5eef0e2395854b254ea2873a4899edab347c6/src/joystick/controller_type.h#L251 [2]: https://steamcommunity.com/app/353380/discussions/0/1743392486228754770/ [3]: https://hg.libsdl.org/SDL/rev/29809f6f0271 (With multiple Internet users reporting similar issues, not linked here) After not being able to find the correct VID/PID combination anywhere on the Internet and not receiving any reply from Razer support I did some additional detective work, it seems like it presents itself as "Razer Sabertooth Gaming Controller (XBOX360)", code 1689:FE00. Leaving us with this: * Razer Sabertooth (1689:fe00) * Razer Sabertooth Elite (24c6:5d04) * Razer DeathAdder 2013 (1532:0037) [note: not a gamepad] So, to sum things up; remove this conflicting/duplicate entry: { 0x1532, 0x0037, "Razer Sabertooth", 0, XTYPE_XBOX360 }, As the real/correct one is already present there, even if the Internet as a whole insists on presenting it as the Razer Sabertooth Elite, which (by all accounts) is not: { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, Actual change in SDL2 referencing this kernel issue: https://github.com/libsdl-org/SDL/commit/e5e54169754ca5d3e86339d968b20126d9da0a15 For more information of the device, take a look here: https://github.com/xboxdrv/xboxdrv/pull/59 You can see a lsusb dump here: https://github.com/xboxdrv/xboxdrv/files/76581/Qa6dBcrv.txt Fixes: f554f619b70 ("Input: xpad - sync device IDs with xboxdrv") Signed-off-by: Ismael Ferreras Morezuelas Reviewed-by: Cameron Gutman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/5c12dbdb-5774-fc68-5c58-ca596383663e@gmail.com Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 28be88e0e96a..f33622fe946f 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -281,7 +281,6 @@ static const struct xpad_device { { 0x1430, 0xf801, "RedOctane Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0604, "Bigben Interactive DAIJA Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, - { 0x1532, 0x0037, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x1532, 0x0a00, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE }, { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 }, From 4be47a5d59cbc9396a6ffd327913eb4c8d67a32f Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Mon, 1 May 2023 19:33:14 +0100 Subject: [PATCH 100/934] spi: mt65xx: make sure operations completed before unloading When unloading the spi-mt65xx kernel module during an ongoing spi-mem operation the kernel will Oops shortly after unloading the module. This is because wait_for_completion_timeout was still running and returning into the no longer loaded module: Internal error: Oops: 0000000096000005 [#1] SMP Modules linked in: [many, but spi-mt65xx is no longer there] CPU: 0 PID: 2578 Comm: block Tainted: G W O 6.3.0-next-20230428+ #0 Hardware name: Bananapi BPI-R3 (DT) pstate: 804000c5 (Nzcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __lock_acquire+0x18c/0x20e8 lr : __lock_acquire+0x9b8/0x20e8 sp : ffffffc009ec3400 x29: ffffffc009ec3400 x28: 0000000000000001 x27: 0000000000000004 x26: ffffff80082888c8 x25: 0000000000000000 x24: 0000000000000000 x23: ffffffc009609da8 x22: ffffff8008288000 x21: ffffff8008288968 x20: 00000000000003c2 x19: ffffff8008be7990 x18: 00000000000002af x17: 0000000000000000 x16: 0000000000000000 x15: ffffffc008d78970 x14: 000000000000080d x13: 00000000000002af x12: 00000000ffffffea x11: 00000000ffffefff x10: ffffffc008dd0970 x9 : ffffffc008d78918 x8 : 0000000000017fe8 x7 : 0000000000000001 x6 : 0000000000000000 x5 : ffffff807fb53910 x4 : 0000000000000000 x3 : 0000000000000027 x2 : 0000000000000027 x1 : 0000000000000000 x0 : 00000000000c03c2 Call trace: __lock_acquire+0x18c/0x20e8 lock_acquire+0x100/0x2a4 _raw_spin_lock_irq+0x58/0x74 __wait_for_common+0xe0/0x1b4 wait_for_completion_timeout+0x1c/0x24 0xffffffc000acc8a4 <--- used to be mtk_spi_transfer_wait spi_mem_exec_op+0x390/0x3ec spi_mem_no_dirmap_read+0x6c/0x88 spi_mem_dirmap_read+0xcc/0x12c spinand_read_page+0xf8/0x1dc spinand_mtd_read+0x1b4/0x2fc mtd_read_oob_std+0x58/0x7c mtd_read_oob+0x8c/0x148 mtd_read+0x50/0x6c ... Prevent this by completing in mtk_spi_remove if needed. Fixes: 9f763fd20da7 ("spi: mediatek: add spi memory support for ipm design") Signed-off-by: Daniel Golle Link: https://lore.kernel.org/r/ZFAF6pJxMu1z6k4w@makrotopia.org Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 21c321f43766..d7432e2219d8 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -1275,6 +1275,9 @@ static int mtk_spi_remove(struct platform_device *pdev) struct mtk_spi *mdata = spi_master_get_devdata(master); int ret; + if (mdata->use_spimem && !completion_done(&mdata->spimem_done)) + complete(&mdata->spimem_done); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) return ret; From ccd4923d18d5698a5910d516646ce125b9155d47 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 18 May 2023 09:25:11 +0300 Subject: [PATCH 101/934] ARM: at91: pm: fix imbalanced reference counter for ethernet devices The of_find_device_by_node() function is returning a struct platform_device object with the embedded struct device member's reference counter incremented. This needs to be dropped when done with the platform device returned by of_find_device_by_node(). at91_pm_eth_quirk_is_valid() calls of_find_device_by_node() on suspend and resume path. On suspend it calls of_find_device_by_node() and on resume and failure paths it drops the counter of struct platform_device::dev. In case ethernet device may not wakeup there is a put_device() on at91_pm_eth_quirk_is_valid() which is wrong as it colides with put_device() on resume path leading to the reference counter of struct device embedded in struct platform_device to be messed, stack trace to be displayed (after 5 consecutive suspend/resume cycles) and execution to hang. Along with this the error path of at91_pm_config_quirks() had been also adapted to decrement propertly the reference counter of struct device embedded in struct platform_device. Fixes: b7fc72c63399 ("ARM: at91: pm: add quirks for pm") Signed-off-by: Claudiu Beznea Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20230518062511.2988500-1-claudiu.beznea@microchip.com --- arch/arm/mach-at91/pm.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 60dc56d8acfb..437dd0352fd4 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -334,16 +334,14 @@ static bool at91_pm_eth_quirk_is_valid(struct at91_pm_quirk_eth *eth) pdev = of_find_device_by_node(eth->np); if (!pdev) return false; + /* put_device(eth->dev) is called at the end of suspend. */ eth->dev = &pdev->dev; } /* No quirks if device isn't a wakeup source. */ - if (!device_may_wakeup(eth->dev)) { - put_device(eth->dev); + if (!device_may_wakeup(eth->dev)) return false; - } - /* put_device(eth->dev) is called at the end of suspend. */ return true; } @@ -439,14 +437,14 @@ clk_unconfigure: pr_err("AT91: PM: failed to enable %s clocks\n", j == AT91_PM_G_ETH ? "geth" : "eth"); } - } else { - /* - * Release the reference to eth->dev taken in - * at91_pm_eth_quirk_is_valid(). - */ - put_device(eth->dev); - eth->dev = NULL; } + + /* + * Release the reference to eth->dev taken in + * at91_pm_eth_quirk_is_valid(). + */ + put_device(eth->dev); + eth->dev = NULL; } return ret; From 6b0db163ff9200a55dc77a652dad1d4b0a853f63 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 23 May 2023 08:27:50 +0300 Subject: [PATCH 102/934] ARM: dts: at91: sama7g5ek: fix debounce delay property for shdwc There is no atmel,shdwc-debouncer property for SHDWC. The right DT property is debounce-delay-us. Use it. Fixes: 16b161bcf5d4 ("ARM: dts: at91: sama7g5: add shdwc node") Signed-off-by: Claudiu Beznea Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20230523052750.184223-1-claudiu.beznea@microchip.com --- arch/arm/boot/dts/at91-sama7g5ek.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts index aa5cc0e98bba..217e9b96c61e 100644 --- a/arch/arm/boot/dts/at91-sama7g5ek.dts +++ b/arch/arm/boot/dts/at91-sama7g5ek.dts @@ -792,7 +792,7 @@ }; &shdwc { - atmel,shdwc-debouncer = <976>; + debounce-delay-us = <976>; status = "okay"; input@0 { From 40ba0411074485e2cf1bf8ee0f3db27bdff88394 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 23 May 2023 16:46:04 +0100 Subject: [PATCH 103/934] ASoC: codecs: wsa883x: do not set can_multi_write flag regmap-sdw does not support multi register writes, so there is no point in setting this flag. This also leads to incorrect programming of WSA codecs with regmap_multi_reg_write() call. This invalid configuration should have been rejected by regmap-sdw. Fixes: 43b8c7dc85a1 ("ASoC: codecs: add wsa883x amplifier support") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230523154605.4284-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa883x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index c609cb63dae6..e80b53143569 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -946,7 +946,6 @@ static struct regmap_config wsa883x_regmap_config = { .writeable_reg = wsa883x_writeable_register, .reg_format_endian = REGMAP_ENDIAN_NATIVE, .val_format_endian = REGMAP_ENDIAN_NATIVE, - .can_multi_write = true, .use_single_read = true, }; From 6e7a6d4797ef521c0762914610ed682e102b9d36 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 23 May 2023 16:46:05 +0100 Subject: [PATCH 104/934] ASoC: codecs: wsa881x: do not set can_multi_write flag regmap-sdw does not support multi register writes, so there is no point in setting this flag. This also leads to incorrect programming of WSA codecs with regmap_multi_reg_write() call. This invalid configuration should have been rejected by regmap-sdw. Fixes: a0aab9e1404a ("ASoC: codecs: add wsa881x amplifier support") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230523154605.4284-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wsa881x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index f709231b1277..97f6873a0a8c 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -645,7 +645,6 @@ static struct regmap_config wsa881x_regmap_config = { .readable_reg = wsa881x_readable_register, .reg_format_endian = REGMAP_ENDIAN_NATIVE, .val_format_endian = REGMAP_ENDIAN_NATIVE, - .can_multi_write = true, }; enum { From 2d7c2f9272de6347a9cec0fc07708913692c0ae3 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 23 May 2023 17:54:14 +0100 Subject: [PATCH 105/934] ASoC: codecs: wcd938x-sdw: do not set can_multi_write flag regmap-sdw does not support multi register writes, so there is no point in setting this flag. This also leads to incorrect programming of WSA codecs with regmap_multi_reg_write() call. This invalid configuration should have been rejected by regmap-sdw. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230523165414.14560-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x-sdw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index 402286dfaea4..9c10200ff34b 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1190,7 +1190,6 @@ static const struct regmap_config wcd938x_regmap_config = { .readable_reg = wcd938x_readable_register, .writeable_reg = wcd938x_writeable_register, .volatile_reg = wcd938x_volatile_register, - .can_multi_write = true, }; static const struct sdw_slave_ops wcd9380_slave_ops = { From 95856d1f3c223c015780fffb8373a827fc4efd2e Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 23 May 2023 16:47:47 +0100 Subject: [PATCH 106/934] regmap: sdw: check for invalid multi-register writes config SoundWire code as it is only supports Bulk register writes and it does not support multi-register writes. Any drivers that set can_multi_write and use regmap_multi_reg_write() will easily endup with programming the hardware incorrectly without any errors. So, add this check in bus code to be able to validate the drivers config. Fixes: 522272047dc6 ("regmap: sdw: Remove 8-bit value size restriction") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230523154747.5429-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-sdw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/base/regmap/regmap-sdw.c b/drivers/base/regmap/regmap-sdw.c index 09899ae99fc1..159c0b740b00 100644 --- a/drivers/base/regmap/regmap-sdw.c +++ b/drivers/base/regmap/regmap-sdw.c @@ -59,6 +59,10 @@ static int regmap_sdw_config_check(const struct regmap_config *config) if (config->pad_bits != 0) return -ENOTSUPP; + /* Only bulk writes are supported not multi-register writes */ + if (config->can_multi_write) + return -ENOTSUPP; + return 0; } From 0cc6578048e0980d254aee345130cced4912f723 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 23 May 2023 23:18:19 +0100 Subject: [PATCH 107/934] regmap: maple: Drop the RCU read lock while syncing registers Unfortunately the maple tree requires us to explicitly lock it so we need to take the RCU read lock while iterating. When syncing this means that we end up trying to write out register values while holding the RCU read lock which triggers lockdep issues since that is an atomic context but most buses can't be used in atomic context. Pause the iteration and drop the lock for each register we check to avoid this. Reported-by: Pierre-Louis Bossart Tested-by: Pierre-Louis Bossart Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230523-regcache-maple-sync-lock-v1-1-530e4d68dfab@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regcache-maple.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c index 9b1b559107ef..c2e3a0f6c218 100644 --- a/drivers/base/regmap/regcache-maple.c +++ b/drivers/base/regmap/regcache-maple.c @@ -203,15 +203,18 @@ static int regcache_maple_sync(struct regmap *map, unsigned int min, mas_for_each(&mas, entry, max) { for (r = max(mas.index, lmin); r <= min(mas.last, lmax); r++) { + mas_pause(&mas); + rcu_read_unlock(); ret = regcache_sync_val(map, r, entry[r - mas.index]); if (ret != 0) goto out; + rcu_read_lock(); } } -out: rcu_read_unlock(); +out: map->cache_bypass = false; return ret; From e79d85c6c217221ea32354a5ac0587a7ccea02b9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 23 May 2023 17:06:20 +0200 Subject: [PATCH 108/934] drm/fb-helper: Fix height, width, and accel_flags in fb_var Fbtest contains some very simple validation of the fbdev userspace API contract. When used with shmob-drm, it reports the following warnings and errors: height changed from 68 to 0 height was rounded down width changed from 111 to 0 width was rounded down accel_flags changed from 0 to 1 The first part happens because __fill_var() resets the physical dimensions of the first connector, as filled in by drm_setup_crtcs_fb(). Fix this by retaining the original values. The last part happens because __fill_var() forces the FB_ACCELF_TEXT flag on, while fbtest disables all acceleration on purpose, so it can draw safely to the frame buffer. Fix this by setting accel_flags to zero, as DRM does not implement any text console acceleration. Note that this issue can also be seen in the output of fbset, which reports "accel true". Fixes: ee4cce0a8f03a333 ("drm/fb-helper: fix input validation gaps in check_var") Signed-off-by: Geert Uytterhoeven Reviewed-by: Daniel Vetter Tested-by: Sui Jingfeng Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/57e6b334dae8148b1b8ae6ef308ce9a83810a850.1684854344.git.geert+renesas@glider.be --- drivers/gpu/drm/drm_fb_helper.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 6bb1b8b27d7a..fd27f1978635 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1545,17 +1545,19 @@ static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var, } } -static void __fill_var(struct fb_var_screeninfo *var, +static void __fill_var(struct fb_var_screeninfo *var, struct fb_info *info, struct drm_framebuffer *fb) { int i; var->xres_virtual = fb->width; var->yres_virtual = fb->height; - var->accel_flags = FB_ACCELF_TEXT; + var->accel_flags = 0; var->bits_per_pixel = drm_format_info_bpp(fb->format, 0); - var->height = var->width = 0; + var->height = info->var.height; + var->width = info->var.width; + var->left_margin = var->right_margin = 0; var->upper_margin = var->lower_margin = 0; var->hsync_len = var->vsync_len = 0; @@ -1618,7 +1620,7 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } - __fill_var(var, fb); + __fill_var(var, info, fb); /* * fb_pan_display() validates this, but fb_set_par() doesn't and just @@ -2074,7 +2076,7 @@ static void drm_fb_helper_fill_var(struct fb_info *info, info->pseudo_palette = fb_helper->pseudo_palette; info->var.xoffset = 0; info->var.yoffset = 0; - __fill_var(&info->var, fb); + __fill_var(&info->var, info, fb); info->var.activate = FB_ACTIVATE_NOW; drm_fb_helper_fill_pixel_fmt(&info->var, format); From a99d21cefd351c8aaa20b83a3c942340e5789d45 Mon Sep 17 00:00:00 2001 From: Deren Wu Date: Sat, 13 May 2023 22:48:15 +0800 Subject: [PATCH 109/934] mmc: vub300: fix invalid response handling We may get an empty response with zero length at the beginning of the driver start and get following UBSAN error. Since there is no content(SDRT_NONE) for the response, just return and skip the response handling to avoid this problem. Test pass : SDIO wifi throughput test with this patch [ 126.980684] UBSAN: array-index-out-of-bounds in drivers/mmc/host/vub300.c:1719:12 [ 126.980709] index -1 is out of range for type 'u32 [4]' [ 126.980729] CPU: 4 PID: 9 Comm: kworker/u16:0 Tainted: G E 6.3.0-rc4-mtk-local-202304272142 #1 [ 126.980754] Hardware name: Intel(R) Client Systems NUC8i7BEH/NUC8BEB, BIOS BECFL357.86A.0081.2020.0504.1834 05/04/2020 [ 126.980770] Workqueue: kvub300c vub300_cmndwork_thread [vub300] [ 126.980833] Call Trace: [ 126.980845] [ 126.980860] dump_stack_lvl+0x48/0x70 [ 126.980895] dump_stack+0x10/0x20 [ 126.980916] ubsan_epilogue+0x9/0x40 [ 126.980944] __ubsan_handle_out_of_bounds+0x70/0x90 [ 126.980979] vub300_cmndwork_thread+0x58e7/0x5e10 [vub300] [ 126.981018] ? _raw_spin_unlock+0x18/0x40 [ 126.981042] ? finish_task_switch+0x175/0x6f0 [ 126.981070] ? __switch_to+0x42e/0xda0 [ 126.981089] ? __switch_to_asm+0x3a/0x80 [ 126.981129] ? __pfx_vub300_cmndwork_thread+0x10/0x10 [vub300] [ 126.981174] ? __kasan_check_read+0x11/0x20 [ 126.981204] process_one_work+0x7ee/0x13d0 [ 126.981246] worker_thread+0x53c/0x1240 [ 126.981291] kthread+0x2b8/0x370 [ 126.981312] ? __pfx_worker_thread+0x10/0x10 [ 126.981336] ? __pfx_kthread+0x10/0x10 [ 126.981359] ret_from_fork+0x29/0x50 [ 126.981400] Fixes: 88095e7b473a ("mmc: Add new VUB300 USB-to-SD/SDIO/MMC driver") Signed-off-by: Deren Wu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/048cd6972c50c33c2e8f81d5228fed928519918b.1683987673.git.deren.wu@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/vub300.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index e4c4bfac3763..9ec593d52f0f 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -1713,6 +1713,9 @@ static void construct_request_response(struct vub300_mmc_host *vub300, int bytes = 3 & less_cmd; int words = less_cmd >> 2; u8 *r = vub300->resp.response.command_response; + + if (!resp_len) + return; if (bytes == 3) { cmd->resp[words] = (r[1 + (words << 2)] << 24) | (r[2 + (words << 2)] << 16) From 0b5d5c436a5c572a45f976cfd34a6741e143e5d9 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 13 May 2023 21:23:52 +0200 Subject: [PATCH 110/934] mmc: pwrseq: sd8787: Fix WILC CHIP_EN and RESETN toggling order Chapter "5.3 Power-Up/Down Sequence" of WILC1000 [1] and WILC3000 [2] states that CHIP_EN must be pulled HIGH first, RESETN second. Fix the order of these signals in the driver. Use the mmc_pwrseq_ops as driver data as the delay between signals is specific to SDIO card type anyway. [1] https://ww1.microchip.com/downloads/aemDocuments/documents/WSG/ProductDocuments/DataSheets/ATWILC1000-MR110XB-IEEE-802.11-b-g-n-Link-Controller-Module-DS70005326E.pdf [2] https://ww1.microchip.com/downloads/aemDocuments/documents/OTH/ProductDocuments/DataSheets/IEEE-802.11-b-g-n-Link-Controller-Module-with-Integrated-Bluetooth-5.0-DS70005327B.pdf Fixes: b2832b96fcf5 ("mmc: pwrseq: sd8787: add support for wilc1000") Signed-off-by: Marek Vasut Reviewed-by: Claudiu Beznea Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230513192352.479627-1-marex@denx.de Signed-off-by: Ulf Hansson --- drivers/mmc/core/pwrseq_sd8787.c | 34 ++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/core/pwrseq_sd8787.c b/drivers/mmc/core/pwrseq_sd8787.c index 2e120ad83020..0c5f5e371e1f 100644 --- a/drivers/mmc/core/pwrseq_sd8787.c +++ b/drivers/mmc/core/pwrseq_sd8787.c @@ -28,7 +28,6 @@ struct mmc_pwrseq_sd8787 { struct mmc_pwrseq pwrseq; struct gpio_desc *reset_gpio; struct gpio_desc *pwrdn_gpio; - u32 reset_pwrdwn_delay_ms; }; #define to_pwrseq_sd8787(p) container_of(p, struct mmc_pwrseq_sd8787, pwrseq) @@ -39,7 +38,7 @@ static void mmc_pwrseq_sd8787_pre_power_on(struct mmc_host *host) gpiod_set_value_cansleep(pwrseq->reset_gpio, 1); - msleep(pwrseq->reset_pwrdwn_delay_ms); + msleep(300); gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 1); } @@ -51,17 +50,37 @@ static void mmc_pwrseq_sd8787_power_off(struct mmc_host *host) gpiod_set_value_cansleep(pwrseq->reset_gpio, 0); } +static void mmc_pwrseq_wilc1000_pre_power_on(struct mmc_host *host) +{ + struct mmc_pwrseq_sd8787 *pwrseq = to_pwrseq_sd8787(host->pwrseq); + + /* The pwrdn_gpio is really CHIP_EN, reset_gpio is RESETN */ + gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 1); + msleep(5); + gpiod_set_value_cansleep(pwrseq->reset_gpio, 1); +} + +static void mmc_pwrseq_wilc1000_power_off(struct mmc_host *host) +{ + struct mmc_pwrseq_sd8787 *pwrseq = to_pwrseq_sd8787(host->pwrseq); + + gpiod_set_value_cansleep(pwrseq->reset_gpio, 0); + gpiod_set_value_cansleep(pwrseq->pwrdn_gpio, 0); +} + static const struct mmc_pwrseq_ops mmc_pwrseq_sd8787_ops = { .pre_power_on = mmc_pwrseq_sd8787_pre_power_on, .power_off = mmc_pwrseq_sd8787_power_off, }; -static const u32 sd8787_delay_ms = 300; -static const u32 wilc1000_delay_ms = 5; +static const struct mmc_pwrseq_ops mmc_pwrseq_wilc1000_ops = { + .pre_power_on = mmc_pwrseq_wilc1000_pre_power_on, + .power_off = mmc_pwrseq_wilc1000_power_off, +}; static const struct of_device_id mmc_pwrseq_sd8787_of_match[] = { - { .compatible = "mmc-pwrseq-sd8787", .data = &sd8787_delay_ms }, - { .compatible = "mmc-pwrseq-wilc1000", .data = &wilc1000_delay_ms }, + { .compatible = "mmc-pwrseq-sd8787", .data = &mmc_pwrseq_sd8787_ops }, + { .compatible = "mmc-pwrseq-wilc1000", .data = &mmc_pwrseq_wilc1000_ops }, {/* sentinel */}, }; MODULE_DEVICE_TABLE(of, mmc_pwrseq_sd8787_of_match); @@ -77,7 +96,6 @@ static int mmc_pwrseq_sd8787_probe(struct platform_device *pdev) return -ENOMEM; match = of_match_node(mmc_pwrseq_sd8787_of_match, pdev->dev.of_node); - pwrseq->reset_pwrdwn_delay_ms = *(u32 *)match->data; pwrseq->pwrdn_gpio = devm_gpiod_get(dev, "powerdown", GPIOD_OUT_LOW); if (IS_ERR(pwrseq->pwrdn_gpio)) @@ -88,7 +106,7 @@ static int mmc_pwrseq_sd8787_probe(struct platform_device *pdev) return PTR_ERR(pwrseq->reset_gpio); pwrseq->pwrseq.dev = dev; - pwrseq->pwrseq.ops = &mmc_pwrseq_sd8787_ops; + pwrseq->pwrseq.ops = match->data; pwrseq->pwrseq.owner = THIS_MODULE; platform_set_drvdata(pdev, pwrseq); From 8d0f019e4c4f2ee2de81efd9bf1c27e9fb3c0460 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 May 2023 21:46:00 +0100 Subject: [PATCH 111/934] arm64: Add missing Set/Way CMO encodings Add the missing Set/Way CMOs that apply to tagged memory. Signed-off-by: Marc Zyngier Reviewed-by: Cornelia Huck Reviewed-by: Steven Price Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20230515204601.1270428-2-maz@kernel.org --- arch/arm64/include/asm/sysreg.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e72d9aaab6b1..eefd712f2430 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -115,8 +115,14 @@ #define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) +#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4) +#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) +#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4) +#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6) #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) +#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4) +#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6) /* * Automatically generated definitions for system registers, the From d282fa3c5ccb7a0029c418f358143689553b6447 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 May 2023 21:46:01 +0100 Subject: [PATCH 112/934] KVM: arm64: Handle trap of tagged Set/Way CMOs We appear to have missed the Set/Way CMOs when adding MTE support. Not that we really expect anyone to use them, but you never know what stupidity some people can come up with... Treat these mostly like we deal with the classic S/W CMOs, only with an additional check that MTE really is enabled. Signed-off-by: Marc Zyngier Reviewed-by: Cornelia Huck Reviewed-by: Steven Price Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20230515204601.1270428-3-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 71b12094d613..753aa7418149 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -211,6 +211,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +static bool access_dcgsw(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!kvm_has_mte(vcpu->kvm)) { + kvm_inject_undefined(vcpu); + return false; + } + + /* Treat MTE S/W ops as we treat the classic ones: with contempt */ + return access_dcsw(vcpu, p, r); +} + static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift) { switch (r->aarch32_map) { @@ -1756,8 +1769,14 @@ static bool access_spsr(struct kvm_vcpu *vcpu, */ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DC_ISW), access_dcsw }, + { SYS_DESC(SYS_DC_IGSW), access_dcgsw }, + { SYS_DESC(SYS_DC_IGDSW), access_dcgsw }, { SYS_DESC(SYS_DC_CSW), access_dcsw }, + { SYS_DESC(SYS_DC_CGSW), access_dcgsw }, + { SYS_DESC(SYS_DC_CGDSW), access_dcgsw }, { SYS_DESC(SYS_DC_CISW), access_dcsw }, + { SYS_DESC(SYS_DC_CIGSW), access_dcgsw }, + { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw }, DBG_BCR_BVR_WCR_WVR_EL1(0), DBG_BCR_BVR_WCR_WVR_EL1(1), From a9f0e3d5a089d0844abb679a5e99f15010d53e25 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 22 May 2023 11:32:58 +0100 Subject: [PATCH 113/934] KVM: arm64: Reload PTE after invoking walker callback on preorder traversal The preorder callback on the kvm_pgtable_stage2_map() path can replace a table with a block, then recursively free the detached table. The higher-level walking logic stashes the old page table entry and then walks the freed table, invoking the leaf callback and potentially freeing pgtable pages prematurely. In normal operation, the call to tear down the detached stage-2 is indirected and uses an RCU callback to trigger the freeing. RCU is not available to pKVM, which is where this bug is triggered. Change the behavior of the walker to reload the page table entry after invoking the walker callback on preorder traversal, as it does for leaf entries. Tested on Pixel 6. Fixes: 5c359cca1faf ("KVM: arm64: Tear down unlinked stage-2 subtree after break-before-make") Suggested-by: Oliver Upton Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230522103258.402272-1-tabba@google.com --- arch/arm64/include/asm/kvm_pgtable.h | 6 +++--- arch/arm64/kvm/hyp/pgtable.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index dc3c072e862f..93bd0975b15f 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -632,9 +632,9 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); * * The walker will walk the page-table entries corresponding to the input * address range specified, visiting entries according to the walker flags. - * Invalid entries are treated as leaf entries. Leaf entries are reloaded - * after invoking the walker callback, allowing the walker to descend into - * a newly installed table. + * Invalid entries are treated as leaf entries. The visited page table entry is + * reloaded after invoking the walker callback, allowing the walker to descend + * into a newly installed table. * * Returning a negative error code from the walker callback function will * terminate the walk immediately with the same error code. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 5282cb9ca4cf..e1eacffbc41f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -209,14 +209,26 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, .flags = flags, }; int ret = 0; + bool reload = false; kvm_pteref_t childp; bool table = kvm_pte_table(ctx.old, level); - if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) + if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) { ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); + reload = true; + } if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); + reload = true; + } + + /* + * Reload the page table after invoking the walker callback for leaf + * entries or after pre-order traversal, to allow the walker to descend + * into a newly installed or replaced table. + */ + if (reload) { ctx.old = READ_ONCE(*ptep); table = kvm_pte_table(ctx.old, level); } From 33d418da6f476b15e4510e0a590062583f63cd36 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 19 May 2023 15:13:11 +0200 Subject: [PATCH 114/934] riscv: Fix unused variable warning when BUILTIN_DTB is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ef69d2559fe9 ("riscv: Move early dtb mapping into the fixmap region") wrongly moved the #ifndef CONFIG_BUILTIN_DTB surrounding the pa variable definition in create_fdt_early_page_table(), so move it back to its right place to quiet the following warning: ../arch/riscv/mm/init.c: In function ‘create_fdt_early_page_table’: ../arch/riscv/mm/init.c:925:12: warning: unused variable ‘pa’ [-Wunused-variable] 925 | uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); Fixes: ef69d2559fe9 ("riscv: Move early dtb mapping into the fixmap region") Signed-off-by: Alexandre Ghiti Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20230519131311.391960-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 747e5b1ef02d..c6bb966e4123 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -922,9 +922,9 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va, uintptr_t dtb_pa) { +#ifndef CONFIG_BUILTIN_DTB uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); -#ifndef CONFIG_BUILTIN_DTB /* Make sure the fdt fixmap address is always aligned on PMD size */ BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE)); From 8557dc27126949c702bd3aafe8a7e0b7e4fcb44c Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 24 May 2023 09:41:18 +0800 Subject: [PATCH 115/934] md/raid5: fix miscalculation of 'end_sector' in raid5_read_one_chunk() 'end_sector' is compared to 'rdev->recovery_offset', which is offset to rdev, however, commit e82ed3a4fbb5 ("md/raid6: refactor raid5_read_one_chunk") changes the calculation of 'end_sector' to offset to the array. Fix this miscalculation. Fixes: e82ed3a4fbb5 ("md/raid6: refactor raid5_read_one_chunk") Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Yu Kuai Reviewed-by: Christoph Hellwig Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230524014118.3172781-1-yukuai1@huaweicloud.com --- drivers/md/raid5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4739ed891e75..9ea285fbc4a6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5516,7 +5516,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) sector = raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector, 0, &dd_idx, NULL); - end_sector = bio_end_sector(raid_bio); + end_sector = sector + bio_sectors(raid_bio); rcu_read_lock(); if (r5c_big_stripe_cached(conf, sector)) From b3d0dcc8e359cf5d57fb6308bc9750af5da574b3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 17 Apr 2023 00:29:35 +0200 Subject: [PATCH 116/934] soc: qcom: ramp_controller: Fix an error handling path in qcom_ramp_controller_probe() 'qrc' is known to be non-NULL at this point. Checking for 'qrc->desc' was expected instead, so use it. Fixes: a723c95fa137 ("soc: qcom: Add Qualcomm Ramp Controller driver") Signed-off-by: Christophe JAILLET Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/84727a79d0261b4112411aec23b553504015c02c.1681684138.git.christophe.jaillet@wanadoo.fr --- drivers/soc/qcom/ramp_controller.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/qcom/ramp_controller.c b/drivers/soc/qcom/ramp_controller.c index dc74d2a19de2..5e3ba0be0903 100644 --- a/drivers/soc/qcom/ramp_controller.c +++ b/drivers/soc/qcom/ramp_controller.c @@ -296,7 +296,7 @@ static int qcom_ramp_controller_probe(struct platform_device *pdev) return -ENOMEM; qrc->desc = device_get_match_data(&pdev->dev); - if (!qrc) + if (!qrc->desc) return -EINVAL; qrc->regmap = devm_regmap_init_mmio(&pdev->dev, base, &qrc_regmap_config); From 7b374a2fc8665bfb8a0d93b617463cc0732f533a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Apr 2023 13:44:21 +0300 Subject: [PATCH 117/934] soc: qcom: rmtfs: Fix error code in probe() Return an error code if of_property_count_u32_elems() fails. Don't return success. Fixes: e656cd0bcf3d ("soc: qcom: rmtfs: Optionally map RMTFS to more VMs") Signed-off-by: Dan Carpenter Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/76b21a14-70ff-4ca9-927d-587543c6699c@kili.mountain --- drivers/soc/qcom/rmtfs_mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/qcom/rmtfs_mem.c b/drivers/soc/qcom/rmtfs_mem.c index ce48a9f3b4c8..f83811f51175 100644 --- a/drivers/soc/qcom/rmtfs_mem.c +++ b/drivers/soc/qcom/rmtfs_mem.c @@ -233,6 +233,7 @@ static int qcom_rmtfs_mem_probe(struct platform_device *pdev) num_vmids = 0; } else if (num_vmids < 0) { dev_err(&pdev->dev, "failed to count qcom,vmid elements: %d\n", num_vmids); + ret = num_vmids; goto remove_cdev; } else if (num_vmids > NUM_MAX_VMIDS) { dev_warn(&pdev->dev, From 47820d3263a4a7ba258fe2efe26ae5afb5b83036 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Tue, 16 May 2023 11:28:56 +0300 Subject: [PATCH 118/934] soc: qcom: Rename ice to qcom_ice to avoid module name conflict The following error was reported when building x86_64 allmodconfig: error: the following would cause module name conflict: drivers/soc/qcom/ice.ko drivers/net/ethernet/intel/ice/ice.ko Seems the 'ice' module name is already used by some Intel ethernet driver, so lets rename the Qualcomm Inline Crypto Engine (ICE) module from 'ice' to 'qcom_ice' to avoid any kind of errors/confusions. Reported-by: Stephen Rothwell Fixes: 2afbf43a4aec ("soc: qcom: Make the Qualcomm UFS/SDCC ICE a dedicated driver") Acked-by: Mukesh Ojha Signed-off-by: Abel Vesa Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230516082856.150214-1-abel.vesa@linaro.org --- drivers/soc/qcom/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile index 0f43a88b4894..89b775512bef 100644 --- a/drivers/soc/qcom/Makefile +++ b/drivers/soc/qcom/Makefile @@ -32,4 +32,5 @@ obj-$(CONFIG_QCOM_RPMHPD) += rpmhpd.o obj-$(CONFIG_QCOM_RPMPD) += rpmpd.o obj-$(CONFIG_QCOM_KRYO_L2_ACCESSORS) += kryo-l2-accessors.o obj-$(CONFIG_QCOM_ICC_BWMON) += icc-bwmon.o -obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += ice.o +qcom_ice-objs += ice.o +obj-$(CONFIG_QCOM_INLINE_CRYPTO_ENGINE) += qcom_ice.o From 672b584c68de309017cd2ce8938856c9b7c1b70e Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 11 Apr 2023 15:47:15 +0200 Subject: [PATCH 119/934] dt-bindings: power: qcom,rpmpd: Add SA8155P Add a compatible for SA8155P platforms and relevant defines to the include file. Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Tested-by: Bartosz Golaszewski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230411-topic-hanaau-v2-1-fd3d70844b31@linaro.org --- Documentation/devicetree/bindings/power/qcom,rpmpd.yaml | 1 + include/dt-bindings/power/qcom-rpmpd.h | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml b/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml index afad3135ed67..f9c211a9a938 100644 --- a/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml +++ b/Documentation/devicetree/bindings/power/qcom,rpmpd.yaml @@ -29,6 +29,7 @@ properties: - qcom,qcm2290-rpmpd - qcom,qcs404-rpmpd - qcom,qdu1000-rpmhpd + - qcom,sa8155p-rpmhpd - qcom,sa8540p-rpmhpd - qcom,sa8775p-rpmhpd - qcom,sdm660-rpmpd diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h index 1bf8e87ecd7e..867b18e041ea 100644 --- a/include/dt-bindings/power/qcom-rpmpd.h +++ b/include/dt-bindings/power/qcom-rpmpd.h @@ -90,6 +90,15 @@ #define SM8150_MMCX 9 #define SM8150_MMCX_AO 10 +/* SA8155P is a special case, kept for backwards compatibility */ +#define SA8155P_CX SM8150_CX +#define SA8155P_CX_AO SM8150_CX_AO +#define SA8155P_EBI SM8150_EBI +#define SA8155P_GFX SM8150_GFX +#define SA8155P_MSS SM8150_MSS +#define SA8155P_MX SM8150_MX +#define SA8155P_MX_AO SM8150_MX_AO + /* SM8250 Power Domain Indexes */ #define SM8250_CX 0 #define SM8250_CX_AO 1 From 8c67e554754ca669911625412ae9a6cd8cee1c82 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 11 Apr 2023 15:47:17 +0200 Subject: [PATCH 120/934] arm64: dts: qcom: Split out SA8155P and use correct RPMh power domains The RPMhPD setup on SA8155P is different compared to SM8150. Correct it to ensure the platform will not try accessing forbidden/missing RPMh entries at boot, as a bad vote will hang the machine. Signed-off-by: Konrad Dybcio Tested-by: Bartosz Golaszewski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230411-topic-hanaau-v2-3-fd3d70844b31@linaro.org --- arch/arm64/boot/dts/qcom/sa8155p-adp.dts | 2 +- arch/arm64/boot/dts/qcom/sa8155p.dtsi | 40 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/boot/dts/qcom/sa8155p.dtsi diff --git a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts index 339fea522509..15e1ae1c1a97 100644 --- a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts +++ b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts @@ -7,7 +7,7 @@ #include #include -#include "sm8150.dtsi" +#include "sa8155p.dtsi" #include "pmm8155au_1.dtsi" #include "pmm8155au_2.dtsi" diff --git a/arch/arm64/boot/dts/qcom/sa8155p.dtsi b/arch/arm64/boot/dts/qcom/sa8155p.dtsi new file mode 100644 index 000000000000..ffb7ab695213 --- /dev/null +++ b/arch/arm64/boot/dts/qcom/sa8155p.dtsi @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * Copyright (c) 2023, Linaro Limited + * + * SA8155P is an automotive variant of SM8150, with some minor changes. + * Most notably, the RPMhPD setup differs: MMCX and LCX/LMX rails are gone, + * though the cmd-db doesn't reflect that and access attemps result in a bite. + */ + +#include "sm8150.dtsi" + +&dispcc { + power-domains = <&rpmhpd SA8155P_CX>; +}; + +&mdss_dsi0 { + power-domains = <&rpmhpd SA8155P_CX>; +}; + +&mdss_dsi1 { + power-domains = <&rpmhpd SA8155P_CX>; +}; + +&mdss_mdp { + power-domains = <&rpmhpd SA8155P_CX>; +}; + +&remoteproc_slpi { + power-domains = <&rpmhpd SA8155P_CX>, + <&rpmhpd SA8155P_MX>; +}; + +&rpmhpd { + /* + * The bindings were crafted such that SA8155P PDs match their + * SM8150 counterparts to make it more maintainable and only + * necessitate adjusting entries that actually differ + */ + compatible = "qcom,sa8155p-rpmhpd"; +}; From 4a08af2f22ba3c2fb26287dc27fa07bf298acb34 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Tue, 11 Apr 2023 15:47:16 +0200 Subject: [PATCH 121/934] soc: qcom: rpmhpd: Add SA8155P power domains Add the power domains exposed by RPMh in the Qualcomm SA8155P platform. Turns out they differ from SM8150. Signed-off-by: Konrad Dybcio Tested-by: Bartosz Golaszewski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230411-topic-hanaau-v2-2-fd3d70844b31@linaro.org --- drivers/soc/qcom/rpmhpd.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index f20e2a49a669..63c35a32065b 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -342,6 +342,21 @@ static const struct rpmhpd_desc sm8150_desc = { .num_pds = ARRAY_SIZE(sm8150_rpmhpds), }; +static struct rpmhpd *sa8155p_rpmhpds[] = { + [SA8155P_CX] = &cx_w_mx_parent, + [SA8155P_CX_AO] = &cx_ao_w_mx_parent, + [SA8155P_EBI] = &ebi, + [SA8155P_GFX] = &gfx, + [SA8155P_MSS] = &mss, + [SA8155P_MX] = &mx, + [SA8155P_MX_AO] = &mx_ao, +}; + +static const struct rpmhpd_desc sa8155p_desc = { + .rpmhpds = sa8155p_rpmhpds, + .num_pds = ARRAY_SIZE(sa8155p_rpmhpds), +}; + /* SM8250 RPMH powerdomains */ static struct rpmhpd *sm8250_rpmhpds[] = { [SM8250_CX] = &cx_w_mx_parent, @@ -519,6 +534,7 @@ static const struct rpmhpd_desc sc8280xp_desc = { static const struct of_device_id rpmhpd_match_table[] = { { .compatible = "qcom,qdu1000-rpmhpd", .data = &qdu1000_desc }, + { .compatible = "qcom,sa8155p-rpmhpd", .data = &sa8155p_desc }, { .compatible = "qcom,sa8540p-rpmhpd", .data = &sa8540p_desc }, { .compatible = "qcom,sa8775p-rpmhpd", .data = &sa8775p_desc }, { .compatible = "qcom,sc7180-rpmhpd", .data = &sc7180_desc }, From 4c8bb2d567588ccbfbf374c079f291a1402c4454 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 18 Apr 2023 11:57:34 +0200 Subject: [PATCH 122/934] arm64: dts: qcom: sm8550: use uint16 for Soundwire interval The Soundwire bindings changed during review, after the SM8550 DTS Soundwire nodes were merged. Switch to uint16 for Soundwire qcom,ports-sinterval property, to match with current bindings. Fixes: 61b006389bb7 ("arm64: dts: qcom: sm8550: add Soundwire controllers") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230418095734.669858-1-krzysztof.kozlowski@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 43192ef21aec..4c6b2c582b27 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -2031,7 +2031,7 @@ qcom,din-ports = <4>; qcom,dout-ports = <9>; - qcom,ports-sinterval = <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>; + qcom,ports-sinterval = /bits/ 16 <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>; qcom,ports-offset1 = /bits/ 8 <0x01 0x03 0x05 0x02 0x04 0x15 0x00 0xff 0xff 0x06 0x0d 0xff 0x00>; qcom,ports-offset2 = /bits/ 8 <0xff 0x07 0x1f 0xff 0x07 0x1f 0xff 0xff 0xff 0xff 0xff 0xff 0xff>; qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>; @@ -2077,7 +2077,7 @@ qcom,din-ports = <0>; qcom,dout-ports = <10>; - qcom,ports-sinterval = <0x03 0x3f 0x1f 0x07 0x00 0x18f 0xff 0xff 0xff 0xff>; + qcom,ports-sinterval = /bits/ 16 <0x03 0x3f 0x1f 0x07 0x00 0x18f 0xff 0xff 0xff 0xff>; qcom,ports-offset1 = /bits/ 8 <0x00 0x00 0x0b 0x01 0x00 0x00 0xff 0xff 0xff 0xff>; qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x0b 0x00 0x00 0x00 0xff 0xff 0xff 0xff>; qcom,ports-hstart = /bits/ 8 <0xff 0x03 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff>; @@ -2142,7 +2142,7 @@ qcom,din-ports = <4>; qcom,dout-ports = <9>; - qcom,ports-sinterval = <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>; + qcom,ports-sinterval = /bits/ 16 <0x07 0x1f 0x3f 0x07 0x1f 0x3f 0x18f 0xff 0xff 0x0f 0x0f 0xff 0x31f>; qcom,ports-offset1 = /bits/ 8 <0x01 0x03 0x05 0x02 0x04 0x15 0x00 0xff 0xff 0x06 0x0d 0xff 0x00>; qcom,ports-offset2 = /bits/ 8 <0xff 0x07 0x1f 0xff 0x07 0x1f 0xff 0xff 0xff 0xff 0xff 0xff 0xff>; qcom,ports-hstart = /bits/ 8 <0xff 0xff 0xff 0xff 0xff 0xff 0x08 0xff 0xff 0xff 0xff 0xff 0x0f>; From e2ab5aa11f191b54514f063a5b5c29f3559f4ab7 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 1 Mar 2023 10:50:53 +0200 Subject: [PATCH 123/934] net/mlx5e: Extract remaining tunnel encap code to dedicated file Move set_encap_dests() and clean_encap_dests() to the tunnel encap dedicated file. And rename them to mlx5e_tc_tun_encap_dests_set() and mlx5e_tc_tun_encap_dests_unset(). No functional change in this patch. It is needed in the next patch. Signed-off-by: Chris Mi Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc_tun_encap.c | 83 +++++++++++++++++ .../mellanox/mlx5/core/en/tc_tun_encap.h | 9 ++ .../net/ethernet/mellanox/mlx5/core/en_tc.c | 89 +------------------ 3 files changed, 94 insertions(+), 87 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 6a052c6cfc15..d516227b8fe8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1016,6 +1016,89 @@ out_err: return err; } +int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *vf_tun) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_esw_flow_attr *esw_attr; + struct net_device *encap_dev = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; + int out_index; + int err = 0; + + if (!mlx5e_is_eswitch_flow(flow)) + return 0; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + *vf_tun = false; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + struct net_device *out_dev; + int mirred_ifindex; + + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; + out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); + err = -ENODEV; + goto out; + } + err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, + extack, &encap_dev); + dev_put(out_dev); + if (err) + goto out; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].mdev = out_priv->mdev; + } + + if (*vf_tun && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); + err = -EOPNOTSUPP; + goto out; + } + +out: + return err; +} + +void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr; + int out_index; + + if (!mlx5e_is_eswitch_flow(flow)) + return; + + esw_attr = attr->esw_attr; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mlx5e_detach_encap(flow->priv, flow, attr, out_index); + kfree(attr->parse_attr->tun_info[out_index]); + } +} + static int cmp_route_info(struct mlx5e_route_key *a, struct mlx5e_route_key *b) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index 8ad273dde40e..5d7d67687cbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -30,6 +30,15 @@ int mlx5e_attach_decap_route(struct mlx5e_priv *priv, void mlx5e_detach_decap_route(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow); +int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *vf_tun); +void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info); int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e95414ef1f04..8935156f8f4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1699,91 +1699,6 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); } -static int -set_encap_dests(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct mlx5_flow_attr *attr, - struct netlink_ext_ack *extack, - bool *vf_tun) -{ - struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_esw_flow_attr *esw_attr; - struct net_device *encap_dev = NULL; - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *out_priv; - int out_index; - int err = 0; - - if (!mlx5e_is_eswitch_flow(flow)) - return 0; - - parse_attr = attr->parse_attr; - esw_attr = attr->esw_attr; - *vf_tun = false; - - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - struct net_device *out_dev; - int mirred_ifindex; - - if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) - continue; - - mirred_ifindex = parse_attr->mirred_ifindex[out_index]; - out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); - if (!out_dev) { - NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); - err = -ENODEV; - goto out; - } - err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, - extack, &encap_dev); - dev_put(out_dev); - if (err) - goto out; - - if (esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && - !esw_attr->dest_int_port) - *vf_tun = true; - - out_priv = netdev_priv(encap_dev); - rpriv = out_priv->ppriv; - esw_attr->dests[out_index].rep = rpriv->rep; - esw_attr->dests[out_index].mdev = out_priv->mdev; - } - - if (*vf_tun && esw_attr->out_count > 1) { - NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); - err = -EOPNOTSUPP; - goto out; - } - -out: - return err; -} - -static void -clean_encap_dests(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct mlx5_flow_attr *attr) -{ - struct mlx5_esw_flow_attr *esw_attr; - int out_index; - - if (!mlx5e_is_eswitch_flow(flow)) - return; - - esw_attr = attr->esw_attr; - - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) - continue; - - mlx5e_detach_encap(priv, flow, attr, out_index); - kfree(attr->parse_attr->tun_info[out_index]); - } -} - static int verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) { @@ -1820,7 +1735,7 @@ post_process_attr(struct mlx5e_tc_flow *flow, if (err) goto err_out; - err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); + err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); if (err) goto err_out; @@ -4324,7 +4239,7 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a if (attr->post_act_handle) mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle); - clean_encap_dests(flow->priv, flow, attr); + mlx5e_tc_tun_encap_dests_unset(flow->priv, flow, attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(counter_dev, attr->counter); From 37c3b9fa7ccf5caad6d87ba4d42bf00be46be1cf Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 21 Feb 2023 04:41:41 +0200 Subject: [PATCH 124/934] net/mlx5e: Prevent encap offload when neigh update is running The cited commit adds a compeletion to remove dependency on rtnl lock. But it causes a deadlock for multiple encapsulations: crash> bt ffff8aece8a64000 PID: 1514557 TASK: ffff8aece8a64000 CPU: 3 COMMAND: "tc" #0 [ffffa6d14183f368] __schedule at ffffffffb8ba7f45 #1 [ffffa6d14183f3f8] schedule at ffffffffb8ba8418 #2 [ffffa6d14183f418] schedule_preempt_disabled at ffffffffb8ba8898 #3 [ffffa6d14183f428] __mutex_lock at ffffffffb8baa7f8 #4 [ffffa6d14183f4d0] mutex_lock_nested at ffffffffb8baabeb #5 [ffffa6d14183f4e0] mlx5e_attach_encap at ffffffffc0f48c17 [mlx5_core] #6 [ffffa6d14183f628] mlx5e_tc_add_fdb_flow at ffffffffc0f39680 [mlx5_core] #7 [ffffa6d14183f688] __mlx5e_add_fdb_flow at ffffffffc0f3b636 [mlx5_core] #8 [ffffa6d14183f6f0] mlx5e_tc_add_flow at ffffffffc0f3bcdf [mlx5_core] #9 [ffffa6d14183f728] mlx5e_configure_flower at ffffffffc0f3c1d1 [mlx5_core] #10 [ffffa6d14183f790] mlx5e_rep_setup_tc_cls_flower at ffffffffc0f3d529 [mlx5_core] #11 [ffffa6d14183f7a0] mlx5e_rep_setup_tc_cb at ffffffffc0f3d714 [mlx5_core] #12 [ffffa6d14183f7b0] tc_setup_cb_add at ffffffffb8931bb8 #13 [ffffa6d14183f810] fl_hw_replace_filter at ffffffffc0dae901 [cls_flower] #14 [ffffa6d14183f8d8] fl_change at ffffffffc0db5c57 [cls_flower] #15 [ffffa6d14183f970] tc_new_tfilter at ffffffffb8936047 #16 [ffffa6d14183fac8] rtnetlink_rcv_msg at ffffffffb88c7c31 #17 [ffffa6d14183fb50] netlink_rcv_skb at ffffffffb8942853 #18 [ffffa6d14183fbc0] rtnetlink_rcv at ffffffffb88c1835 #19 [ffffa6d14183fbd0] netlink_unicast at ffffffffb8941f27 #20 [ffffa6d14183fc18] netlink_sendmsg at ffffffffb8942245 #21 [ffffa6d14183fc98] sock_sendmsg at ffffffffb887d482 #22 [ffffa6d14183fcb8] ____sys_sendmsg at ffffffffb887d81a #23 [ffffa6d14183fd38] ___sys_sendmsg at ffffffffb88806e2 #24 [ffffa6d14183fe90] __sys_sendmsg at ffffffffb88807a2 #25 [ffffa6d14183ff28] __x64_sys_sendmsg at ffffffffb888080f #26 [ffffa6d14183ff38] do_syscall_64 at ffffffffb8b9b6a8 #27 [ffffa6d14183ff50] entry_SYSCALL_64_after_hwframe at ffffffffb8c0007c crash> bt 0xffff8aeb07544000 PID: 1110766 TASK: ffff8aeb07544000 CPU: 0 COMMAND: "kworker/u20:9" #0 [ffffa6d14e6b7bd8] __schedule at ffffffffb8ba7f45 #1 [ffffa6d14e6b7c68] schedule at ffffffffb8ba8418 #2 [ffffa6d14e6b7c88] schedule_timeout at ffffffffb8baef88 #3 [ffffa6d14e6b7d10] wait_for_completion at ffffffffb8ba968b #4 [ffffa6d14e6b7d60] mlx5e_take_all_encap_flows at ffffffffc0f47ec4 [mlx5_core] #5 [ffffa6d14e6b7da0] mlx5e_rep_update_flows at ffffffffc0f3e734 [mlx5_core] #6 [ffffa6d14e6b7df8] mlx5e_rep_neigh_update at ffffffffc0f400bb [mlx5_core] #7 [ffffa6d14e6b7e50] process_one_work at ffffffffb80acc9c #8 [ffffa6d14e6b7ed0] worker_thread at ffffffffb80ad012 #9 [ffffa6d14e6b7f10] kthread at ffffffffb80b615d #10 [ffffa6d14e6b7f50] ret_from_fork at ffffffffb8001b2f After the first encap is attached, flow will be added to encap entry's flows list. If neigh update is running at this time, the following encaps of the flow can't hold the encap_tbl_lock and sleep. If neigh update thread is waiting for that flow's init_done, deadlock happens. Fix it by holding lock outside of the for loop. If neigh update is running, prevent encap flows from offloading. Since the lock is held outside of the for loop, concurrent creation of encap entries is not allowed. So remove unnecessary wait_for_completion call for res_ready. Fixes: 95435ad7999b ("net/mlx5e: Only access fully initialized flows in neigh update") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc_tun_encap.c | 37 ++++++++++--------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index d516227b8fe8..f0c3464f037f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -492,6 +492,19 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) mlx5e_encap_dealloc(priv, e); } +static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + lockdep_assert_held(&esw->offloads.encap_tbl_lock); + + if (!refcount_dec_and_test(&e->refcnt)) + return; + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mlx5e_encap_dealloc(priv, e); +} + static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -816,6 +829,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, uintptr_t hash_key; int err = 0; + lockdep_assert_held(&esw->offloads.encap_tbl_lock); + parse_attr = attr->parse_attr; tun_info = parse_attr->tun_info[out_index]; mpls_info = &parse_attr->mpls_info[out_index]; @@ -829,7 +844,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, hash_key = hash_encap_info(&key); - mutex_lock(&esw->offloads.encap_tbl_lock); e = mlx5e_encap_get(priv, &key, hash_key); /* must verify if encap is valid or not */ @@ -840,15 +854,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, goto out_err; } - mutex_unlock(&esw->offloads.encap_tbl_lock); - wait_for_completion(&e->res_ready); - - /* Protect against concurrent neigh update. */ - mutex_lock(&esw->offloads.encap_tbl_lock); - if (e->compl_result < 0) { - err = -EREMOTEIO; - goto out_err; - } goto attach_flow; } @@ -877,15 +882,12 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, INIT_LIST_HEAD(&e->flows); hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); tbl_time_before = mlx5e_route_tbl_get_last_update(priv); - mutex_unlock(&esw->offloads.encap_tbl_lock); if (family == AF_INET) err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); - /* Protect against concurrent neigh update. */ - mutex_lock(&esw->offloads.encap_tbl_lock); complete_all(&e->res_ready); if (err) { e->compl_result = err; @@ -920,18 +922,15 @@ attach_flow: } else { flow_flag_set(flow, SLOW); } - mutex_unlock(&esw->offloads.encap_tbl_lock); return err; out_err: - mutex_unlock(&esw->offloads.encap_tbl_lock); if (e) - mlx5e_encap_put(priv, e); + mlx5e_encap_put_locked(priv, e); return err; out_err_init: - mutex_unlock(&esw->offloads.encap_tbl_lock); kfree(tun_info); kfree(e); return err; @@ -1027,6 +1026,7 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, struct net_device *encap_dev = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; + struct mlx5_eswitch *esw; int out_index; int err = 0; @@ -1037,6 +1037,8 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, esw_attr = attr->esw_attr; *vf_tun = false; + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { struct net_device *out_dev; int mirred_ifindex; @@ -1075,6 +1077,7 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, } out: + mutex_unlock(&esw->offloads.encap_tbl_lock); return err; } From 81fe2be062915e2a2fdc494c3cd90e946e946c25 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 1 May 2023 17:31:40 +0300 Subject: [PATCH 125/934] net/mlx5e: Consider internal buffers size in port buffer calculations Currently, when a user triggers a change in port buffer headroom (buffers 0-7), the driver checks that the requested headroom does not exceed the total port buffer size. However, this check does not take into account the internal buffers (buffers 8-9), which are also part of the total port buffer. This can result in treating invalid port buffer change requests as valid, causing unintended changes to the shared buffer. To address this, include the internal buffers size in the calculation of available port buffer space which ensures that port buffer requests do not exceed the correct limit. Furthermore, remove internal buffers (8-9) size from the total_size calculation as these buffers are reserved for internal use and are not exposed to the user. While at it, add verbosity to the debug prints in mlx5e_port_query_buffer() function to ease future debugging. Fixes: ecdf2dadee8e ("net/mlx5e: Receive buffer support for DCBX") Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/port_buffer.c | 40 ++++++++++++------- .../mellanox/mlx5/core/en/port_buffer.h | 8 ++-- .../ethernet/mellanox/mlx5/core/en_dcbnl.c | 7 ++-- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 7ac1ad9c46de..0d78527451bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -51,7 +51,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, if (err) goto out; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]); port_buffer->buffer[i].lossy = MLX5_GET(bufferx_reg, buffer, lossy); @@ -73,14 +73,24 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, port_buffer->buffer[i].lossy); } - port_buffer->headroom_size = total_used; + port_buffer->internal_buffers_size = 0; + for (i = MLX5E_MAX_NETWORK_BUFFER; i < MLX5E_TOTAL_BUFFERS; i++) { + buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]); + port_buffer->internal_buffers_size += + MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz; + } + port_buffer->port_buffer_size = MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz; - port_buffer->spare_buffer_size = - port_buffer->port_buffer_size - total_used; + port_buffer->headroom_size = total_used; + port_buffer->spare_buffer_size = port_buffer->port_buffer_size - + port_buffer->internal_buffers_size - + port_buffer->headroom_size; - mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n", - port_buffer->port_buffer_size, + mlx5e_dbg(HW, priv, + "total buffer size=%u, headroom buffer size=%u, internal buffers size=%u, spare buffer size=%u\n", + port_buffer->port_buffer_size, port_buffer->headroom_size, + port_buffer->internal_buffers_size, port_buffer->spare_buffer_size); out: kfree(out); @@ -206,11 +216,11 @@ static int port_update_pool_cfg(struct mlx5_core_dev *mdev, if (!MLX5_CAP_GEN(mdev, sbcam_reg)) return 0; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) lossless_buff_count += ((port_buffer->buffer[i].size) && (!(port_buffer->buffer[i].lossy))); - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { p = select_sbcm_params(&port_buffer->buffer[i], lossless_buff_count); err = mlx5e_port_set_sbcm(mdev, 0, i, MLX5_INGRESS_DIR, @@ -293,7 +303,7 @@ static int port_set_buffer(struct mlx5e_priv *priv, if (err) goto out; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); u64 size = port_buffer->buffer[i].size; u64 xoff = port_buffer->buffer[i].xoff; @@ -351,7 +361,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, { int i; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { if (port_buffer->buffer[i].lossy) { port_buffer->buffer[i].xoff = 0; port_buffer->buffer[i].xon = 0; @@ -408,7 +418,7 @@ static int update_buffer_lossy(struct mlx5_core_dev *mdev, int err; int i; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { prio_count = 0; lossy_count = 0; @@ -515,7 +525,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { update_prio2buffer = true; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) mlx5e_dbg(HW, priv, "%s: requested to map prio[%d] to buffer %d\n", __func__, i, prio2buffer[i]); @@ -530,7 +540,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, } if (change & MLX5E_PORT_BUFFER_SIZE) { - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); if (!port_buffer.buffer[i].lossy && !buffer_size[i]) { mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n", @@ -544,7 +554,9 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used); - if (total_used > port_buffer.port_buffer_size) + if (total_used > port_buffer.headroom_size && + (total_used - port_buffer.headroom_size) > + port_buffer.spare_buffer_size) return -EINVAL; update_buffer = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h index a6ef118de758..f4a19ffbb641 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -35,7 +35,8 @@ #include "en.h" #include "port.h" -#define MLX5E_MAX_BUFFER 8 +#define MLX5E_MAX_NETWORK_BUFFER 8 +#define MLX5E_TOTAL_BUFFERS 10 #define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */ #define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \ @@ -60,8 +61,9 @@ struct mlx5e_bufferx_reg { struct mlx5e_port_buffer { u32 port_buffer_size; u32 spare_buffer_size; - u32 headroom_size; - struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER]; + u32 headroom_size; /* Buffers 0-7 */ + u32 internal_buffers_size; /* Buffers 8-9 */ + struct mlx5e_bufferx_reg buffer[MLX5E_MAX_NETWORK_BUFFER]; }; int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 89de92d06483..ebee52a8361a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -926,9 +926,10 @@ static int mlx5e_dcbnl_getbuffer(struct net_device *dev, if (err) return err; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size; - dcb_buffer->total_size = port_buffer.port_buffer_size; + dcb_buffer->total_size = port_buffer.port_buffer_size - + port_buffer.internal_buffers_size; return 0; } @@ -970,7 +971,7 @@ static int mlx5e_dcbnl_setbuffer(struct net_device *dev, if (err) return err; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) { changed |= MLX5E_PORT_BUFFER_SIZE; buffer_size = dcb_buffer->buffer_size; From 623efc4cbd6115db36716e31037cb6d1f3ce6754 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 9 May 2023 17:56:01 +0300 Subject: [PATCH 126/934] net/mlx5e: Do not update SBCM when prio2buffer command is invalid The shared buffer pools configuration which are stored in the SBCM register are updated when the user changes the prio2buffer mapping. However, in case the user desired prio2buffer change is invalid, which can occur due to mapping a lossless priority to a not large enough buffer, the SBCM update should not be performed, as the user command is failed. Thus, Perform the SBCM update only after xoff threshold calculation is performed and the user prio2buffer mapping is validated. Fixes: a440030d8946 ("net/mlx5e: Update shared buffer along with device buffer changes") Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 0d78527451bc..7e8e96cc5cd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -442,11 +442,11 @@ static int update_buffer_lossy(struct mlx5_core_dev *mdev, } if (changed) { - err = port_update_pool_cfg(mdev, port_buffer); + err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; - err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); + err = port_update_pool_cfg(mdev, port_buffer); if (err) return err; From 824c8dc4a470040bf0e56ba716543839c2498d49 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 24 Apr 2023 12:31:59 +0300 Subject: [PATCH 127/934] net/mlx5: Drain health before unregistering devlink mlx5 health mechanism is using devlink APIs, which are using devlink notify APIs. After the cited patch, using devlink notify APIs after devlink is unregistered triggers a WARN_ON(). Hence, drain health WQ before devlink is unregistered. Fixes: cf530217408e ("devlink: Notify users when objects are accessible") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a7eb65cd0bdd..2132a6510639 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1802,15 +1802,16 @@ static void remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(dev); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); - /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain - * fw_reset before unregistering the devlink. + /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using + * devlink notify APIs. + * Hence, we must drain them before unregistering the devlink. */ mlx5_drain_fw_reset(dev); + mlx5_drain_health_wq(dev); devlink_unregister(devlink); mlx5_sriov_disable(pdev); mlx5_thermal_uninit(dev); mlx5_crdump_disable(dev); - mlx5_drain_health_wq(dev); mlx5_uninit_one(dev); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); From b4646da0573fae9dfa2b8f1f10936cb6eedd7230 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 24 Apr 2023 12:46:06 +0300 Subject: [PATCH 128/934] net/mlx5: SF, Drain health before removing device There is no point in recovery during device removal. Also, if health work started need to wait for it to avoid races and NULL pointer access. Hence, drain health WQ before removing device. Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index e2f26d0bc615..0692363cf80e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -63,6 +63,7 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev) struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); struct devlink *devlink = priv_to_devlink(sf_dev->mdev); + mlx5_drain_health_wq(sf_dev->mdev); devlink_unregister(devlink); mlx5_uninit_one(sf_dev->mdev); iounmap(sf_dev->mdev->iseg); From 341a80de2468f481b1f771683709b5649cbfe513 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sat, 29 Apr 2023 20:41:41 +0300 Subject: [PATCH 129/934] net/mlx5: fw_tracer, Fix event handling mlx5 driver needs to parse traces with event_id inside the range of first_string_trace and num_string_trace. However, mlx5 is parsing all events with event_id >= first_string_trace. Fix it by checking for the correct range. Fixes: c71ad41ccb0c ("net/mlx5: FW tracer, events handling") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index f40497823e65..7c0f2adbea00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -490,7 +490,7 @@ static void poll_trace(struct mlx5_fw_tracer *tracer, (u64)timestamp_low; break; default: - if (tracer_event->event_id >= tracer->str_db.first_string_trace || + if (tracer_event->event_id >= tracer->str_db.first_string_trace && tracer_event->event_id <= tracer->str_db.first_string_trace + tracer->str_db.num_string_trace) { tracer_event->type = TRACER_EVENT_TYPE_STRING; From 1db1f21caebbb1b6e9b1e7657df613616be3fb49 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 13 Apr 2023 15:48:30 +0300 Subject: [PATCH 130/934] net/mlx5e: Use query_special_contexts cmd only once per mdev Don't query the firmware so many times (num rqs * num wqes * wqe frags) because it slows down linearly the interface creation time when the product is larger. Do it only once per mdev and store the result in mlx5e_param. Due to helper function being called from different files, move it to an appropriate location. Rename the function with a proper prefix and add a small cleanup. This fix applies only for legacy rq. Fixes: 1b1e4868836a ("net/mlx5e: Use query_special_contexts for mkeys") Signed-off-by: Dragos Tatulea Reviewed-by: Or Har-Toov Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++---------------- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 21 ++++++++++++++++ include/linux/mlx5/driver.h | 1 + 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b8987a404d75..8e999f238194 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -327,6 +327,7 @@ struct mlx5e_params { unsigned int sw_mtu; int hard_mtu; bool ptp_rx; + __be32 terminate_lkey_be; }; static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2944691f06ad..0235adcbc609 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -727,26 +727,6 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) mlx5e_rq_shampo_hd_free(rq); } -static __be32 mlx5e_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev) -{ - u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; - int res; - - if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey)) - return MLX5_TERMINATE_SCATTER_LIST_LKEY; - - MLX5_SET(query_special_contexts_in, in, opcode, - MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - res = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); - if (res) - return MLX5_TERMINATE_SCATTER_LIST_LKEY; - - res = MLX5_GET(query_special_contexts_out, out, - terminate_scatter_list_mkey); - return cpu_to_be32(res); -} - static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5e_rq_param *rqp, @@ -908,7 +888,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, /* check if num_frags is not a pow of two */ if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { wqe->data[f].byte_count = 0; - wqe->data[f].lkey = mlx5e_get_terminate_scatter_list_mkey(mdev); + wqe->data[f].lkey = params->terminate_lkey_be; wqe->data[f].addr = 0; } } @@ -5007,6 +4987,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* RQ */ mlx5e_build_rq_params(mdev, params); + params->terminate_lkey_be = mlx5_core_get_terminate_scatter_list_mkey(mdev); + params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9d735c343a3b..678f0be81375 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_core.h" int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, @@ -122,3 +123,23 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) return mlx5_cmd_exec_in(dev, destroy_psv, in); } EXPORT_SYMBOL(mlx5_core_destroy_psv); + +__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + u32 mkey; + + if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey)) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + MLX5_SET(query_special_contexts_in, in, opcode, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + if (mlx5_cmd_exec_inout(dev, query_special_contexts, in, out)) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + mkey = MLX5_GET(query_special_contexts_out, out, + terminate_scatter_list_mkey); + return cpu_to_be32(mkey); +} +EXPORT_SYMBOL(mlx5_core_get_terminate_scatter_list_mkey); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a4c4f737f9c1..94d2be5848ae 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1093,6 +1093,7 @@ void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index); int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); +__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); From 5d862ec631f3d3cc3b4f8cdb5b9fc5879663f1d3 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 22 May 2023 14:48:52 +0200 Subject: [PATCH 131/934] net/mlx5: Fix post parse infra to only parse every action once Caller of mlx5e_tc_act_post_parse() needs it to parse only the subset of actions starting after previous split and ending at the current action. However, that range is not provided as arguments and mlx5e_tc_act_post_parse() uses generic flow_action_for_each() that iterates over all flow actions. Not only this is redundant, it also causes a bug when mlx5e_tc_act->post_parse() callback is not idempotent since it will be called for every split. For example, ct action tc_act_post_parse_ct() callback obtains a reference to mlx5_ct_ft instance and calling it several times during parsing stage will cause reference counter imbalance. Fix the issue by providing a proper action range of the current split subset to mlx5e_tc_act_post_parse() and only calling mlx5e_tc_act->post_parse() for actions inside the subset range. Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c | 7 ++++++- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 +++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index fc923a99b6a4..0380a04c3691 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -84,7 +84,7 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, int mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, - struct flow_action *flow_action, + struct flow_action *flow_action, int from, int to, struct mlx5_flow_attr *attr, enum mlx5_flow_namespace_type ns_type) { @@ -96,6 +96,11 @@ mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, priv = parse_state->flow->priv; flow_action_for_each(i, act, flow_action) { + if (i < from) + continue; + else if (i > to) + break; + tc_act = mlx5e_tc_act_get(act->id, ns_type); if (!tc_act || !tc_act->post_parse) continue; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 0e6e1872ac62..d6c12d0ea55b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -112,7 +112,7 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, int mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, - struct flow_action *flow_action, + struct flow_action *flow_action, int from, int to, struct mlx5_flow_attr *attr, enum mlx5_flow_namespace_type ns_type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8935156f8f4e..8a5a8703f0a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3859,8 +3859,8 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5_flow_attr *prev_attr; struct flow_action_entry *act; struct mlx5e_tc_act *tc_act; + int err, i, i_split = 0; bool is_missable; - int err, i; ns_type = mlx5e_get_flow_namespace(flow); list_add(&attr->list, &flow->attrs); @@ -3901,7 +3901,8 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, i < flow_action->num_entries - 1)) { is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false; - err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, + ns_type); if (err) goto out_free_post_acts; @@ -3911,6 +3912,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, goto out_free_post_acts; } + i_split = i + 1; list_add(&attr->list, &flow->attrs); } @@ -3925,7 +3927,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, } } - err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, ns_type); if (err) goto out_free_post_acts; From bdf274750fca17b289404ef03453c4070725302c Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 13 Oct 2021 14:39:24 +0300 Subject: [PATCH 132/934] net/mlx5e: Don't attach netdev profile while handling internal error As part of switchdev mode disablement, driver changes port netdevice profile from uplink to nic. If this process is triggered by health recovery flow (PCI reset, for ex.) profile attach would fail because all fw commands aborted when internal error flag is set. As a result, nic netdevice profile is not attached and driver fails to rollback to uplink profile, which leave driver in broken state and cause crash later. To handle broken state do netdevice profile initialization only instead of full attachment and release mdev resources on driver suspend as expected. Actual netdevice attachment is done during driver load. Fixes: c4d7eb57687f ("net/mxl5e: Add change profile method") Signed-off-by: Dmytro Linkin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 35 ++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0235adcbc609..a07bbe9a61be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5833,8 +5833,8 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) } static int -mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, - const struct mlx5e_profile *new_profile, void *new_ppriv) +mlx5e_netdev_init_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, void *new_ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -5850,6 +5850,25 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde err = new_profile->init(priv->mdev, priv->netdev); if (err) goto priv_cleanup; + + return 0; + +priv_cleanup: + mlx5e_priv_cleanup(priv); + return err; +} + +static int +mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + err = mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); + if (err) + return err; + err = mlx5e_attach_netdev(priv); if (err) goto profile_cleanup; @@ -5857,7 +5876,6 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde profile_cleanup: new_profile->cleanup(priv); -priv_cleanup: mlx5e_priv_cleanup(priv); return err; } @@ -5876,6 +5894,12 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, priv->profile->cleanup(priv); mlx5e_priv_cleanup(priv); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + return -EIO; + } + err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv); if (err) { /* roll back to original profile */ netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err); @@ -5937,8 +5961,11 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - if (!netif_device_present(netdev)) + if (!netif_device_present(netdev)) { + if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) + mlx5e_destroy_mdev_resources(mdev); return -ENODEV; + } mlx5e_detach_netdev(priv); mlx5e_destroy_mdev_resources(mdev); From c4c24fc30cc417ace332ceceaba4f70f81dcd521 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 16 May 2023 02:28:02 +0000 Subject: [PATCH 133/934] net/mlx5e: Move Ethernet driver debugfs to profile init callback As priv->dfs_root is cleared, and therefore missed, when change eswitch mode, move the creation of the root debugfs to the init callback of mlx5e_nic_profile and mlx5e_uplink_rep_profile, and the destruction to the cleanup callback for symmeter. Fixes: 288eca60cc31 ("net/mlx5e: Add Ethernet driver debugfs") Signed-off-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a07bbe9a61be..a7c526ee5024 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5261,12 +5261,16 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5e_timestamp_init(priv); + priv->dfs_root = debugfs_create_dir("nic", + mlx5_debugfs_get_dev_root(mdev)); + fs = mlx5e_fs_init(priv->profile, mdev, !test_bit(MLX5E_STATE_DESTROYING, &priv->state), priv->dfs_root); if (!fs) { err = -ENOMEM; mlx5_core_err(mdev, "FS initialization failed, %d\n", err); + debugfs_remove_recursive(priv->dfs_root); return err; } priv->fs = fs; @@ -5287,6 +5291,7 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) mlx5e_health_destroy_reporters(priv); mlx5e_ktls_cleanup(priv); mlx5e_fs_cleanup(priv->fs); + debugfs_remove_recursive(priv->dfs_root); priv->fs = NULL; } @@ -6011,9 +6016,6 @@ static int mlx5e_probe(struct auxiliary_device *adev, priv->profile = profile; priv->ppriv = NULL; - priv->dfs_root = debugfs_create_dir("nic", - mlx5_debugfs_get_dev_root(priv->mdev)); - err = profile->init(mdev, netdev); if (err) { mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); @@ -6042,7 +6044,6 @@ err_resume: err_profile_cleanup: profile->cleanup(priv); err_destroy_netdev: - debugfs_remove_recursive(priv->dfs_root); mlx5e_destroy_netdev(priv); err_devlink_port_unregister: mlx5e_devlink_port_unregister(mlx5e_dev); @@ -6062,7 +6063,6 @@ static void mlx5e_remove(struct auxiliary_device *adev) unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); priv->profile->cleanup(priv); - debugfs_remove_recursive(priv->dfs_root); mlx5e_destroy_netdev(priv); mlx5e_devlink_port_unregister(mlx5e_dev); mlx5e_destroy_devlink(mlx5e_dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1fc386eccaf8..3e7041bd5705 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -812,11 +813,15 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, { struct mlx5e_priv *priv = netdev_priv(netdev); + priv->dfs_root = debugfs_create_dir("nic", + mlx5_debugfs_get_dev_root(mdev)); + priv->fs = mlx5e_fs_init(priv->profile, mdev, !test_bit(MLX5E_STATE_DESTROYING, &priv->state), priv->dfs_root); if (!priv->fs) { netdev_err(priv->netdev, "FS allocation failed\n"); + debugfs_remove_recursive(priv->dfs_root); return -ENOMEM; } @@ -829,6 +834,7 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) { mlx5e_fs_cleanup(priv->fs); + debugfs_remove_recursive(priv->dfs_root); priv->fs = NULL; } From fe5c2d3aef9352ac36a6acbc2bff5f732211ce3b Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 17 May 2023 17:54:30 +0300 Subject: [PATCH 134/934] net/mlx5: DR, Add missing mutex init/destroy in pattern manager Add missing mutex init/destroy as caught by the lock's debug warning: DEBUG_LOCKS_WARN_ON(lock->magic != lock) Fixes: da5d0027d666 ("net/mlx5: DR, Add cache for modify header pattern") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c index 13e06a6a6b22..d6947fe13d56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c @@ -213,6 +213,8 @@ struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn) } INIT_LIST_HEAD(&mgr->ptrn_list); + mutex_init(&mgr->modify_hdr_mutex); + return mgr; free_mgr: @@ -237,5 +239,6 @@ void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr) } mlx5dr_icm_pool_destroy(mgr->ptrn_icm_pool); + mutex_destroy(&mgr->modify_hdr_mutex); kfree(mgr); } From d5972f1000c1e6b5185ded0fc194f21984f26e14 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 May 2023 22:43:03 +0300 Subject: [PATCH 135/934] net/mlx5: Fix check for allocation failure in comp_irqs_request_pci() This function accidentally dereferences "cpus" instead of returning directly. Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202305200354.KV3jU94w-lkp@intel.com/ Fixes: b48a0f72bc3e ("net/mlx5: Refactor completion irq request/release code") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index fe698c79616c..3db4866d7880 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -824,7 +824,7 @@ static int comp_irqs_request_pci(struct mlx5_core_dev *dev) ncomp_eqs = table->num_comp_eqs; cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); if (!cpus) - ret = -ENOMEM; + return -ENOMEM; i = 0; rcu_read_lock(); From c3acc46c602f1aa0449ea687057758e5224ae3da Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 10 May 2023 10:54:12 +0700 Subject: [PATCH 136/934] Documentation: net/mlx5: Wrap vnic reporter devlink commands in code blocks Sphinx reports htmldocs warnings: Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst:287: WARNING: Unexpected indentation. Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst:288: WARNING: Block quote ends without a blank line; unexpected unindent. Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst:290: WARNING: Unexpected indentation. Fix above warnings by wrapping diagnostic devlink commands in "vnic reporter" section in code blocks to be consistent with other devlink command snippets. Fixes: b0bc615df488ab ("net/mlx5: Add vnic devlink health reporter to PFs/VFs") Fixes: cf14af140a5ad0 ("net/mlx5e: Add vnic devlink health reporter to representors") Reviewed-by: Leon Romanovsky Signed-off-by: Bagas Sanjaya Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- .../device_drivers/ethernet/mellanox/mlx5/devlink.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst index 3a7a714cc08f..0f0598caea14 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst @@ -283,10 +283,14 @@ nic_receive_steering_discard: number of packets that completed RX flow steering but were discarded due to a mismatch in flow table. User commands examples: -- Diagnose PF/VF vnic counters + +- Diagnose PF/VF vnic counters:: + $ devlink health diagnose pci/0000:82:00.1 reporter vnic + - Diagnose representor vnic counters (performed by supplying devlink port of the - representor, which can be obtained via devlink port command) + representor, which can be obtained via devlink port command):: + $ devlink health diagnose pci/0000:82:00.1/65537 reporter vnic NOTE: This command can run over all interfaces such as PF/VF and representor ports. From 565b8c60e90f7f2d4763f5979c7aaa43697ab950 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 10 May 2023 10:54:13 +0700 Subject: [PATCH 137/934] Documentation: net/mlx5: Use bullet and definition lists for vnic counters description "vnic reporter" section contains unformatted description for vnic counters, which is rendered as one long paragraph instead of list. Use bullet and definition lists to match other lists. Fixes: b0bc615df488ab ("net/mlx5: Add vnic devlink health reporter to PFs/VFs") Reviewed-by: Leon Romanovsky Signed-off-by: Bagas Sanjaya Reviewed-by: Simon Horman Reviewed-by: Daniel Machon Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/devlink.rst | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst index 0f0598caea14..00687425d8b7 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst @@ -265,22 +265,26 @@ It is responsible for querying the vnic diagnostic counters from fw and displayi them in realtime. Description of the vnic counters: -total_q_under_processor_handle: number of queues in an error state due to -an async error or errored command. -send_queue_priority_update_flow: number of QP/SQ priority/SL update -events. -cq_overrun: number of times CQ entered an error state due to an -overflow. -async_eq_overrun: number of times an EQ mapped to async events was -overrun. -comp_eq_overrun: number of times an EQ mapped to completion events was -overrun. -quota_exceeded_command: number of commands issued and failed due to quota -exceeded. -invalid_command: number of commands issued and failed dues to any reason -other than quota exceeded. -nic_receive_steering_discard: number of packets that completed RX flow -steering but were discarded due to a mismatch in flow table. + +- total_q_under_processor_handle + number of queues in an error state due to + an async error or errored command. +- send_queue_priority_update_flow + number of QP/SQ priority/SL update events. +- cq_overrun + number of times CQ entered an error state due to an overflow. +- async_eq_overrun + number of times an EQ mapped to async events was overrun. + comp_eq_overrun number of times an EQ mapped to completion events was + overrun. +- quota_exceeded_command + number of commands issued and failed due to quota exceeded. +- invalid_command + number of commands issued and failed dues to any reason other than quota + exceeded. +- nic_receive_steering_discard + number of packets that completed RX flow + steering but were discarded due to a mismatch in flow table. User commands examples: From 92059da65d84227fcc7cabbd96c0cca19880f4ff Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 10 May 2023 10:54:14 +0700 Subject: [PATCH 138/934] Documentation: net/mlx5: Add blank line separator before numbered lists The doc forgets to add separator before numbered lists, which causes the lists to be appended to previous paragraph inline instead. Add the missing separator. Fixes: f2d51e579359b7 ("net/mlx5: Separate mlx5 driver documentation into multiple pages") Reviewed-by: Leon Romanovsky Signed-off-by: Bagas Sanjaya Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- .../device_drivers/ethernet/mellanox/mlx5/devlink.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst index 00687425d8b7..f962c0975d84 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst @@ -40,6 +40,7 @@ flow_steering_mode: Device flow steering mode --------------------------------------------- The flow steering mode parameter controls the flow steering mode of the driver. Two modes are supported: + 1. 'dmfs' - Device managed flow steering. 2. 'smfs' - Software/Driver managed flow steering. @@ -99,6 +100,7 @@ between representors and stacked devices. By default metadata is enabled on the supported devices in E-switch. Metadata is applicable only for E-switch in switchdev mode and users may disable it when NONE of the below use cases will be in use: + 1. HCA is in Dual/multi-port RoCE mode. 2. VF/SF representor bonding (Usually used for Live migration) 3. Stacked devices From bb72b94c659f5032850e9ab070d850bc743e3a0e Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 10 May 2023 10:54:15 +0700 Subject: [PATCH 139/934] Documentation: net/mlx5: Wrap notes in admonition blocks Wrap note paragraphs in note:: directive as it better fit for the purpose of noting devlink commands. Fixes: f2d51e579359b7 ("net/mlx5: Separate mlx5 driver documentation into multiple pages") Fixes: cf14af140a5ad0 ("net/mlx5e: Add vnic devlink health reporter to representors") Reviewed-by: Leon Romanovsky Signed-off-by: Bagas Sanjaya Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/devlink.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst index f962c0975d84..3354ca3608ee 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst @@ -182,7 +182,8 @@ User commands examples: $ devlink health diagnose pci/0000:82:00.0 reporter tx -NOTE: This command has valid output only when interface is up, otherwise the command has empty output. +.. note:: + This command has valid output only when interface is up, otherwise the command has empty output. - Show number of tx errors indicated, number of recover flows ended successfully, is autorecover enabled and graceful period from last recover:: @@ -234,8 +235,9 @@ User commands examples: $ devlink health dump show pci/0000:82:00.0 reporter fw -NOTE: This command can run only on the PF which has fw tracer ownership, -running it on other PF or any VF will return "Operation not permitted". +.. note:: + This command can run only on the PF which has fw tracer ownership, + running it on other PF or any VF will return "Operation not permitted". fw fatal reporter ----------------- @@ -258,7 +260,8 @@ User commands examples: $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal -NOTE: This command can run only on PF. +.. note:: + This command can run only on PF. vnic reporter ------------- @@ -299,4 +302,5 @@ User commands examples: $ devlink health diagnose pci/0000:82:00.1/65537 reporter vnic -NOTE: This command can run over all interfaces such as PF/VF and representor ports. +.. note:: + This command can run over all interfaces such as PF/VF and representor ports. From 3a735530c159b75e1402c08abe1ba4eb99a1f7a3 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 15 May 2023 17:19:29 -0700 Subject: [PATCH 140/934] arm64: dts: qcom: sc7180-lite: Fix SDRAM freq for misidentified sc7180-lite boards In general, the three SKUs of sc7180 (lite, normal, and pro) are handled dynamically. The cpufreq table in sc7180.dtsi includes the superset of all CPU frequencies. The "qcom-cpufreq-hw" driver in Linux shows that we can dynamically detect which frequencies are actually available on the currently running CPU and then we can just enable those ones. The GPU is similarly dynamic. The nvmem has a fuse in it (see "gpu_speed_bin" in sc7180.dtsi) that the GPU driver can use to figure out which frequencies to enable. There is one part, however, that is not so dynamic. The way SDRAM frequency works in sc7180 is that it's tied to cpufreq. At the busiest cpufreq operating points we'll pick the top supported SDRAM frequency. They ramp down together. For the "pro" SKU of sc7180, we only enable one extra cpufreq step. That extra cpufreq step runs SDRAM at the same speed as the step below. Thus, for normal and pro things are OK. There is no sc7180-pro device tree snippet. For the "lite" SKU if sc7180, however, things aren't so easy. The "lite" SKU drops 3 cpufreq entries but can still run SDRAM at max frequency. That messed things up with the whole scheme. This is why we added the "sc7180-lite" fragment in commit 8fd01e01fd6f ("arm64: dts: qcom: sc7180-lite: Tweak DDR/L3 scaling on SC7180-lite"). When the lite scheme came about, it was agreed that the WiFi SKUs of lazor would _always_ be "lite" and would, in fact, be the only "lite" devices. Unfortunately, this decision changed and folks didn't realize that it would be a problem. Specifically, some later lazor WiFi-only devices were built with "pro" CPUs. Building WiFi-only lazor with "pro" CPUs isn't the end of the world. The SDRAM will ramp up a little sooner than it otherwise would, but aside from a small power hit things work OK. One problem, though, is that the SDRAM scaling becomes a bit quirky. Specifically, with the current tables we'll max out SDRAM frequency at 2.1GHz but then _lower_ it at 2.2GHz / 2.3GHz only to raise it back to max for 2.4GHz and 2.55GHz. Let's at least fix this so that the SDRAM frequency doesn't go down in that quirky way. On true "lite" SKUs this change will be a no-op because the operating points we're touching are disabled. This change is only useful when a board that thinks it has a "lite" CPU actually has a "normal" or "pro" one stuffed. Fixes: 8fd01e01fd6f ("arm64: dts: qcom: sc7180-lite: Tweak DDR/L3 scaling on SC7180-lite") Signed-off-by: Douglas Anderson Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230515171929.1.Ic8dee2cb79ce39ffc04eab2a344dde47b2f9459f@changeid --- arch/arm64/boot/dts/qcom/sc7180-lite.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi b/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi index d8ed1d7b4ec7..4b306a59d9be 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-lite.dtsi @@ -16,3 +16,11 @@ &cpu6_opp12 { opp-peak-kBps = <8532000 23347200>; }; + +&cpu6_opp13 { + opp-peak-kBps = <8532000 23347200>; +}; + +&cpu6_opp14 { + opp-peak-kBps = <8532000 23347200>; +}; From b317cebff59d9994ba041240654acb3c06b2f1f0 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 17 May 2023 04:18:49 +0200 Subject: [PATCH 141/934] dt-bindings: cache: qcom,llcc: Fix SM8550 description SM8550 (LLCCv4.1) has 4 register regions, this was not described between its addition and the restructurization that happened in the commit referenced in the fixes tag. Fix it. Fixes: 43aa006e074c ("dt-bindings: arm: msm: Fix register regions used for LLCC banks") Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Acked-by: Manivannan Sadhasivam Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230517-topic-kailua-llcc-v1-1-d57bd860c43e@linaro.org --- Documentation/devicetree/bindings/cache/qcom,llcc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml index d8b91944180a..44892aa589fd 100644 --- a/Documentation/devicetree/bindings/cache/qcom,llcc.yaml +++ b/Documentation/devicetree/bindings/cache/qcom,llcc.yaml @@ -129,6 +129,7 @@ allOf: - qcom,sm8250-llcc - qcom,sm8350-llcc - qcom,sm8450-llcc + - qcom,sm8550-llcc then: properties: reg: From 661a4f089317c877aecd598fb70cd46510cc8d29 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 17 May 2023 04:18:50 +0200 Subject: [PATCH 142/934] arm64: dts: qcom: sm8550: Use the correct LLCC register scheme During the ABI-breaking (for good reasons) conversion of the LLCC register description, SM8550 was not taken into account, resulting in LLCC being broken on any kernel containing the patch referenced in the fixes tag. Fix it by describing the regions properly. Fixes: ee13b5008707 ("qcom: llcc/edac: Fix the base address used for accessing LLCC banks") Signed-off-by: Konrad Dybcio Acked-by: Manivannan Sadhasivam Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230517-topic-kailua-llcc-v1-2-d57bd860c43e@linaro.org --- arch/arm64/boot/dts/qcom/sm8550.dtsi | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sm8550.dtsi b/arch/arm64/boot/dts/qcom/sm8550.dtsi index 4c6b2c582b27..558cbc430708 100644 --- a/arch/arm64/boot/dts/qcom/sm8550.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8550.dtsi @@ -3771,9 +3771,16 @@ system-cache-controller@25000000 { compatible = "qcom,sm8550-llcc"; - reg = <0 0x25000000 0 0x800000>, + reg = <0 0x25000000 0 0x200000>, + <0 0x25200000 0 0x200000>, + <0 0x25400000 0 0x200000>, + <0 0x25600000 0 0x200000>, <0 0x25800000 0 0x200000>; - reg-names = "llcc_base", "llcc_broadcast_base"; + reg-names = "llcc0_base", + "llcc1_base", + "llcc2_base", + "llcc3_base", + "llcc_broadcast_base"; interrupts = ; }; From 095aabe338d166f3a9c87bcc9b9b84ba80fdaddf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 22 May 2023 08:24:49 +0200 Subject: [PATCH 143/934] efi/libstub: zboot: Avoid eager evaluation of objcopy flags The Make variable containing the objcopy flags may be constructed from the output of build tools operating on build artifacts, and these may not exist when doing a make clean. So avoid evaluating them eagerly, to prevent spurious build warnings. Suggested-by: Pedro Falcato Tested-by: Alan Bartlett Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/Makefile.zboot | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/Makefile.zboot b/drivers/firmware/efi/libstub/Makefile.zboot index 89ef820f3b34..2c489627a807 100644 --- a/drivers/firmware/efi/libstub/Makefile.zboot +++ b/drivers/firmware/efi/libstub/Makefile.zboot @@ -32,7 +32,8 @@ zboot-size-len-$(CONFIG_KERNEL_GZIP) := 0 $(obj)/vmlinuz: $(obj)/vmlinux.bin FORCE $(call if_changed,$(zboot-method-y)) -OBJCOPYFLAGS_vmlinuz.o := -I binary -O $(EFI_ZBOOT_BFD_TARGET) $(EFI_ZBOOT_OBJCOPY_FLAGS) \ +# avoid eager evaluation to prevent references to non-existent build artifacts +OBJCOPYFLAGS_vmlinuz.o = -I binary -O $(EFI_ZBOOT_BFD_TARGET) $(EFI_ZBOOT_OBJCOPY_FLAGS) \ --rename-section .data=.gzdata,load,alloc,readonly,contents $(obj)/vmlinuz.o: $(obj)/vmlinuz FORCE $(call if_changed,objcopy) From fd936fd8ac105ba3eb764185e8ba483c789c893e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 May 2023 21:01:30 +0200 Subject: [PATCH 144/934] efi: fix missing prototype warnings The cper.c file needs to include an extra header, and efi_zboot_entry needs an extern declaration to avoid these 'make W=1' warnings: drivers/firmware/efi/libstub/zboot.c:65:1: error: no previous prototype for 'efi_zboot_entry' [-Werror=missing-prototypes] drivers/firmware/efi/efi.c:176:16: error: no previous prototype for 'efi_attr_is_visible' [-Werror=missing-prototypes] drivers/firmware/efi/cper.c:626:6: error: no previous prototype for 'cper_estatus_print' [-Werror=missing-prototypes] drivers/firmware/efi/cper.c:649:5: error: no previous prototype for 'cper_estatus_check_header' [-Werror=missing-prototypes] drivers/firmware/efi/cper.c:662:5: error: no previous prototype for 'cper_estatus_check' [-Werror=missing-prototypes] To make this easier, move the cper specific declarations to include/linux/cper.h. Signed-off-by: Arnd Bergmann Acked-by: Rafael J. Wysocki Signed-off-by: Ard Biesheuvel --- drivers/acpi/apei/apei-internal.h | 6 ------ drivers/acpi/apei/bert.c | 1 + drivers/firmware/efi/libstub/efistub.h | 3 +++ include/linux/cper.h | 6 ++++++ include/linux/efi.h | 2 ++ 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h index 1d6ef9654725..67c2c3b959e1 100644 --- a/drivers/acpi/apei/apei-internal.h +++ b/drivers/acpi/apei/apei-internal.h @@ -7,7 +7,6 @@ #ifndef APEI_INTERNAL_H #define APEI_INTERNAL_H -#include #include struct apei_exec_context; @@ -130,10 +129,5 @@ static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus) return sizeof(*estatus) + estatus->data_length; } -void cper_estatus_print(const char *pfx, - const struct acpi_hest_generic_status *estatus); -int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus); -int cper_estatus_check(const struct acpi_hest_generic_status *estatus); - int apei_osc_setup(void); #endif diff --git a/drivers/acpi/apei/bert.c b/drivers/acpi/apei/bert.c index c23eb75866d0..7514e38d5640 100644 --- a/drivers/acpi/apei/bert.c +++ b/drivers/acpi/apei/bert.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "apei-internal.h" diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 67d5a20802e0..54a2822cae77 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -1133,4 +1133,7 @@ const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record, void efi_remap_image(unsigned long image_base, unsigned alloc_size, unsigned long code_size); +asmlinkage efi_status_t __efiapi +efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab); + #endif diff --git a/include/linux/cper.h b/include/linux/cper.h index eacb7dd7b3af..c1a7dc325121 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -572,4 +572,10 @@ void cper_print_proc_ia(const char *pfx, int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg); int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg); +struct acpi_hest_generic_status; +void cper_estatus_print(const char *pfx, + const struct acpi_hest_generic_status *estatus); +int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus); +int cper_estatus_check(const struct acpi_hest_generic_status *estatus); + #endif diff --git a/include/linux/efi.h b/include/linux/efi.h index 7aa62c92185f..571d1a6e1b74 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1338,4 +1338,6 @@ bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) return xen_efi_config_table_is_usable(guid, table); } +umode_t efi_attr_is_visible(struct kobject *kobj, struct attribute *attr, int n); + #endif /* _LINUX_EFI_H */ From 173d42721b4661327bf4dd8301714417695506c2 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 1 May 2023 13:44:41 -0700 Subject: [PATCH 145/934] drm/msm: Set max segment size earlier Fixes the following splat on a6xx gen2+ (a640, a650, a660 families), a6xx gen1 has smaller GMU allocations so they fit under the default 64K max segment size. ------------[ cut here ]------------ DMA-API: msm_dpu ae01000.display-controller: mapping sg segment longer than device claims to support [len=126976] [max=65536] WARNING: CPU: 5 PID: 9 at kernel/dma/debug.c:1160 debug_dma_map_sg+0x288/0x314 Modules linked in: CPU: 5 PID: 9 Comm: kworker/u16:0 Not tainted 6.3.0-rc2-debug+ #629 Hardware name: Google Villager (rev1+) with LTE (DT) Workqueue: events_unbound deferred_probe_work_func pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : debug_dma_map_sg+0x288/0x314 lr : debug_dma_map_sg+0x288/0x314 sp : ffffffc00809b560 x29: ffffffc00809b560 x28: 0000000000000060 x27: 0000000000000000 x26: 0000000000010000 x25: 0000000000000004 x24: 0000000000000004 x23: ffffffffffffffff x22: ffffffdb31693cc0 x21: ffffff8080935800 x20: ffffff8087417400 x19: ffffff8087a45010 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000010000 x14: 0000000000000001 x13: ffffffffffffffff x12: ffffffffffffffff x11: 0000000000000000 x10: 000000000000000a x9 : ffffffdb2ff05e14 x8 : ffffffdb31275000 x7 : ffffffdb2ff08908 x6 : 0000000000000000 x5 : 0000000000000001 x4 : ffffffdb2ff08a74 x3 : ffffffdb31275008 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffffff80803a9a80 Call trace: debug_dma_map_sg+0x288/0x314 __dma_map_sg_attrs+0x80/0xe4 dma_map_sgtable+0x30/0x4c get_pages+0x1d4/0x1e4 msm_gem_pin_pages_locked+0xbc/0xf8 msm_gem_pin_vma_locked+0x58/0xa0 msm_gem_get_and_pin_iova_range+0x98/0xac a6xx_gmu_memory_alloc+0x7c/0x128 a6xx_gmu_init+0x16c/0x9b0 a6xx_gpu_init+0x38c/0x3e4 adreno_bind+0x214/0x264 component_bind_all+0x128/0x1f8 msm_drm_bind+0x2b8/0x608 try_to_bring_up_aggregate_device+0x88/0x1a4 __component_add+0xec/0x13c component_add+0x1c/0x28 dp_display_probe+0x3f8/0x43c platform_probe+0x70/0xc4 really_probe+0x148/0x280 __driver_probe_device+0xc8/0xe0 driver_probe_device+0x44/0x100 __device_attach_driver+0x64/0xdc bus_for_each_drv+0xb0/0xd8 __device_attach+0xd8/0x168 device_initial_probe+0x1c/0x28 bus_probe_device+0x44/0xb0 deferred_probe_work_func+0xc8/0xe0 process_one_work+0x2e0/0x488 process_scheduled_works+0x4c/0x50 worker_thread+0x218/0x274 kthread+0xf0/0x100 ret_from_fork+0x10/0x20 irq event stamp: 293712 hardirqs last enabled at (293711): [] vprintk_emit+0x160/0x25c hardirqs last disabled at (293712): [] el1_dbg+0x24/0x80 softirqs last enabled at (279520): [] __do_softirq+0x21c/0x4bc softirqs last disabled at (279515): [] ____do_softirq+0x18/0x24 ---[ end trace 0000000000000000 ]--- Signed-off-by: Rob Clark Fixes: db735fc4036b ("drm/msm: Set dma maximum segment size for mdss") Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/534892/ Link: https://lore.kernel.org/r/20230501204441.1642741-1-robdclark@gmail.com --- drivers/gpu/drm/msm/msm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index b4cfa44a8a5c..463ca4164f5f 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -449,6 +449,8 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (ret) goto err_cleanup_mode_config; + dma_set_max_seg_size(dev, UINT_MAX); + /* Bind all our sub-components: */ ret = component_bind_all(dev, ddev); if (ret) @@ -459,8 +461,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) if (ret) goto err_msm_uninit; - dma_set_max_seg_size(dev, UINT_MAX); - msm_gem_shrinker_init(ddev); if (priv->kms_init) { From b78c77273a5648eddc02f275f582b681a5127711 Mon Sep 17 00:00:00 2001 From: Abhinav Kumar Date: Thu, 27 Apr 2023 16:28:47 -0700 Subject: [PATCH 146/934] drm/msm/dp: add module parameter for PSR On sc7280 where eDP is the primary display, PSR is causing IGT breakage even for basic test cases like kms_atomic and kms_atomic_transition. Most often the issue starts with below stack so providing that as reference Call trace: dpu_encoder_assign_crtc+0x64/0x6c dpu_crtc_enable+0x188/0x204 drm_atomic_helper_commit_modeset_enables+0xc0/0x274 msm_atomic_commit_tail+0x1a8/0x68c commit_tail+0xb0/0x160 drm_atomic_helper_commit+0x11c/0x124 drm_atomic_commit+0xb0/0xdc drm_atomic_connector_commit_dpms+0xf4/0x110 drm_mode_obj_set_property_ioctl+0x16c/0x3b0 drm_connector_property_set_ioctl+0x4c/0x74 drm_ioctl_kernel+0xec/0x15c drm_ioctl+0x264/0x408 __arm64_sys_ioctl+0x9c/0xd4 invoke_syscall+0x4c/0x110 el0_svc_common+0x94/0xfc do_el0_svc+0x3c/0xb0 el0_svc+0x2c/0x7c el0t_64_sync_handler+0x48/0x114 el0t_64_sync+0x190/0x194 ---[ end trace 0000000000000000 ]--- [drm-dp] dp_ctrl_push_idle: PUSH_IDLE pattern timedout Other basic use-cases still seem to work fine hence add a a module parameter to allow toggling psr enable/disable till PSR related issues are hashed out with IGT. Signed-off-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Acked-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/534420/ Link: https://lore.kernel.org/r/20230427232848.5200-1-quic_abhinavk@quicinc.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 99a38dbe51c0..37e0d12b3319 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -28,6 +28,10 @@ #include "dp_audio.h" #include "dp_debug.h" +static bool psr_enabled = false; +module_param(psr_enabled, bool, 0); +MODULE_PARM_DESC(psr_enabled, "enable PSR for eDP and DP displays"); + #define HPD_STRING_SIZE 30 enum { @@ -407,7 +411,7 @@ static int dp_display_process_hpd_high(struct dp_display_private *dp) edid = dp->panel->edid; - dp->dp_display.psr_supported = dp->panel->psr_cap.version; + dp->dp_display.psr_supported = dp->panel->psr_cap.version && psr_enabled; dp->audio_supported = drm_detect_monitor_audio(edid); dp_panel_handle_sink_request(dp->panel); From 1dd5483af494216e91c9a12f11992bcba483f944 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 24 May 2023 15:12:29 -0500 Subject: [PATCH 147/934] smb3: update a reviewer email in MAINTAINERS file Paulo has a new email address so update maintainers files. Signed-off-by: Steve French --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 207a65905f5e..839cc375b205 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5130,7 +5130,7 @@ X: drivers/clk/clkdev.c COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3) M: Steve French -R: Paulo Alcantara (DFS, global name space) +R: Paulo Alcantara (DFS, global name space) R: Ronnie Sahlberg (directory leases, sparse files) R: Shyam Prasad N (multichannel) R: Tom Talpey (RDMA, smbdirect) From b535cc796a4b4942cd189652588e8d37c1f5925a Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 May 2023 18:53:28 -0500 Subject: [PATCH 148/934] smb3: missing null check in SMB2_change_notify If plen is null when passed in, we only checked for null in one of the two places where it could be used. Although plen is always valid (not null) for current callers of the SMB2_change_notify function, this change makes it more consistent. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/202305251831.3V1gbbFs-lkp@intel.com/ Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 9ed61b6f9b21..7063b395d22f 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -3725,7 +3725,7 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, if (*out_data == NULL) { rc = -ENOMEM; goto cnotify_exit; - } else + } else if (plen) *plen = le32_to_cpu(smb_rsp->OutputBufferLength); } From d68cb7cf1fd0ef4287bc0ecd1ed0b6ae8e05fc70 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 24 May 2023 21:49:08 +0200 Subject: [PATCH 149/934] net: mellanox: mlxbf_gige: Fix skb_panic splat under memory pressure Do skb_put() after a new skb has been successfully allocated otherwise the reused skb leads to skb_panics or incorrect packet sizes. Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: Thomas Bogendoerfer Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230524194908.147145-1-tbogendoerfer@suse.de Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c index afa3b92a6905..0d5a41a2ae01 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c @@ -245,12 +245,6 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts) skb = priv->rx_skb[rx_pi_rem]; - skb_put(skb, datalen); - - skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */ - - skb->protocol = eth_type_trans(skb, netdev); - /* Alloc another RX SKB for this same index */ rx_skb = mlxbf_gige_alloc_skb(priv, MLXBF_GIGE_DEFAULT_BUF_SZ, &rx_buf_dma, DMA_FROM_DEVICE); @@ -259,6 +253,13 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts) priv->rx_skb[rx_pi_rem] = rx_skb; dma_unmap_single(priv->dev, *rx_wqe_addr, MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE); + + skb_put(skb, datalen); + + skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */ + + skb->protocol = eth_type_trans(skb, netdev); + *rx_wqe_addr = rx_buf_dma; } else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR) { priv->stats.rx_mac_errors++; From ffb3322181d9e8db880202e4f00991764a35d812 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 24 May 2023 20:57:14 +0800 Subject: [PATCH 150/934] net: stmmac: fix call trace when stmmac_xdp_xmit() is invoked We encountered a kernel call trace issue which was related to ndo_xdp_xmit callback on our i.MX8MP platform. The reproduce steps show as follows. 1. The FEC port (eth0) connects to a PC port, and the PC uses pktgen_sample03_burst_single_flow.sh to generate packets and send these packets to the FEC port. Notice that the script must be executed before step 2. 2. Run the "./xdp_redirect eth0 eth1" command on i.MX8MP, the eth1 interface is the dwmac. Then there will be a call trace issue soon. Please see the log for more details. The root cause is that the NETDEV_XDP_ACT_NDO_XMIT feature is enabled by default, so when the step 2 command is exexcuted and packets have already been sent to eth0, the stmmac_xdp_xmit() starts running before the stmmac_xdp_set_prog() finishes. To resolve this issue, we disable the NETDEV_XDP_ACT_NDO_XMIT feature by default and turn on/off this feature when the bpf program is installed/uninstalled which just like the other ethernet drivers. Call Trace log: [ 306.311271] ------------[ cut here ]------------ [ 306.315910] WARNING: CPU: 0 PID: 15 at lib/timerqueue.c:55 timerqueue_del+0x68/0x70 [ 306.323590] Modules linked in: [ 306.326654] CPU: 0 PID: 15 Comm: ksoftirqd/0 Not tainted 6.4.0-rc1+ #37 [ 306.333277] Hardware name: NXP i.MX8MPlus EVK board (DT) [ 306.338591] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 306.345561] pc : timerqueue_del+0x68/0x70 [ 306.349577] lr : __remove_hrtimer+0x5c/0xa0 [ 306.353777] sp : ffff80000b7c3920 [ 306.357094] x29: ffff80000b7c3920 x28: 0000000000000000 x27: 0000000000000001 [ 306.364244] x26: ffff80000a763a40 x25: ffff0000d0285a00 x24: 0000000000000001 [ 306.371390] x23: 0000000000000001 x22: ffff000179389a40 x21: 0000000000000000 [ 306.378537] x20: ffff000179389aa0 x19: ffff0000d2951308 x18: 0000000000001000 [ 306.385686] x17: f1d3000000000000 x16: 00000000c39c1000 x15: 55e99bbe00001a00 [ 306.392835] x14: 09000900120aa8c0 x13: e49af1d300000000 x12: 000000000000c39c [ 306.399987] x11: 100055e99bbe0000 x10: ffff8000090b1048 x9 : ffff8000081603fc [ 306.407133] x8 : 000000000000003c x7 : 000000000000003c x6 : 0000000000000001 [ 306.414284] x5 : ffff0000d2950980 x4 : 0000000000000000 x3 : 0000000000000000 [ 306.421432] x2 : 0000000000000001 x1 : ffff0000d2951308 x0 : ffff0000d2951308 [ 306.428585] Call trace: [ 306.431035] timerqueue_del+0x68/0x70 [ 306.434706] __remove_hrtimer+0x5c/0xa0 [ 306.438549] hrtimer_start_range_ns+0x2bc/0x370 [ 306.443089] stmmac_xdp_xmit+0x174/0x1b0 [ 306.447021] bq_xmit_all+0x194/0x4b0 [ 306.450612] __dev_flush+0x4c/0x98 [ 306.454024] xdp_do_flush+0x18/0x38 [ 306.457522] fec_enet_rx_napi+0x6c8/0xc68 [ 306.461539] __napi_poll+0x40/0x220 [ 306.465038] net_rx_action+0xf8/0x240 [ 306.468707] __do_softirq+0x128/0x3a8 [ 306.472378] run_ksoftirqd+0x40/0x58 [ 306.475961] smpboot_thread_fn+0x1c4/0x288 [ 306.480068] kthread+0x124/0x138 [ 306.483305] ret_from_fork+0x10/0x20 [ 306.486889] ---[ end trace 0000000000000000 ]--- Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Signed-off-by: Wei Fang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230524125714.357337-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +-- drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0fca81507a77..52cab9de05f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7233,8 +7233,7 @@ int stmmac_dvr_probe(struct device *device, ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_XSK_ZEROCOPY | - NETDEV_XDP_ACT_NDO_XMIT; + NETDEV_XDP_ACT_XSK_ZEROCOPY; ret = stmmac_tc_init(priv, priv); if (!ret) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c index 9d4d8c3dad0a..aa6f16d3df64 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c @@ -117,6 +117,9 @@ int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog, return -EOPNOTSUPP; } + if (!prog) + xdp_features_clear_redirect_target(dev); + need_update = !!priv->xdp_prog != !!prog; if (if_running && need_update) stmmac_xdp_release(dev); @@ -131,5 +134,8 @@ int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog, if (if_running && need_update) stmmac_xdp_open(dev); + if (prog) + xdp_features_set_redirect_target(dev, false); + return 0; } From 31642e7089df8fd3f54ca7843f7ee2952978cad1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 May 2023 14:14:56 +0000 Subject: [PATCH 151/934] netrom: fix info-leak in nr_write_internal() Simon Kapadia reported the following issue: The Online Amateur Radio Community (OARC) has recently been experimenting with building a nationwide packet network in the UK. As part of our experimentation, we have been testing out packet on 300bps HF, and playing with net/rom. For HF packet at this baud rate you really need to make sure that your MTU is relatively low; AX.25 suggests a PACLEN of 60, and a net/rom PACLEN of 40 to go with that. However the Linux net/rom support didn't work with a low PACLEN; the mkiss module would truncate packets if you set the PACLEN below about 200 or so, e.g.: Apr 19 14:00:51 radio kernel: [12985.747310] mkiss: ax1: truncating oversized transmit packet! This didn't make any sense to me (if the packets are smaller why would they be truncated?) so I started investigating. I looked at the packets using ethereal, and found that many were just huge compared to what I would expect. A simple net/rom connection request packet had the request and then a bunch of what appeared to be random data following it: Simon provided a patch that I slightly revised: Not only we must not use skb_tailroom(), we also do not want to count NR_NETWORK_LEN twice. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Co-Developed-by: Simon Kapadia Signed-off-by: Simon Kapadia Signed-off-by: Eric Dumazet Tested-by: Simon Kapadia Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230524141456.1045467-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/netrom/nr_subr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 3f99b432ea70..e2d2af924cff 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -123,7 +123,7 @@ void nr_write_internal(struct sock *sk, int frametype) unsigned char *dptr; int len, timeout; - len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; + len = NR_TRANSPORT_LEN; switch (frametype & 0x0F) { case NR_CONNREQ: @@ -141,7 +141,8 @@ void nr_write_internal(struct sock *sk, int frametype) return; } - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) + skb = alloc_skb(NR_NETWORK_LEN + len, GFP_ATOMIC); + if (!skb) return; /* @@ -149,7 +150,7 @@ void nr_write_internal(struct sock *sk, int frametype) */ skb_reserve(skb, NR_NETWORK_LEN); - dptr = skb_put(skb, skb_tailroom(skb)); + dptr = skb_put(skb, len); switch (frametype & 0x0F) { case NR_CONNREQ: From 081e8df6819997eae236f75dd52f0c147c4be939 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 May 2023 10:07:12 -0700 Subject: [PATCH 152/934] tools: ynl: avoid dict errors on older Python versions Python 3.9.0 or newer supports combining dicts() with |, but older versions of Python are still used in the wild (e.g. on CentOS 8, which goes EoL May 31, 2024). With Python 3.6.8 we get: TypeError: unsupported operand type(s) for |: 'dict' and 'dict' Use older syntax. Tested with non-legacy families only. Fixes: f036d936ca57 ("tools: ynl: Add fixed-header support to ynl") Reviewed-by: Simon Horman Reviewed-by: Donald Hunter Tested-by: Donald Hunter Link: https://lore.kernel.org/r/20230524170712.2036128-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index aa77bcae4807..3144f33196be 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -591,8 +591,9 @@ class YnlFamily(SpecFamily): print('Unexpected message: ' + repr(gm)) continue - rsp.append(self._decode(gm.raw_attrs, op.attr_set.name) - | gm.fixed_header_attrs) + rsp_msg = self._decode(gm.raw_attrs, op.attr_set.name) + rsp_msg.update(gm.fixed_header_attrs) + rsp.append(rsp_msg) if not rsp: return None From 822b5a1c17df7e338b9f05d1cfe5764e37c7f74f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 24 May 2023 16:29:34 -0700 Subject: [PATCH 153/934] af_packet: Fix data-races of pkt_sk(sk)->num. syzkaller found a data race of pkt_sk(sk)->num. The value is changed under lock_sock() and po->bind_lock, so we need READ_ONCE() to access pkt_sk(sk)->num without these locks in packet_bind_spkt(), packet_bind(), and sk_diag_fill(). Note that WRITE_ONCE() is already added by commit c7d2ef5dd4b0 ("net/packet: annotate accesses to po->bind"). BUG: KCSAN: data-race in packet_bind / packet_do_bind write (marked) to 0xffff88802ffd1cee of 2 bytes by task 7322 on cpu 0: packet_do_bind+0x446/0x640 net/packet/af_packet.c:3236 packet_bind+0x99/0xe0 net/packet/af_packet.c:3321 __sys_bind+0x19b/0x1e0 net/socket.c:1803 __do_sys_bind net/socket.c:1814 [inline] __se_sys_bind net/socket.c:1812 [inline] __x64_sys_bind+0x40/0x50 net/socket.c:1812 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc read to 0xffff88802ffd1cee of 2 bytes by task 7318 on cpu 1: packet_bind+0xbf/0xe0 net/packet/af_packet.c:3322 __sys_bind+0x19b/0x1e0 net/socket.c:1803 __do_sys_bind net/socket.c:1814 [inline] __se_sys_bind net/socket.c:1812 [inline] __x64_sys_bind+0x40/0x50 net/socket.c:1812 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc value changed: 0x0300 -> 0x0000 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 7318 Comm: syz-executor.4 Not tainted 6.3.0-13380-g7fddb5b5300c #4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Fixes: 96ec6327144e ("packet: Diag core and basic socket info dumping") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20230524232934.50950-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 4 ++-- net/packet/diag.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 94c6a1ffa459..a1f9a0e9f3c8 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3299,7 +3299,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); name[sizeof(uaddr->sa_data_min)] = 0; - return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); + return packet_do_bind(sk, name, 0, READ_ONCE(pkt_sk(sk)->num)); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) @@ -3317,7 +3317,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len return -EINVAL; return packet_do_bind(sk, NULL, sll->sll_ifindex, - sll->sll_protocol ? : pkt_sk(sk)->num); + sll->sll_protocol ? : READ_ONCE(pkt_sk(sk)->num)); } static struct proto packet_proto = { diff --git a/net/packet/diag.c b/net/packet/diag.c index d0c4eda4cdc6..f6b200cb3c06 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -143,7 +143,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, rp = nlmsg_data(nlh); rp->pdiag_family = AF_PACKET; rp->pdiag_type = sk->sk_type; - rp->pdiag_num = ntohs(po->num); + rp->pdiag_num = ntohs(READ_ONCE(po->num)); rp->pdiag_ino = sk_ino; sock_diag_save_cookie(sk, rp->pdiag_cookie); From 8a0d57df8938e9fd2e99d47a85b7f37d86f91097 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 24 May 2023 22:17:41 -0700 Subject: [PATCH 154/934] tls: improve lockless access safety of tls_err_abort() Most protos' poll() methods insert a memory barrier between writes to sk_err and sk_error_report(). This dates back to commit a4d258036ed9 ("tcp: Fix race in tcp_poll"). I guess we should do the same thing in TLS, tcp_poll() does not hold the socket lock. Fixes: 3c4d7559159b ("tls: kernel TLS support") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_strp.c | 4 +++- net/tls/tls_sw.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c index da95abbb7ea3..f37f4a0fcd3c 100644 --- a/net/tls/tls_strp.c +++ b/net/tls/tls_strp.c @@ -20,7 +20,9 @@ static void tls_strp_abort_strp(struct tls_strparser *strp, int err) strp->stopped = 1; /* Report an error on the lower socket */ - strp->sk->sk_err = -err; + WRITE_ONCE(strp->sk->sk_err, -err); + /* Paired with smp_rmb() in tcp_poll() */ + smp_wmb(); sk_error_report(strp->sk); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 6e6a7c37d685..1a53c8f481e9 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -70,7 +70,9 @@ noinline void tls_err_abort(struct sock *sk, int err) { WARN_ON_ONCE(err >= 0); /* sk->sk_err should contain a positive error code. */ - sk->sk_err = -err; + WRITE_ONCE(sk->sk_err, -err); + /* Paired with smp_rmb() in tcp_poll() */ + smp_wmb(); sk_error_report(sk); } From dc362e20cd6ab7a93d1b09669730c406f0910c35 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 25 May 2023 23:56:12 +0530 Subject: [PATCH 155/934] amd-xgbe: fix the false linkup in xgbe_phy_status In the event of a change in XGBE mode, the current auto-negotiation needs to be reset and the AN cycle needs to be re-triggerred. However, the current code ignores the return value of xgbe_set_mode(), leading to false information as the link is declared without checking the status register. Fix this by propagating the mode switch status information to xgbe_phy_status(). Fixes: e57f7a3feaef ("amd-xgbe: Prepare for working with more than one type of phy") Co-developed-by: Sudheesh Mavila Signed-off-by: Sudheesh Mavila Reviewed-by: Simon Horman Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 33a9574e9e04..32d2c6fac652 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1329,7 +1329,7 @@ static enum xgbe_mode xgbe_phy_status_aneg(struct xgbe_prv_data *pdata) return pdata->phy_if.phy_impl.an_outcome(pdata); } -static void xgbe_phy_status_result(struct xgbe_prv_data *pdata) +static bool xgbe_phy_status_result(struct xgbe_prv_data *pdata) { struct ethtool_link_ksettings *lks = &pdata->phy.lks; enum xgbe_mode mode; @@ -1367,8 +1367,13 @@ static void xgbe_phy_status_result(struct xgbe_prv_data *pdata) pdata->phy.duplex = DUPLEX_FULL; - if (xgbe_set_mode(pdata, mode) && pdata->an_again) + if (!xgbe_set_mode(pdata, mode)) + return false; + + if (pdata->an_again) xgbe_phy_reconfig_aneg(pdata); + + return true; } static void xgbe_phy_status(struct xgbe_prv_data *pdata) @@ -1398,7 +1403,8 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) return; } - xgbe_phy_status_result(pdata); + if (xgbe_phy_status_result(pdata)) + return; if (test_bit(XGBE_LINK_INIT, &pdata->dev_state)) clear_bit(XGBE_LINK_INIT, &pdata->dev_state); From 14b4bd01f8e5d663a41ef8cec5857db874b66e88 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Tue, 23 May 2023 17:25:15 +0100 Subject: [PATCH 156/934] media: verisilicon: Additional fix for the crash when opening the driver This fixes the following issue observed on Odroid-M1 board: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Mem abort info: ... Modules linked in: crct10dif_ce hantro_vpu snd_soc_simple_card snd_soc_simple_card_utils v4l2_vp9 v4l2_h264 rockchip_saradc v4l2_mem2mem videobuf2_dma_contig videobuf2_memops rtc_rk808 videobuf2_v4l2 industrialio_triggered_buffer rockchip_thermal dwmac_rk stmmac_platform stmmac videodev kfifo_buf display_connector videobuf2_common pcs_xpcs mc rockchipdrm analogix_dp dw_mipi_dsi dw_hdmi drm_display_helper panfrost drm_shmem_helper gpu_sched ip_tables x_tables ipv6 CPU: 3 PID: 176 Comm: v4l_id Not tainted 6.3.0-rc7-next-20230420 #13481 Hardware name: Hardkernel ODROID-M1 (DT) pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : hantro_try_fmt+0xa0/0x278 [hantro_vpu] lr : hantro_try_fmt+0x94/0x278 [hantro_vpu] ... Call trace: hantro_try_fmt+0xa0/0x278 [hantro_vpu] hantro_set_fmt_out+0x3c/0x298 [hantro_vpu] hantro_reset_raw_fmt+0x98/0x128 [hantro_vpu] hantro_set_fmt_cap+0x240/0x254 [hantro_vpu] hantro_reset_encoded_fmt+0x94/0xcc [hantro_vpu] hantro_reset_fmts+0x18/0x38 [hantro_vpu] hantro_open+0xd4/0x20c [hantro_vpu] v4l2_open+0x80/0x120 [videodev] chrdev_open+0xc0/0x22c do_dentry_open+0x13c/0x48c vfs_open+0x2c/0x38 path_openat+0x550/0x934 do_filp_open+0x80/0x12c do_sys_openat2+0xb4/0x168 __arm64_sys_openat+0x64/0xac invoke_syscall+0x48/0x114 el0_svc_common+0x100/0x120 do_el0_svc+0x3c/0xa8 el0_svc+0x40/0xa8 el0t_64_sync_handler+0xb8/0xbc el0t_64_sync+0x190/0x194 Code: 97fc8a7f f940aa80 52864a61 72a686c1 (b9400800) ---[ end trace 0000000000000000 ]--- Fixes: db6f68b51e5c ("media: verisilicon: Do not set context src/dst formats in reset functions") Signed-off-by: Benjamin Gaignard Tested-by: Michael Tretter Tested-by: Diederik de Haas Tested-by: Marek Szyprowski Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/verisilicon/hantro_v4l2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/verisilicon/hantro_v4l2.c b/drivers/media/platform/verisilicon/hantro_v4l2.c index 835518534e3b..61cfaaf4e927 100644 --- a/drivers/media/platform/verisilicon/hantro_v4l2.c +++ b/drivers/media/platform/verisilicon/hantro_v4l2.c @@ -397,10 +397,12 @@ hantro_reset_raw_fmt(struct hantro_ctx *ctx, int bit_depth) if (!raw_vpu_fmt) return -EINVAL; - if (ctx->is_encoder) + if (ctx->is_encoder) { encoded_fmt = &ctx->dst_fmt; - else + ctx->vpu_src_fmt = raw_vpu_fmt; + } else { encoded_fmt = &ctx->src_fmt; + } hantro_reset_fmt(&raw_fmt, raw_vpu_fmt); raw_fmt.width = encoded_fmt->width; From 76743b29f4802afdf81d7ff1228c11e697850235 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Wed, 3 May 2023 08:53:40 +0100 Subject: [PATCH 157/934] media: camss: camss-video: Don't zero subdev format again after initialization In an earlier commit, setting the which field of the subdev format struct in video_get_subdev_format was moved to a designated initializer that also zeroes all other fields. However, the memset call that was zeroing the fields earlier was left in place, causing the which field to be cleared after being set in the initializer. Remove the memset call from video_get_subdev_format to avoid clearing the initialized which field. Fixes: ecefa105cc44 ("media: Zero-initialize all structures passed to subdev pad operations") Signed-off-by: Yassine Oudjana Acked-by: Bryan O'Donoghue Tested-by: Andrey Konovalov Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/qcom/camss/camss-video.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/platform/qcom/camss/camss-video.c b/drivers/media/platform/qcom/camss/camss-video.c index 898f32177b12..8640db306026 100644 --- a/drivers/media/platform/qcom/camss/camss-video.c +++ b/drivers/media/platform/qcom/camss/camss-video.c @@ -353,7 +353,6 @@ static int video_get_subdev_format(struct camss_video *video, if (subdev == NULL) return -EPIPE; - memset(&fmt, 0, sizeof(fmt)); fmt.pad = pad; ret = v4l2_subdev_call(subdev, pad, get_fmt, NULL, &fmt); From ed17f89e9502f03af493e130620a9bb74c07cf28 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Sat, 22 Apr 2023 11:39:05 +0100 Subject: [PATCH 158/934] media: mediatek: vcodec: Only apply 4K frame sizes on decoder formats When VCODEC_CAPABILITY_4K_DISABLED is not set in dec_capability, skip formats that are not MTK_FMT_DEC so only decoder formats is updated in mtk_init_vdec_params. Fixes: e25528e1dbe5 ("media: mediatek: vcodec: Use 4K frame size when supported by stateful decoder") Signed-off-by: Pin-yen Lin Reviewed-by: Chen-Yu Tsai Reviewed-by: Yunfei Dong Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c index 29991551cf61..0fbd030026c7 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateful.c @@ -584,6 +584,9 @@ static void mtk_init_vdec_params(struct mtk_vcodec_ctx *ctx) if (!(ctx->dev->dec_capability & VCODEC_CAPABILITY_4K_DISABLED)) { for (i = 0; i < num_supported_formats; i++) { + if (mtk_video_formats[i].type != MTK_FMT_DEC) + continue; + mtk_video_formats[i].frmsize.max_width = VCODEC_DEC_4K_CODED_WIDTH; mtk_video_formats[i].frmsize.max_height = From fe4526d99e2e06b08bb80316c3a596ea6a807b75 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 20 Apr 2023 08:26:53 +0100 Subject: [PATCH 159/934] media: cec: core: disable adapter in cec_devnode_unregister Explicitly disable the CEC adapter in cec_devnode_unregister() Usually this does not really do anything important, but for drivers that use the CEC pin framework this is needed to properly stop the hrtimer. Without this a crash would happen when such a driver is unloaded with rmmod. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/core/cec-adap.c | 5 ++++- drivers/media/cec/core/cec-core.c | 2 ++ drivers/media/cec/core/cec-priv.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index 769ea6b2e1d0..be0c38969479 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -1585,7 +1585,7 @@ static void cec_claim_log_addrs(struct cec_adapter *adap, bool block) * * This function is called with adap->lock held. */ -static int cec_adap_enable(struct cec_adapter *adap) +int cec_adap_enable(struct cec_adapter *adap) { bool enable; int ret = 0; @@ -1595,6 +1595,9 @@ static int cec_adap_enable(struct cec_adapter *adap) if (adap->needs_hpd) enable = enable && adap->phys_addr != CEC_PHYS_ADDR_INVALID; + if (adap->devnode.unregistered) + enable = false; + if (enable == adap->is_enabled) return 0; diff --git a/drivers/media/cec/core/cec-core.c b/drivers/media/cec/core/cec-core.c index af358e901b5f..7e153c5cad04 100644 --- a/drivers/media/cec/core/cec-core.c +++ b/drivers/media/cec/core/cec-core.c @@ -191,6 +191,8 @@ static void cec_devnode_unregister(struct cec_adapter *adap) mutex_lock(&adap->lock); __cec_s_phys_addr(adap, CEC_PHYS_ADDR_INVALID, false); __cec_s_log_addrs(adap, NULL, false); + // Disable the adapter (since adap->devnode.unregistered is true) + cec_adap_enable(adap); mutex_unlock(&adap->lock); cdev_device_del(&devnode->cdev, &devnode->dev); diff --git a/drivers/media/cec/core/cec-priv.h b/drivers/media/cec/core/cec-priv.h index b78df931aa74..ed1f8c67626b 100644 --- a/drivers/media/cec/core/cec-priv.h +++ b/drivers/media/cec/core/cec-priv.h @@ -47,6 +47,7 @@ int cec_monitor_pin_cnt_inc(struct cec_adapter *adap); void cec_monitor_pin_cnt_dec(struct cec_adapter *adap); int cec_adap_status(struct seq_file *file, void *priv); int cec_thread_func(void *_adap); +int cec_adap_enable(struct cec_adapter *adap); void __cec_s_phys_addr(struct cec_adapter *adap, u16 phys_addr, bool block); int __cec_s_log_addrs(struct cec_adapter *adap, struct cec_log_addrs *log_addrs, bool block); From 73af6c7511038249cad3d5f3b44bf8d78ac0f499 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 24 Apr 2023 16:07:28 +0100 Subject: [PATCH 160/934] media: cec: core: don't set last_initiator if tx in progress When a message was received the last_initiator is set to 0xff. This will force the signal free time for the next transmit to that for a new initiator. However, if a new transmit is already in progress, then don't set last_initiator, since that's the initiator of the current transmit. Overwriting this would cause the signal free time of a following transmit to be that of the new initiator instead of a next transmit. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/core/cec-adap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index be0c38969479..241b1621b197 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -1091,7 +1091,8 @@ void cec_received_msg_ts(struct cec_adapter *adap, mutex_lock(&adap->lock); dprintk(2, "%s: %*ph\n", __func__, msg->len, msg->msg); - adap->last_initiator = 0xff; + if (!adap->transmit_in_progress) + adap->last_initiator = 0xff; /* Check if this message was for us (directed or broadcast). */ if (!cec_msg_is_broadcast(msg)) { From a05e9aabd9dc27fc8888678391e3bf78624f8253 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 19 Apr 2023 08:06:02 +0100 Subject: [PATCH 161/934] media: staging: media: atomisp: init high & low vars Fix a compiler warning: include/linux/dev_printk.h: In function 'ov2680_probe': include/linux/dev_printk.h:144:31: warning: 'high' may be used uninitialized [-Wmaybe-uninitialized] 144 | dev_printk_index_wrap(_dev_err, KERN_ERR, dev, dev_fmt(fmt), ##__VA_ARGS__) | ^~~~~~~~ In function 'ov2680_detect', inlined from 'ov2680_s_config' at drivers/staging/media/atomisp/i2c/atomisp-ov2680.c:468:8, inlined from 'ov2680_probe' at drivers/staging/media/atomisp/i2c/atomisp-ov2680.c:647:8: drivers/staging/media/atomisp/i2c/atomisp-ov2680.c:376:13: note: 'high' was declared here 376 | u32 high, low; | ^~~~ 'high' is indeed uninitialized after the ov_read_reg8() call failed, so there is no point showing the value. Just say that the read failed. But low can also be used uninitialized later, so just make it more robust and properly zero the high and low variables. Signed-off-by: Hans Verkuil Reviewed-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/atomisp/i2c/atomisp-ov2680.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c index 63de214916f5..c079368019e8 100644 --- a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c +++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c @@ -373,7 +373,7 @@ static int ov2680_get_fmt(struct v4l2_subdev *sd, static int ov2680_detect(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; - u32 high, low; + u32 high = 0, low = 0; int ret; u16 id; u8 revision; @@ -383,7 +383,7 @@ static int ov2680_detect(struct i2c_client *client) ret = ov_read_reg8(client, OV2680_SC_CMMN_CHIP_ID_H, &high); if (ret) { - dev_err(&client->dev, "sensor_id_high = 0x%x\n", high); + dev_err(&client->dev, "sensor_id_high read failed (%d)\n", ret); return -ENODEV; } ret = ov_read_reg8(client, OV2680_SC_CMMN_CHIP_ID_L, &low); From 9b9e46aa07273ceb96866b2e812b46f1ee0b8d2f Mon Sep 17 00:00:00 2001 From: Osama Muhammad Date: Thu, 25 May 2023 22:27:46 +0500 Subject: [PATCH 162/934] nfcsim.c: Fix error checking for debugfs_create_dir This patch fixes the error checking in nfcsim.c. The DebugFS kernel API is developed in a way that the caller can safely ignore the errors that occur during the creation of DebugFS nodes. Signed-off-by: Osama Muhammad Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/nfc/nfcsim.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c index 44eeb17ae48d..a55381f80cd6 100644 --- a/drivers/nfc/nfcsim.c +++ b/drivers/nfc/nfcsim.c @@ -336,10 +336,6 @@ static struct dentry *nfcsim_debugfs_root; static void nfcsim_debugfs_init(void) { nfcsim_debugfs_root = debugfs_create_dir("nfcsim", NULL); - - if (!nfcsim_debugfs_root) - pr_err("Could not create debugfs entry\n"); - } static void nfcsim_debugfs_remove(void) From 8d73259ef23f449329294dc187932f7470268126 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 May 2023 14:20:38 -0700 Subject: [PATCH 163/934] perf ftrace latency: Remove unnecessary "--" from --use-nsec option The option name should not have the dashes. Current version shows four dashes for the option. $ perf ftrace latency -h Usage: perf ftrace [] [] or: perf ftrace [] -- [] [] or: perf ftrace {trace|latency} [] [] or: perf ftrace {trace|latency} [] -- [] [] -b, --use-bpf Use BPF to measure function latency -n, ----use-nsec Use nano-second histogram -T, --trace-funcs Show latency of given function Fixes: 84005bb6148618cc ("perf ftrace latency: Add -n/--use-nsec option") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Changbin Du Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230525212038.3535851-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 810e3376c7d6..f9906f52e4fa 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -1175,7 +1175,7 @@ int cmd_ftrace(int argc, const char **argv) OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf, "Use BPF to measure function latency"), #endif - OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec, + OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec, "Use nano-second histogram"), OPT_PARENT(common_options), }; From 8938f75a5e35c597a647c28984a0304da7a33d63 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Tue, 23 May 2023 17:12:22 +0200 Subject: [PATCH 164/934] ASoC: simple-card: Add missing of_node_put() in case of error In the error path, a of_node_put() for platform is missing. Just add it. Signed-off-by: Herve Codina Acked-by: Kuninori Morimoto Link: https://lore.kernel.org/r/20230523151223.109551-9-herve.codina@bootlin.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 6f044cc8357e..5a5e4ecd0f61 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -416,6 +416,7 @@ static int __simple_for_each_link(struct asoc_simple_priv *priv, if (ret < 0) { of_node_put(codec); + of_node_put(plat); of_node_put(np); goto error; } From 420c4495b5e56cc11e6802ce98400544f698b393 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 9 May 2023 19:39:00 +0000 Subject: [PATCH 165/934] mtd: spi-nor: spansion: make sure local struct does not contain garbage Following errors were seen with um-x86_64-gcc12/um-allyesconfig: + /kisskb/src/drivers/mtd/spi-nor/spansion.c: error: 'op' is used uninitialized [-Werror=uninitialized]: => 495:27, 364:27 Initialise local struct spi_mem_op with all zeros at declaration in order to avoid using garbage data for fields that are not explicitly set afterwards. Reported-by: Geert Uytterhoeven Fixes: c87c9b11c53ce ("mtd: spi-nor: spansion: Determine current address mode") Fixes: 6afcc84080c41 ("mtd: spi-nor: spansion: Add support for Infineon S25FS256T") Signed-off-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230509193900.948753-1-tudor.ambarus@linaro.org --- drivers/mtd/spi-nor/spansion.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/spi-nor/spansion.c b/drivers/mtd/spi-nor/spansion.c index 15f9a80c10b9..36876aa849ed 100644 --- a/drivers/mtd/spi-nor/spansion.c +++ b/drivers/mtd/spi-nor/spansion.c @@ -361,7 +361,7 @@ static int cypress_nor_determine_addr_mode_by_sr1(struct spi_nor *nor, */ static int cypress_nor_set_addr_mode_nbytes(struct spi_nor *nor) { - struct spi_mem_op op; + struct spi_mem_op op = {}; u8 addr_mode; int ret; @@ -492,7 +492,7 @@ s25fs256t_post_bfpt_fixup(struct spi_nor *nor, const struct sfdp_parameter_header *bfpt_header, const struct sfdp_bfpt *bfpt) { - struct spi_mem_op op; + struct spi_mem_op op = {}; int ret; ret = cypress_nor_set_addr_mode_nbytes(nor); From 650a8884a364ff2568b51cde9009cfd43cdae6ad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 22:21:24 +0200 Subject: [PATCH 166/934] mtd: rawnand: ingenic: fix empty stub helper definitions A few functions provide an empty interface definition when CONFIG_MTD_NAND_INGENIC_ECC is disabled, but they are accidentally defined as global functions in the header: drivers/mtd/nand/raw/ingenic/ingenic_ecc.h:39:5: error: no previous prototype for 'ingenic_ecc_calculate' drivers/mtd/nand/raw/ingenic/ingenic_ecc.h:46:5: error: no previous prototype for 'ingenic_ecc_correct' drivers/mtd/nand/raw/ingenic/ingenic_ecc.h:53:6: error: no previous prototype for 'ingenic_ecc_release' drivers/mtd/nand/raw/ingenic/ingenic_ecc.h:57:21: error: no previous prototype for 'of_ingenic_ecc_get' Turn them into 'static inline' definitions instead. Fixes: 15de8c6efd0e ("mtd: rawnand: ingenic: Separate top-level and SoC specific code") Signed-off-by: Arnd Bergmann Reviewed-by: Paul Cercueil Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230516202133.559488-1-arnd@kernel.org --- drivers/mtd/nand/raw/ingenic/ingenic_ecc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h index 2cda439b5e11..017868f59f22 100644 --- a/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h +++ b/drivers/mtd/nand/raw/ingenic/ingenic_ecc.h @@ -36,25 +36,25 @@ int ingenic_ecc_correct(struct ingenic_ecc *ecc, void ingenic_ecc_release(struct ingenic_ecc *ecc); struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *np); #else /* CONFIG_MTD_NAND_INGENIC_ECC */ -int ingenic_ecc_calculate(struct ingenic_ecc *ecc, +static inline int ingenic_ecc_calculate(struct ingenic_ecc *ecc, struct ingenic_ecc_params *params, const u8 *buf, u8 *ecc_code) { return -ENODEV; } -int ingenic_ecc_correct(struct ingenic_ecc *ecc, +static inline int ingenic_ecc_correct(struct ingenic_ecc *ecc, struct ingenic_ecc_params *params, u8 *buf, u8 *ecc_code) { return -ENODEV; } -void ingenic_ecc_release(struct ingenic_ecc *ecc) +static inline void ingenic_ecc_release(struct ingenic_ecc *ecc) { } -struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *np) +static inline struct ingenic_ecc *of_ingenic_ecc_get(struct device_node *np) { return ERR_PTR(-ENODEV); } From 4095f4d9225a64921a6ee4bb8dbc94c6edc2df08 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 18 May 2023 08:54:40 +0000 Subject: [PATCH 167/934] mtd: spi-nor: Fix divide by zero for spi-nor-generic flashes We failed to initialize n_banks for spi-nor-generic flashes, which caused a devide by zero when computing the bank_size. By default we consider that all chips have a single bank. Initialize the default number of banks for spi-nor-generic flashes. Even if the bug is fixed with this simple initialization, check the n_banks value before dividing so that we make sure this kind of bug won't occur again if some other struct instance is created uninitialized. Suggested-by: Todd Brandt Reported-by: Todd Brandt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217448 Fixes: 9d6c5d64f028 ("mtd: spi-nor: Introduce the concept of bank") Link: https://lore.kernel.org/all/20230516225108.29194-1-todd.e.brandt@intel.com/ Signed-off-by: Tudor Ambarus Tested-by: Todd Brandt Reviewed-by: Miquel Raynal Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230518085440.2363676-1-tudor.ambarus@linaro.org --- drivers/mtd/spi-nor/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 0bb0ad14a2fc..5f29fac8669a 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -2018,6 +2018,7 @@ static const struct spi_nor_manufacturer *manufacturers[] = { static const struct flash_info spi_nor_generic_flash = { .name = "spi-nor-generic", + .n_banks = 1, /* * JESD216 rev A doesn't specify the page size, therefore we need a * sane default. @@ -2921,7 +2922,8 @@ static void spi_nor_late_init_params(struct spi_nor *nor) if (nor->flags & SNOR_F_HAS_LOCK && !nor->params->locking_ops) spi_nor_init_default_locking_ops(nor); - nor->params->bank_size = div64_u64(nor->params->size, nor->info->n_banks); + if (nor->info->n_banks > 1) + params->bank_size = div64_u64(params->size, nor->info->n_banks); } /** @@ -2987,6 +2989,7 @@ static void spi_nor_init_default_params(struct spi_nor *nor) /* Set SPI NOR sizes. */ params->writesize = 1; params->size = (u64)info->sector_size * info->n_sectors; + params->bank_size = params->size; params->page_size = info->page_size; if (!(info->flags & SPI_NOR_NO_FR)) { From 690917c647e245da76183656400d4926427a8b93 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 24 May 2023 17:03:07 -0700 Subject: [PATCH 168/934] perf bpf filter: Fix a broken perf sample data naming for BPF CO-RE BPF CO-RE requires 3 underscores for the ignored suffix rule but it mistakenly used only 2. Let's fix it. Fixes: 3a8b8fc3174891c4 ("perf bpf filter: Support pre-5.16 kernels where 'mem_hops' isn't in 'union perf_mem_data_src'") Signed-off-by: Namhyung Kim Acked-by: Andrii Nakryiko Acked-by: John Fastabend Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20230525000307.3202449-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_skel/sample_filter.bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c index cffe493af1ed..fb94f5280626 100644 --- a/tools/perf/util/bpf_skel/sample_filter.bpf.c +++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c @@ -25,7 +25,7 @@ struct perf_sample_data___new { } __attribute__((preserve_access_index)); /* new kernel perf_mem_data_src definition */ -union perf_mem_data_src__new { +union perf_mem_data_src___new { __u64 val; struct { __u64 mem_op:5, /* type of opcode */ @@ -108,7 +108,7 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, if (entry->part == 7) return kctx->data->data_src.mem_blk; if (entry->part == 8) { - union perf_mem_data_src__new *data = (void *)&kctx->data->data_src; + union perf_mem_data_src___new *data = (void *)&kctx->data->data_src; if (bpf_core_field_exists(data->mem_hops)) return data->mem_hops; From a0c2f92d36d20d72efb27cbe8669037321e92f22 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 15 May 2023 09:50:39 -0700 Subject: [PATCH 169/934] perf arm: Fix include path to cs-etm.h Change "../cs-etm.h" to just "../../../util/cs-etm.h" as ../cs-etm.h doesn't exist. Suggested-by: Leo Yan Reviewed-by: Leo Yan Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230515165039.544045-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index 860a8b42b4b5..a9623b128ece 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -12,7 +12,7 @@ #include "arm-spe.h" #include "hisi-ptt.h" #include "../../../util/pmu.h" -#include "../cs-etm.h" +#include "../../../util/cs-etm.h" struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) From 65cd8e5534f873b76b55fb17e942c512ee3699d9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 17 Apr 2023 12:25:46 -0700 Subject: [PATCH 170/934] perf build: Don't compile demangle-cxx.cpp if not necessary demangle-cxx.cpp requires a C++ compiler, but feature checks may fail because of the absence of this. Add a CONFIG_CXX_DEMANGLE so that the source isn't built if not supported. Copy libbfd and cplus demangle variants to a weak symbol-elf.c version so they aren't dependent on C++. These variants are only built with the build option BUILD_NONDISTRO=1. Committer note: This also handles this build break when a C++ compiler isn't available: CXX /tmp/build/perf/util/demangle-cxx.o /bin/sh: g++: command not found Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Qi Liu Cc: Ravi Bangoria Link: https://lore.kernel.org/r/20230417192546.99923-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 + tools/perf/util/Build | 2 +- tools/perf/util/symbol-elf.c | 27 +++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 70268442f7ee..a794d9eca93d 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -927,6 +927,7 @@ ifndef NO_DEMANGLE EXTLIBS += -lstdc++ CFLAGS += -DHAVE_CXA_DEMANGLE_SUPPORT CXXFLAGS += -DHAVE_CXA_DEMANGLE_SUPPORT + $(call detected,CONFIG_CXX_DEMANGLE) endif ifdef BUILD_NONDISTRO ifeq ($(filter -liberty,$(EXTLIBS)),) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index bd18fe5f2719..f9df1df1eec0 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -214,7 +214,7 @@ perf-$(CONFIG_ZSTD) += zstd.o perf-$(CONFIG_LIBCAP) += cap.o -perf-y += demangle-cxx.o +perf-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o perf-y += demangle-ocaml.o perf-y += demangle-java.o perf-y += demangle-rust.o diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index b2ed9cc52265..63882a4db5c7 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -31,6 +31,13 @@ #include #endif +#if defined(HAVE_LIBBFD_SUPPORT) || defined(HAVE_CPLUS_DEMANGLE_SUPPORT) +#ifndef DMGL_PARAMS +#define DMGL_PARAMS (1 << 0) /* Include function args */ +#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ +#endif +#endif + #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ #endif @@ -271,6 +278,26 @@ static bool want_demangle(bool is_kernel_sym) return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; } +/* + * Demangle C++ function signature, typically replaced by demangle-cxx.cpp + * version. + */ +__weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, + bool modifiers __maybe_unused) +{ +#ifdef HAVE_LIBBFD_SUPPORT + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return bfd_demangle(NULL, str, flags); +#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return cplus_demangle(str, flags); +#else + return NULL; +#endif +} + static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) { char *demangled = NULL; From 251c01e27feee83ff8fa294d33277616c9032dd0 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 26 May 2023 15:30:23 -0300 Subject: [PATCH 171/934] perf bpf: Do not use llvm-strip on BPF binary llvm-strip is not really required. Remove this dependency to make it easier to build perf with BUILD_BPF_SKEL=1. Committer notes: This removes the need for the 'llvm' package just to get llvm-strip. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Song Liu Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1593c5dcaa9e..f48794816d82 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -181,7 +181,6 @@ HOSTCC ?= gcc HOSTLD ?= ld HOSTAR ?= ar CLANG ?= clang -LLVM_STRIP ?= llvm-strip PKG_CONFIG = $(CROSS_COMPILE)pkg-config @@ -1083,7 +1082,7 @@ $(BPFTOOL): | $(SKEL_TMP_OUT) $(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \ - -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@ + -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) $(QUIET_GENSKEL)$(BPFTOOL) gen skeleton $< > $@ From 15d4371bac978bb96b36c37e822cdae24c0dcd77 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 May 2023 11:26:04 +0100 Subject: [PATCH 172/934] perf cs-etm: Copy kernel coresight-pmu.h header Copy the kernel version of the header to fix the header diff build warning. Some new definitions were only added to the tools side header, but these are only used in Perf so move them to a different header. Signed-off-by: James Clark Reviewed-by: Mike Leach Link: https://lore.kernel.org/r/20230522102604.1081416-1-james.clark@arm.com Cc: Mark Rutland Cc: Suzuki K Poulose Cc: Ian Rogers Cc: Peter Zijlstra Cc: Adrian Hunter Cc: acme@kernel.org Cc: Jiri Olsa Cc: Namhyung Kim Cc: Will Deacon Cc: Leo Yan Cc: Alexander Shishkin Cc: linux-arm-kernel@lists.infradead.org Cc: coresight@lists.linaro.org Cc: siyanteng@loongson.cn Cc: John Garry Cc: Ingo Molnar Cc: linux-kernel@vger.kernel.org Cc: linux-perf-users@vger.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/coresight-pmu.h | 13 ------------- tools/perf/util/cs-etm.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index cef3b1c25335..51ac441a37c3 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -21,19 +21,6 @@ */ #define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) -/* CoreSight trace ID is currently the bottom 7 bits of the value */ -#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) - -/* - * perf record will set the legacy meta data values as unused initially. - * This allows perf report to manage the decoders created when dynamic - * allocation in operation. - */ -#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31) - -/* Value to set for unused trace ID values */ -#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F - /* * Below are the definition of bit offsets for perf option, and works as * arbitrary values for all ETM versions. diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 70cac0375b34..ecca40787ac9 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -227,6 +227,19 @@ struct cs_etm_packet_queue { #define INFO_HEADER_SIZE (sizeof(((struct perf_record_auxtrace_info *)0)->type) + \ sizeof(((struct perf_record_auxtrace_info *)0)->reserved__)) +/* CoreSight trace ID is currently the bottom 7 bits of the value */ +#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) + +/* + * perf record will set the legacy meta data values as unused initially. + * This allows perf report to manage the decoders created when dynamic + * allocation in operation. + */ +#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31) + +/* Value to set for unused trace ID values */ +#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu); From c8268a9b91055c992909a0845cfa59c624908da8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 May 2023 15:45:03 -0300 Subject: [PATCH 173/934] tools headers UAPI: Sync the linux/in.h with the kernel sources Picking the changes from: 3632679d9e4f879f ("ipv{4,6}/raw: fix output xfrm lookup wrt protocol") That includes a define (IP_PROTOCOL) that isn't being used in generating any id -> string table used by 'perf trace'. Addresses this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h' diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Nicolas Dichtel Cc: Paolo Abeni Link: https://lore.kernel.org/lkml/ZHD/Ms0DMq7viaq+@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/in.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 4b7f2df66b99..e682ab628dfa 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -163,6 +163,7 @@ struct in_addr { #define IP_MULTICAST_ALL 49 #define IP_UNICAST_IF 50 #define IP_LOCAL_PORT_RANGE 51 +#define IP_PROTOCOL 52 #define MCAST_EXCLUDE 0 #define MCAST_INCLUDE 1 From c041d33bf7ec731bb71f47e4d45a7aec9e40b1b9 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 19 May 2023 16:57:57 -0700 Subject: [PATCH 174/934] perf evsel: Separate bpf_counter_list and bpf_filters, can be used at the same time 'struct evsel' uses a union for the two lists. This turned out to be error prone. For example: [root@quaco ~]# perf stat --bpf-prog 5246 Error: cpu-clock event does not have sample flags 66c660 failed to set filter "(null)" on event cpu-clock with 2 (No such file or directory) [root@quaco ~]# perf stat --bpf-prog 5246 Fix this issue by separating the two lists. Fixes: 56ec9457a4a20c5e ("perf bpf filter: Implement event sample filtering") Signed-off-by: Song Liu Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Namhyung Kim Cc: kernel-team@meta.com Link: https://lore.kernel.org/r/20230519235757.3613719-1-song@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + tools/perf/util/evsel.h | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2f5910b31fa9..c2dbb5647e75 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -282,6 +282,7 @@ void evsel__init(struct evsel *evsel, evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->config_terms); INIT_LIST_HEAD(&evsel->bpf_counter_list); + INIT_LIST_HEAD(&evsel->bpf_filters); perf_evsel__object.init(evsel); evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel__calc_id_pos(evsel); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index df8928745fc6..0f54f28a69c2 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -151,10 +151,8 @@ struct evsel { */ struct bpf_counter_ops *bpf_counter_ops; - union { - struct list_head bpf_counter_list; /* for perf-stat -b */ - struct list_head bpf_filters; /* for perf-record --filter */ - }; + struct list_head bpf_counter_list; /* for perf-stat -b */ + struct list_head bpf_filters; /* for perf-record --filter */ /* for perf-stat --use-bpf */ int bperf_leader_prog_fd; From abac3ac97fe8734b620e7322a116450d7f90aa43 Mon Sep 17 00:00:00 2001 From: Vladislav Efanov Date: Fri, 26 May 2023 19:16:32 +0300 Subject: [PATCH 175/934] batman-adv: Broken sync while rescheduling delayed work Syzkaller got a lot of crashes like: KASAN: use-after-free Write in *_timers* All of these crashes point to the same memory area: The buggy address belongs to the object at ffff88801f870000 which belongs to the cache kmalloc-8k of size 8192 The buggy address is located 5320 bytes inside of 8192-byte region [ffff88801f870000, ffff88801f872000) This area belongs to : batadv_priv->batadv_priv_dat->delayed_work->timer_list The reason for these issues is the lack of synchronization. Delayed work (batadv_dat_purge) schedules new timer/work while the device is being deleted. As the result new timer/delayed work is set after cancel_delayed_work_sync() was called. So after the device is freed the timer list contains pointer to already freed memory. Found by Linux Verification Center (linuxtesting.org) with syzkaller. Cc: stable@kernel.org Fixes: 2f1dfbe18507 ("batman-adv: Distributed ARP Table - implement local storage") Signed-off-by: Vladislav Efanov Acked-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/distributed-arp-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 6968e55eb971..28a939d56090 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -101,7 +101,6 @@ static void batadv_dat_purge(struct work_struct *work); */ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) { - INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge); queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work, msecs_to_jiffies(10000)); } @@ -819,6 +818,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv) if (!bat_priv->dat.hash) return -ENOMEM; + INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge); batadv_dat_start_timer(bat_priv); batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1, From 45c2f36871955b51b4ce083c447388d8c72d6b91 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 May 2023 11:18:21 +0200 Subject: [PATCH 176/934] btrfs: call btrfs_orig_bbio_end_io in btrfs_end_bio_work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When I implemented the storage layer bio splitting, I was under the assumption that we'll never split metadata bios. But Qu reminded me that this can actually happen with very old file systems with unaligned metadata chunks and RAID0. I still haven't seen such a case in practice, but we better handled this case, especially as it is fairly easily to do not calling the ->end_іo method directly in btrfs_end_io_work, and using the proper btrfs_orig_bbio_end_io helper instead. In addition to the old file system with unaligned metadata chunks case documented in the commit log, the combination of the new scrub code with Johannes pending raid-stripe-tree also triggers this case. We spent some time debugging it and found that this patch solves the problem. Fixes: 103c19723c80 ("btrfs: split the bio submission path into a separate file") CC: stable@vger.kernel.org # 6.3+ Reviewed-by: Johannes Thumshirn Tested-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 5379c4714905..35d34c731d72 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -330,7 +330,7 @@ static void btrfs_end_bio_work(struct work_struct *work) if (bbio->inode && !(bbio->bio.bi_opf & REQ_META)) btrfs_check_read_bio(bbio, bbio->bio.bi_private); else - bbio->end_io(bbio); + btrfs_orig_bbio_end_io(bbio); } static void btrfs_simple_end_io(struct bio *bio) From 8fd9f4232d8152c650fd15127f533a0f6d0a4b2b Mon Sep 17 00:00:00 2001 From: Shida Zhang Date: Tue, 16 May 2023 09:34:30 +0800 Subject: [PATCH 177/934] btrfs: fix an uninitialized variable warning in btrfs_log_inode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following warning reported by gcc 10.2.1 under x86_64: ../fs/btrfs/tree-log.c: In function ‘btrfs_log_inode’: ../fs/btrfs/tree-log.c:6211:9: error: ‘last_range_start’ may be used uninitialized in this function [-Werror=maybe-uninitialized] 6211 | ret = insert_dir_log_key(trans, log, path, key.objectid, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6212 | first_dir_index, last_dir_index); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../fs/btrfs/tree-log.c:6161:6: note: ‘last_range_start’ was declared here 6161 | u64 last_range_start; | ^~~~~~~~~~~~~~~~ This might be a false positive fixed in later compiler versions but we want to have it fixed. Reported-by: k2ci Reviewed-by: Anand Jain Signed-off-by: Shida Zhang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9b212e8c70cc..d2755d5e338b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6158,7 +6158,7 @@ static int log_delayed_deletions_incremental(struct btrfs_trans_handle *trans, { struct btrfs_root *log = inode->root->log_root; const struct btrfs_delayed_item *curr; - u64 last_range_start; + u64 last_range_start = 0; u64 last_range_end = 0; struct btrfs_key key; From 5ad9b4719fc9bc4715c7e19875a962095b0577e7 Mon Sep 17 00:00:00 2001 From: pengfuyuan Date: Tue, 23 May 2023 15:09:55 +0800 Subject: [PATCH 178/934] btrfs: fix csum_tree_block page iteration to avoid tripping on -Werror=array-bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling on a MIPS 64-bit machine we get these warnings: In file included from ./arch/mips/include/asm/cacheflush.h:13, from ./include/linux/cacheflush.h:5, from ./include/linux/highmem.h:8, from ./include/linux/bvec.h:10, from ./include/linux/blk_types.h:10, from ./include/linux/blkdev.h:9, from fs/btrfs/disk-io.c:7: fs/btrfs/disk-io.c: In function ‘csum_tree_block’: fs/btrfs/disk-io.c:100:34: error: array subscript 1 is above array bounds of ‘struct page *[1]’ [-Werror=array-bounds] 100 | kaddr = page_address(buf->pages[i]); | ~~~~~~~~~~^~~ ./include/linux/mm.h:2135:48: note: in definition of macro ‘page_address’ 2135 | #define page_address(page) lowmem_page_address(page) | ^~~~ cc1: all warnings being treated as errors We can check if i overflows to solve the problem. However, this doesn't make much sense, since i == 1 and num_pages == 1 doesn't execute the body of the loop. In addition, i < num_pages can also ensure that buf->pages[i] will not cross the boundary. Unfortunately, this doesn't help with the problem observed here: gcc still complains. To fix this add a compile-time condition for the extent buffer page array size limit, which would eventually lead to eliminating the whole for loop. CC: stable@vger.kernel.org # 5.10+ Signed-off-by: pengfuyuan Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6de6dcf2743e..2b1b227505f3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -96,7 +96,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result) crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE, first_page_part - BTRFS_CSUM_SIZE); - for (i = 1; i < num_pages; i++) { + for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) { kaddr = page_address(buf->pages[i]); crypto_shash_update(shash, kaddr, PAGE_SIZE); } From 48b47f0caaa8a9f05ed803cb4f335fa3a7bfc622 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 12 May 2023 17:05:41 +0900 Subject: [PATCH 179/934] ksmbd: fix uninitialized pointer read in ksmbd_vfs_rename() Uninitialized rd.delegated_inode can be used in vfs_rename(). Fix this by setting rd.delegated_inode to NULL to avoid the uninitialized read. Fixes: 74d7970febf7 ("ksmbd: fix racy issue from using ->d_parent and ->d_name") Reported-by: Coverity Scan Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 778c152708e4..9bdb01c5b201 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -743,6 +743,7 @@ retry: rd.new_dir = new_path.dentry->d_inode, rd.new_dentry = new_dentry, rd.flags = flags, + rd.delegated_inode = NULL, err = vfs_rename(&rd); if (err) ksmbd_debug(VFS, "vfs_rename failed err %d\n", err); From df14afeed2e6c1bbadef7d2f9c46887bbd6d8d94 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 14 May 2023 10:02:27 +0900 Subject: [PATCH 180/934] ksmbd: fix uninitialized pointer read in smb2_create_link() There is a case that file_present is true and path is uninitialized. This patch change file_present is set to false by default and set to true when patch is initialized. Fixes: 74d7970febf7 ("ksmbd: fix racy issue from using ->d_parent and ->d_name") Reported-by: Coverity Scan Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 717bcd20545b..1632b2a1e516 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -5506,7 +5506,7 @@ static int smb2_create_link(struct ksmbd_work *work, { char *link_name = NULL, *target_name = NULL, *pathname = NULL; struct path path; - bool file_present = true; + bool file_present = false; int rc; if (buf_len < (u64)sizeof(struct smb2_file_link_info) + @@ -5539,8 +5539,8 @@ static int smb2_create_link(struct ksmbd_work *work, if (rc) { if (rc != -ENOENT) goto out; - file_present = false; - } + } else + file_present = true; if (file_info->ReplaceIfExists) { if (file_present) { From 84c5aa47925a1f40d698b6a6a2bf67e99617433d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 12 May 2023 23:29:12 +0900 Subject: [PATCH 181/934] ksmbd: fix credit count leakage This patch fix the failure from smb2.credits.single_req_credits_granted test. When client send 8192 credit request, ksmbd return 8191 credit granted. ksmbd should give maximum possible credits that must be granted within the range of not exceeding the max credit to client. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 1632b2a1e516..bec885c007ce 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -326,13 +326,9 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) if (hdr->Command == SMB2_NEGOTIATE) aux_max = 1; else - aux_max = conn->vals->max_credits - credit_charge; + aux_max = conn->vals->max_credits - conn->total_credits; credits_granted = min_t(unsigned short, credits_requested, aux_max); - if (conn->vals->max_credits - conn->total_credits < credits_granted) - credits_granted = conn->vals->max_credits - - conn->total_credits; - conn->total_credits += credits_granted; work->credits_granted += credits_granted; From d738950f112c8f40f0515fe967db998e8235a175 Mon Sep 17 00:00:00 2001 From: Kuan-Ting Chen Date: Fri, 19 May 2023 22:59:28 +0900 Subject: [PATCH 182/934] ksmbd: fix slab-out-of-bounds read in smb2_handle_negotiate Check request_buf length first to avoid out-of-bounds read by req->DialectCount. [ 3350.990282] BUG: KASAN: slab-out-of-bounds in smb2_handle_negotiate+0x35d7/0x3e60 [ 3350.990282] Read of size 2 at addr ffff88810ad61346 by task kworker/5:0/276 [ 3351.000406] Workqueue: ksmbd-io handle_ksmbd_work [ 3351.003499] Call Trace: [ 3351.006473] [ 3351.006473] dump_stack_lvl+0x8d/0xe0 [ 3351.006473] print_report+0xcc/0x620 [ 3351.006473] kasan_report+0x92/0xc0 [ 3351.006473] smb2_handle_negotiate+0x35d7/0x3e60 [ 3351.014760] ksmbd_smb_negotiate_common+0x7a7/0xf00 [ 3351.014760] handle_ksmbd_work+0x3f7/0x12d0 [ 3351.014760] process_one_work+0xa85/0x1780 Cc: stable@vger.kernel.org Signed-off-by: Kuan-Ting Chen Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index bec885c007ce..83c668243c95 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1053,16 +1053,16 @@ int smb2_handle_negotiate(struct ksmbd_work *work) return rc; } - if (req->DialectCount == 0) { - pr_err("malformed packet\n"); + smb2_buf_len = get_rfc1002_len(work->request_buf); + smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects); + if (smb2_neg_size > smb2_buf_len) { rsp->hdr.Status = STATUS_INVALID_PARAMETER; rc = -EINVAL; goto err_out; } - smb2_buf_len = get_rfc1002_len(work->request_buf); - smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects); - if (smb2_neg_size > smb2_buf_len) { + if (req->DialectCount == 0) { + pr_err("malformed packet\n"); rsp->hdr.Status = STATUS_INVALID_PARAMETER; rc = -EINVAL; goto err_out; From 0512a5f89e1fae74251fde6893ff634f1c96c6fb Mon Sep 17 00:00:00 2001 From: Kuan-Ting Chen Date: Fri, 19 May 2023 23:00:24 +0900 Subject: [PATCH 183/934] ksmbd: fix multiple out-of-bounds read during context decoding Check the remaining data length before accessing the context structure to ensure that the entire structure is contained within the packet. Additionally, since the context data length `ctxt_len` has already been checked against the total packet length `len_of_ctxts`, update the comparison to use `ctxt_len`. Cc: stable@vger.kernel.org Signed-off-by: Kuan-Ting Chen Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 53 ++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 83c668243c95..a1939ff7c742 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -845,13 +845,14 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn, static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn, struct smb2_preauth_neg_context *pneg_ctxt, - int len_of_ctxts) + int ctxt_len) { /* * sizeof(smb2_preauth_neg_context) assumes SMB311_SALT_SIZE Salt, * which may not be present. Only check for used HashAlgorithms[1]. */ - if (len_of_ctxts < MIN_PREAUTH_CTXT_DATA_LEN) + if (ctxt_len < + sizeof(struct smb2_neg_context) + MIN_PREAUTH_CTXT_DATA_LEN) return STATUS_INVALID_PARAMETER; if (pneg_ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512) @@ -863,15 +864,23 @@ static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn, static void decode_encrypt_ctxt(struct ksmbd_conn *conn, struct smb2_encryption_neg_context *pneg_ctxt, - int len_of_ctxts) + int ctxt_len) { - int cph_cnt = le16_to_cpu(pneg_ctxt->CipherCount); - int i, cphs_size = cph_cnt * sizeof(__le16); + int cph_cnt; + int i, cphs_size; + + if (sizeof(struct smb2_encryption_neg_context) > ctxt_len) { + pr_err("Invalid SMB2_ENCRYPTION_CAPABILITIES context size\n"); + return; + } conn->cipher_type = 0; + cph_cnt = le16_to_cpu(pneg_ctxt->CipherCount); + cphs_size = cph_cnt * sizeof(__le16); + if (sizeof(struct smb2_encryption_neg_context) + cphs_size > - len_of_ctxts) { + ctxt_len) { pr_err("Invalid cipher count(%d)\n", cph_cnt); return; } @@ -919,15 +928,22 @@ static void decode_compress_ctxt(struct ksmbd_conn *conn, static void decode_sign_cap_ctxt(struct ksmbd_conn *conn, struct smb2_signing_capabilities *pneg_ctxt, - int len_of_ctxts) + int ctxt_len) { - int sign_algo_cnt = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount); - int i, sign_alos_size = sign_algo_cnt * sizeof(__le16); + int sign_algo_cnt; + int i, sign_alos_size; + + if (sizeof(struct smb2_signing_capabilities) > ctxt_len) { + pr_err("Invalid SMB2_SIGNING_CAPABILITIES context length\n"); + return; + } conn->signing_negotiated = false; + sign_algo_cnt = le16_to_cpu(pneg_ctxt->SigningAlgorithmCount); + sign_alos_size = sign_algo_cnt * sizeof(__le16); if (sizeof(struct smb2_signing_capabilities) + sign_alos_size > - len_of_ctxts) { + ctxt_len) { pr_err("Invalid signing algorithm count(%d)\n", sign_algo_cnt); return; } @@ -965,18 +981,16 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn, len_of_ctxts = len_of_smb - offset; while (i++ < neg_ctxt_cnt) { - int clen; - - /* check that offset is not beyond end of SMB */ - if (len_of_ctxts == 0) - break; + int clen, ctxt_len; if (len_of_ctxts < sizeof(struct smb2_neg_context)) break; pctx = (struct smb2_neg_context *)((char *)pctx + offset); clen = le16_to_cpu(pctx->DataLength); - if (clen + sizeof(struct smb2_neg_context) > len_of_ctxts) + ctxt_len = clen + sizeof(struct smb2_neg_context); + + if (ctxt_len > len_of_ctxts) break; if (pctx->ContextType == SMB2_PREAUTH_INTEGRITY_CAPABILITIES) { @@ -987,7 +1001,7 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn, status = decode_preauth_ctxt(conn, (struct smb2_preauth_neg_context *)pctx, - len_of_ctxts); + ctxt_len); if (status != STATUS_SUCCESS) break; } else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) { @@ -998,7 +1012,7 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn, decode_encrypt_ctxt(conn, (struct smb2_encryption_neg_context *)pctx, - len_of_ctxts); + ctxt_len); } else if (pctx->ContextType == SMB2_COMPRESSION_CAPABILITIES) { ksmbd_debug(SMB, "deassemble SMB2_COMPRESSION_CAPABILITIES context\n"); @@ -1017,9 +1031,10 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn, } else if (pctx->ContextType == SMB2_SIGNING_CAPABILITIES) { ksmbd_debug(SMB, "deassemble SMB2_SIGNING_CAPABILITIES context\n"); + decode_sign_cap_ctxt(conn, (struct smb2_signing_capabilities *)pctx, - len_of_ctxts); + ctxt_len); } /* offsets must be 8 byte aligned */ From 36322523dddb11107e9f7f528675a0dec2536103 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 19 May 2023 23:09:48 +0900 Subject: [PATCH 184/934] ksmbd: fix UAF issue from opinfo->conn If opinfo->conn is another connection and while ksmbd send oplock break request to cient on current connection, The connection for opinfo->conn can be disconnect and conn could be freed. When sending oplock break request, this ksmbd_conn can be used and cause user-after-free issue. When getting opinfo from the list, ksmbd check connection is being released. If it is not released, Increase ->r_count to wait that connection is freed. Cc: stable@vger.kernel.org Reported-by: Per Forlin Tested-by: Per Forlin Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 72 +++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 6d1ccb999893..db181bdad73a 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -157,13 +157,42 @@ static struct oplock_info *opinfo_get_list(struct ksmbd_inode *ci) rcu_read_lock(); opinfo = list_first_or_null_rcu(&ci->m_op_list, struct oplock_info, op_entry); - if (opinfo && !atomic_inc_not_zero(&opinfo->refcount)) - opinfo = NULL; + if (opinfo) { + if (!atomic_inc_not_zero(&opinfo->refcount)) + opinfo = NULL; + else { + atomic_inc(&opinfo->conn->r_count); + if (ksmbd_conn_releasing(opinfo->conn)) { + atomic_dec(&opinfo->conn->r_count); + atomic_dec(&opinfo->refcount); + opinfo = NULL; + } + } + } + rcu_read_unlock(); return opinfo; } +static void opinfo_conn_put(struct oplock_info *opinfo) +{ + struct ksmbd_conn *conn; + + if (!opinfo) + return; + + conn = opinfo->conn; + /* + * Checking waitqueue to dropping pending requests on + * disconnection. waitqueue_active is safe because it + * uses atomic operation for condition. + */ + if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q)) + wake_up(&conn->r_count_q); + opinfo_put(opinfo); +} + void opinfo_put(struct oplock_info *opinfo) { if (!atomic_dec_and_test(&opinfo->refcount)) @@ -666,13 +695,6 @@ static void __smb2_oplock_break_noti(struct work_struct *wk) out: ksmbd_free_work_struct(work); - /* - * Checking waitqueue to dropping pending requests on - * disconnection. waitqueue_active is safe because it - * uses atomic operation for condition. - */ - if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q)) - wake_up(&conn->r_count_q); } /** @@ -706,7 +728,6 @@ static int smb2_oplock_break_noti(struct oplock_info *opinfo) work->conn = conn; work->sess = opinfo->sess; - atomic_inc(&conn->r_count); if (opinfo->op_state == OPLOCK_ACK_WAIT) { INIT_WORK(&work->work, __smb2_oplock_break_noti); ksmbd_queue_work(work); @@ -776,13 +797,6 @@ static void __smb2_lease_break_noti(struct work_struct *wk) out: ksmbd_free_work_struct(work); - /* - * Checking waitqueue to dropping pending requests on - * disconnection. waitqueue_active is safe because it - * uses atomic operation for condition. - */ - if (!atomic_dec_return(&conn->r_count) && waitqueue_active(&conn->r_count_q)) - wake_up(&conn->r_count_q); } /** @@ -822,7 +836,6 @@ static int smb2_lease_break_noti(struct oplock_info *opinfo) work->conn = conn; work->sess = opinfo->sess; - atomic_inc(&conn->r_count); if (opinfo->op_state == OPLOCK_ACK_WAIT) { list_for_each_safe(tmp, t, &opinfo->interim_list) { struct ksmbd_work *in_work; @@ -1144,8 +1157,10 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, } prev_opinfo = opinfo_get_list(ci); if (!prev_opinfo || - (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx)) + (prev_opinfo->level == SMB2_OPLOCK_LEVEL_NONE && lctx)) { + opinfo_conn_put(prev_opinfo); goto set_lev; + } prev_op_has_lease = prev_opinfo->is_lease; if (prev_op_has_lease) prev_op_state = prev_opinfo->o_lease->state; @@ -1153,19 +1168,19 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, if (share_ret < 0 && prev_opinfo->level == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { err = share_ret; - opinfo_put(prev_opinfo); + opinfo_conn_put(prev_opinfo); goto err_out; } if (prev_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH && prev_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) { - opinfo_put(prev_opinfo); + opinfo_conn_put(prev_opinfo); goto op_break_not_needed; } list_add(&work->interim_entry, &prev_opinfo->interim_list); err = oplock_break(prev_opinfo, SMB2_OPLOCK_LEVEL_II); - opinfo_put(prev_opinfo); + opinfo_conn_put(prev_opinfo); if (err == -ENOENT) goto set_lev; /* Check all oplock was freed by close */ @@ -1228,14 +1243,14 @@ static void smb_break_all_write_oplock(struct ksmbd_work *work, return; if (brk_opinfo->level != SMB2_OPLOCK_LEVEL_BATCH && brk_opinfo->level != SMB2_OPLOCK_LEVEL_EXCLUSIVE) { - opinfo_put(brk_opinfo); + opinfo_conn_put(brk_opinfo); return; } brk_opinfo->open_trunc = is_trunc; list_add(&work->interim_entry, &brk_opinfo->interim_list); oplock_break(brk_opinfo, SMB2_OPLOCK_LEVEL_II); - opinfo_put(brk_opinfo); + opinfo_conn_put(brk_opinfo); } /** @@ -1263,6 +1278,13 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, list_for_each_entry_rcu(brk_op, &ci->m_op_list, op_entry) { if (!atomic_inc_not_zero(&brk_op->refcount)) continue; + + atomic_inc(&brk_op->conn->r_count); + if (ksmbd_conn_releasing(brk_op->conn)) { + atomic_dec(&brk_op->conn->r_count); + continue; + } + rcu_read_unlock(); if (brk_op->is_lease && (brk_op->o_lease->state & (~(SMB2_LEASE_READ_CACHING_LE | @@ -1292,7 +1314,7 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, brk_op->open_trunc = is_trunc; oplock_break(brk_op, SMB2_OPLOCK_LEVEL_NONE); next: - opinfo_put(brk_op); + opinfo_conn_put(brk_op); rcu_read_lock(); } rcu_read_unlock(); From 6cc2268f5647cbfde3d4fc2e4ee005070ea3a8d2 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 19 May 2023 23:11:33 +0900 Subject: [PATCH 185/934] ksmbd: fix incorrect AllocationSize set in smb2_get_info If filesystem support sparse file, ksmbd should return allocated size using ->i_blocks instead of stat->size. This fix generic/694 xfstests. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index a1939ff7c742..7a81541de602 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4369,21 +4369,6 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp, return 0; } -static unsigned long long get_allocation_size(struct inode *inode, - struct kstat *stat) -{ - unsigned long long alloc_size = 0; - - if (!S_ISDIR(stat->mode)) { - if ((inode->i_blocks << 9) <= stat->size) - alloc_size = stat->size; - else - alloc_size = inode->i_blocks << 9; - } - - return alloc_size; -} - static void get_file_standard_info(struct smb2_query_info_rsp *rsp, struct ksmbd_file *fp, void *rsp_org) { @@ -4398,7 +4383,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp, sinfo = (struct smb2_file_standard_info *)rsp->Buffer; delete_pending = ksmbd_inode_pending_delete(fp); - sinfo->AllocationSize = cpu_to_le64(get_allocation_size(inode, &stat)); + sinfo->AllocationSize = cpu_to_le64(inode->i_blocks << 9); sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); sinfo->DeletePending = delete_pending; @@ -4463,7 +4448,7 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->Attributes = fp->f_ci->m_fattr; file_info->Pad1 = 0; file_info->AllocationSize = - cpu_to_le64(get_allocation_size(inode, &stat)); + cpu_to_le64(inode->i_blocks << 9); file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); file_info->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); @@ -4652,7 +4637,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp, file_info->ChangeTime = cpu_to_le64(time); file_info->Attributes = fp->f_ci->m_fattr; file_info->AllocationSize = - cpu_to_le64(get_allocation_size(inode, &stat)); + cpu_to_le64(inode->i_blocks << 9); file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); file_info->Reserved = cpu_to_le32(0); rsp->OutputBufferLength = From 6fe55c2799bc29624770c26f98ba7b06214f43e0 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 25 May 2023 00:13:38 +0900 Subject: [PATCH 186/934] ksmbd: call putname after using the last component last component point filename struct. Currently putname is called after vfs_path_parent_lookup(). And then last component is used for lookup_one_qstr_excl(). name in last component is freed by previous calling putname(). And It cause file lookup failure when testing generic/464 test of xfstest. Fixes: 74d7970febf7 ("ksmbd: fix racy issue from using ->d_parent and ->d_name") Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/vfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 9bdb01c5b201..6f302919e9f7 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -86,12 +86,14 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, err = vfs_path_parent_lookup(filename, flags, &parent_path, &last, &type, root_share_path); - putname(filename); - if (err) + if (err) { + putname(filename); return err; + } if (unlikely(type != LAST_NORM)) { path_put(&parent_path); + putname(filename); return -ENOENT; } @@ -108,12 +110,14 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf, path->dentry = d; path->mnt = share_conf->vfs_path.mnt; path_put(&parent_path); + putname(filename); return 0; err_out: inode_unlock(parent_path.dentry->d_inode); path_put(&parent_path); + putname(filename); return -ENOENT; } From 396ac4c982f6d6cd7c0293a546a0ba5d77fe7f90 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 May 2023 15:01:33 +0300 Subject: [PATCH 187/934] smb: delete an unnecessary statement We don't need to set the list iterators to NULL before a list_for_each_entry() loop because they are assigned inside the macro. Signed-off-by: Dan Carpenter Reviewed-by: Mukesh Ojha Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 5065398665f1..6e3be58cfe49 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -618,7 +618,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, * Add a new one instead */ spin_lock(&ses->iface_lock); - iface = niface = NULL; list_for_each_entry_safe(iface, niface, &ses->iface_list, iface_head) { ret = iface_cmp(iface, &tmp_iface); From 4ea0bf4b98d66a7a790abb285539f395596bae92 Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Sat, 6 May 2023 11:55:02 +0200 Subject: [PATCH 188/934] io_uring: undeprecate epoll_ctl support Libuv recently started using it so there is at least one consumer now. Cc: stable@vger.kernel.org Fixes: 61a2732af4b0 ("io_uring: deprecate epoll_ctl support") Link: https://github.com/libuv/libuv/pull/3979 Signed-off-by: Ben Noordhuis Link: https://lore.kernel.org/r/20230506095502.13401-1-info@bnoordhuis.nl Signed-off-by: Jens Axboe --- io_uring/epoll.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/io_uring/epoll.c b/io_uring/epoll.c index 9aa74d2c80bc..89bff2068a19 100644 --- a/io_uring/epoll.c +++ b/io_uring/epoll.c @@ -25,10 +25,6 @@ int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_epoll *epoll = io_kiocb_to_cmd(req, struct io_epoll); - pr_warn_once("%s: epoll_ctl support in io_uring is deprecated and will " - "be removed in a future Linux kernel version.\n", - current->comm); - if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; From 3d49f7406b5d9822c1411c6658bac2ae55ba19a2 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 17 May 2023 17:16:34 +0530 Subject: [PATCH 189/934] EDAC/qcom: Remove superfluous return variable assignment in qcom_llcc_core_setup() "ret" variable will be assigned on both success and failure cases. So there is no need to initialize it during start of qcom_llcc_core_setup(). Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230517114635.76358-2-manivannan.sadhasivam@linaro.org --- drivers/edac/qcom_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c index 265e0fb39bc7..6140001f21c4 100644 --- a/drivers/edac/qcom_edac.c +++ b/drivers/edac/qcom_edac.c @@ -170,7 +170,7 @@ static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap) static int qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv) { - int ret = 0; + int ret; switch (err_type) { case LLCC_DRAM_CE: From cbd77119b6355872cd308a60e99f9ca678435d15 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 17 May 2023 17:16:35 +0530 Subject: [PATCH 190/934] EDAC/qcom: Get rid of hardcoded register offsets The LLCC EDAC register offsets varies between each SoC. Hardcoding the register offsets won't work and will often result in crash due to accessing the wrong locations. Hence, get the register offsets from the LLCC driver matching the individual SoCs. Cc: # 6.0: 5365cea199c7 ("soc: qcom: llcc: Rename reg_offset structs to reflect LLCC version") Cc: # 6.0: c13d7d261e36 ("soc: qcom: llcc: Pass LLCC version based register offsets to EDAC driver") Cc: # 6.0 Fixes: a6e9d7ef252c ("soc: qcom: llcc: Add configuration data for SM8450 SoC") Acked-by: Borislav Petkov (AMD) Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230517114635.76358-3-manivannan.sadhasivam@linaro.org --- drivers/edac/qcom_edac.c | 116 ++++++++++++++--------------- include/linux/soc/qcom/llcc-qcom.h | 6 -- 2 files changed, 58 insertions(+), 64 deletions(-) diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c index 6140001f21c4..b2db545c6810 100644 --- a/drivers/edac/qcom_edac.c +++ b/drivers/edac/qcom_edac.c @@ -21,30 +21,9 @@ #define TRP_SYN_REG_CNT 6 #define DRP_SYN_REG_CNT 8 -#define LLCC_COMMON_STATUS0 0x0003000c #define LLCC_LB_CNT_MASK GENMASK(31, 28) #define LLCC_LB_CNT_SHIFT 28 -/* Single & double bit syndrome register offsets */ -#define TRP_ECC_SB_ERR_SYN0 0x0002304c -#define TRP_ECC_DB_ERR_SYN0 0x00020370 -#define DRP_ECC_SB_ERR_SYN0 0x0004204c -#define DRP_ECC_DB_ERR_SYN0 0x00042070 - -/* Error register offsets */ -#define TRP_ECC_ERROR_STATUS1 0x00020348 -#define TRP_ECC_ERROR_STATUS0 0x00020344 -#define DRP_ECC_ERROR_STATUS1 0x00042048 -#define DRP_ECC_ERROR_STATUS0 0x00042044 - -/* TRP, DRP interrupt register offsets */ -#define DRP_INTERRUPT_STATUS 0x00041000 -#define TRP_INTERRUPT_0_STATUS 0x00020480 -#define DRP_INTERRUPT_CLEAR 0x00041008 -#define DRP_ECC_ERROR_CNTR_CLEAR 0x00040004 -#define TRP_INTERRUPT_0_CLEAR 0x00020484 -#define TRP_ECC_ERROR_CNTR_CLEAR 0x00020440 - /* Mask and shift macros */ #define ECC_DB_ERR_COUNT_MASK GENMASK(4, 0) #define ECC_DB_ERR_WAYS_MASK GENMASK(31, 16) @@ -60,15 +39,6 @@ #define DRP_TRP_INT_CLEAR GENMASK(1, 0) #define DRP_TRP_CNT_CLEAR GENMASK(1, 0) -/* Config registers offsets*/ -#define DRP_ECC_ERROR_CFG 0x00040000 - -/* Tag RAM, Data RAM interrupt register offsets */ -#define CMN_INTERRUPT_0_ENABLE 0x0003001c -#define CMN_INTERRUPT_2_ENABLE 0x0003003c -#define TRP_INTERRUPT_0_ENABLE 0x00020488 -#define DRP_INTERRUPT_ENABLE 0x0004100c - #define SB_ERROR_THRESHOLD 0x1 #define SB_ERROR_THRESHOLD_SHIFT 24 #define SB_DB_TRP_INTERRUPT_ENABLE 0x3 @@ -88,9 +58,6 @@ enum { static const struct llcc_edac_reg_data edac_reg_data[] = { [LLCC_DRAM_CE] = { .name = "DRAM Single-bit", - .synd_reg = DRP_ECC_SB_ERR_SYN0, - .count_status_reg = DRP_ECC_ERROR_STATUS1, - .ways_status_reg = DRP_ECC_ERROR_STATUS0, .reg_cnt = DRP_SYN_REG_CNT, .count_mask = ECC_SB_ERR_COUNT_MASK, .ways_mask = ECC_SB_ERR_WAYS_MASK, @@ -98,9 +65,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = { }, [LLCC_DRAM_UE] = { .name = "DRAM Double-bit", - .synd_reg = DRP_ECC_DB_ERR_SYN0, - .count_status_reg = DRP_ECC_ERROR_STATUS1, - .ways_status_reg = DRP_ECC_ERROR_STATUS0, .reg_cnt = DRP_SYN_REG_CNT, .count_mask = ECC_DB_ERR_COUNT_MASK, .ways_mask = ECC_DB_ERR_WAYS_MASK, @@ -108,9 +72,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = { }, [LLCC_TRAM_CE] = { .name = "TRAM Single-bit", - .synd_reg = TRP_ECC_SB_ERR_SYN0, - .count_status_reg = TRP_ECC_ERROR_STATUS1, - .ways_status_reg = TRP_ECC_ERROR_STATUS0, .reg_cnt = TRP_SYN_REG_CNT, .count_mask = ECC_SB_ERR_COUNT_MASK, .ways_mask = ECC_SB_ERR_WAYS_MASK, @@ -118,9 +79,6 @@ static const struct llcc_edac_reg_data edac_reg_data[] = { }, [LLCC_TRAM_UE] = { .name = "TRAM Double-bit", - .synd_reg = TRP_ECC_DB_ERR_SYN0, - .count_status_reg = TRP_ECC_ERROR_STATUS1, - .ways_status_reg = TRP_ECC_ERROR_STATUS0, .reg_cnt = TRP_SYN_REG_CNT, .count_mask = ECC_DB_ERR_COUNT_MASK, .ways_mask = ECC_DB_ERR_WAYS_MASK, @@ -128,7 +86,7 @@ static const struct llcc_edac_reg_data edac_reg_data[] = { }, }; -static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap) +static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_bcast_regmap) { u32 sb_err_threshold; int ret; @@ -137,31 +95,31 @@ static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap) * Configure interrupt enable registers such that Tag, Data RAM related * interrupts are propagated to interrupt controller for servicing */ - ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable, TRP0_INTERRUPT_ENABLE, TRP0_INTERRUPT_ENABLE); if (ret) return ret; - ret = regmap_update_bits(llcc_bcast_regmap, TRP_INTERRUPT_0_ENABLE, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->trp_interrupt_0_enable, SB_DB_TRP_INTERRUPT_ENABLE, SB_DB_TRP_INTERRUPT_ENABLE); if (ret) return ret; sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT); - ret = regmap_write(llcc_bcast_regmap, DRP_ECC_ERROR_CFG, + ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_ecc_error_cfg, sb_err_threshold); if (ret) return ret; - ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable, DRP0_INTERRUPT_ENABLE, DRP0_INTERRUPT_ENABLE); if (ret) return ret; - ret = regmap_write(llcc_bcast_regmap, DRP_INTERRUPT_ENABLE, + ret = regmap_write(llcc_bcast_regmap, drv->edac_reg_offset->drp_interrupt_enable, SB_DB_DRP_INTERRUPT_ENABLE); return ret; } @@ -175,24 +133,28 @@ qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv) switch (err_type) { case LLCC_DRAM_CE: case LLCC_DRAM_UE: - ret = regmap_write(drv->bcast_regmap, DRP_INTERRUPT_CLEAR, + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->drp_interrupt_clear, DRP_TRP_INT_CLEAR); if (ret) return ret; - ret = regmap_write(drv->bcast_regmap, DRP_ECC_ERROR_CNTR_CLEAR, + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->drp_ecc_error_cntr_clear, DRP_TRP_CNT_CLEAR); if (ret) return ret; break; case LLCC_TRAM_CE: case LLCC_TRAM_UE: - ret = regmap_write(drv->bcast_regmap, TRP_INTERRUPT_0_CLEAR, + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->trp_interrupt_0_clear, DRP_TRP_INT_CLEAR); if (ret) return ret; - ret = regmap_write(drv->bcast_regmap, TRP_ECC_ERROR_CNTR_CLEAR, + ret = regmap_write(drv->bcast_regmap, + drv->edac_reg_offset->trp_ecc_error_cntr_clear, DRP_TRP_CNT_CLEAR); if (ret) return ret; @@ -205,16 +167,54 @@ qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv) return ret; } +struct qcom_llcc_syn_regs { + u32 synd_reg; + u32 count_status_reg; + u32 ways_status_reg; +}; + +static void get_reg_offsets(struct llcc_drv_data *drv, int err_type, + struct qcom_llcc_syn_regs *syn_regs) +{ + const struct llcc_edac_reg_offset *edac_reg_offset = drv->edac_reg_offset; + + switch (err_type) { + case LLCC_DRAM_CE: + syn_regs->synd_reg = edac_reg_offset->drp_ecc_sb_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0; + break; + case LLCC_DRAM_UE: + syn_regs->synd_reg = edac_reg_offset->drp_ecc_db_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->drp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->drp_ecc_error_status0; + break; + case LLCC_TRAM_CE: + syn_regs->synd_reg = edac_reg_offset->trp_ecc_sb_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0; + break; + case LLCC_TRAM_UE: + syn_regs->synd_reg = edac_reg_offset->trp_ecc_db_err_syn0; + syn_regs->count_status_reg = edac_reg_offset->trp_ecc_error_status1; + syn_regs->ways_status_reg = edac_reg_offset->trp_ecc_error_status0; + break; + } +} + /* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/ static int dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type) { struct llcc_edac_reg_data reg_data = edac_reg_data[err_type]; + struct qcom_llcc_syn_regs regs = { }; int err_cnt, err_ways, ret, i; u32 synd_reg, synd_val; + get_reg_offsets(drv, err_type, ®s); + for (i = 0; i < reg_data.reg_cnt; i++) { - synd_reg = reg_data.synd_reg + (i * 4); + synd_reg = regs.synd_reg + (i * 4); ret = regmap_read(drv->regmaps[bank], synd_reg, &synd_val); if (ret) @@ -224,7 +224,7 @@ dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type) reg_data.name, i, synd_val); } - ret = regmap_read(drv->regmaps[bank], reg_data.count_status_reg, + ret = regmap_read(drv->regmaps[bank], regs.count_status_reg, &err_cnt); if (ret) goto clear; @@ -234,7 +234,7 @@ dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type) edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n", reg_data.name, err_cnt); - ret = regmap_read(drv->regmaps[bank], reg_data.ways_status_reg, + ret = regmap_read(drv->regmaps[bank], regs.ways_status_reg, &err_ways); if (ret) goto clear; @@ -295,7 +295,7 @@ static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl) /* Iterate over the banks and look for Tag RAM or Data RAM errors */ for (i = 0; i < drv->num_banks; i++) { - ret = regmap_read(drv->regmaps[i], DRP_INTERRUPT_STATUS, + ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->drp_interrupt_status, &drp_error); if (!ret && (drp_error & SB_ECC_ERROR)) { @@ -310,7 +310,7 @@ static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl) if (!ret) irq_rc = IRQ_HANDLED; - ret = regmap_read(drv->regmaps[i], TRP_INTERRUPT_0_STATUS, + ret = regmap_read(drv->regmaps[i], drv->edac_reg_offset->trp_interrupt_0_status, &trp_error); if (!ret && (trp_error & SB_ECC_ERROR)) { @@ -342,7 +342,7 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev) int ecc_irq; int rc; - rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap); + rc = qcom_llcc_core_setup(llcc_driv_data, llcc_driv_data->bcast_regmap); if (rc) return rc; diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 423220e66026..93417ba1ead4 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -69,9 +69,6 @@ struct llcc_slice_desc { /** * struct llcc_edac_reg_data - llcc edac registers data for each error type * @name: Name of the error - * @synd_reg: Syndrome register address - * @count_status_reg: Status register address to read the error count - * @ways_status_reg: Status register address to read the error ways * @reg_cnt: Number of registers * @count_mask: Mask value to get the error count * @ways_mask: Mask value to get the error ways @@ -80,9 +77,6 @@ struct llcc_slice_desc { */ struct llcc_edac_reg_data { char *name; - u64 synd_reg; - u64 count_status_reg; - u64 ways_status_reg; u32 reg_cnt; u32 count_mask; u32 ways_mask; From 780328abc0cbde7398d3c04be56fbb5f89ed10b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 26 May 2023 07:41:23 +0200 Subject: [PATCH 191/934] fbdev: matroxfb ssd1307fb: Switch i2c drivers back to use .probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit b8a1a4cd5a98 ("i2c: Provide a temporary .probe_new() call-back type"), all drivers being converted to .probe_new() and then 03c835f498b5 ("i2c: Switch .probe() to not take an id parameter") convert back to (the new) .probe() to be able to eventually drop .probe_new() from struct i2c_driver. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/matrox/matroxfb_maven.c | 2 +- drivers/video/fbdev/ssd1307fb.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/matrox/matroxfb_maven.c b/drivers/video/fbdev/matrox/matroxfb_maven.c index 727a10a59811..b15a8ad92ba7 100644 --- a/drivers/video/fbdev/matrox/matroxfb_maven.c +++ b/drivers/video/fbdev/matrox/matroxfb_maven.c @@ -1291,7 +1291,7 @@ static struct i2c_driver maven_driver={ .driver = { .name = "maven", }, - .probe_new = maven_probe, + .probe = maven_probe, .remove = maven_remove, .id_table = maven_id, }; diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c index 046b9990d27c..132d1a205011 100644 --- a/drivers/video/fbdev/ssd1307fb.c +++ b/drivers/video/fbdev/ssd1307fb.c @@ -844,7 +844,7 @@ static const struct i2c_device_id ssd1307fb_i2c_id[] = { MODULE_DEVICE_TABLE(i2c, ssd1307fb_i2c_id); static struct i2c_driver ssd1307fb_driver = { - .probe_new = ssd1307fb_probe, + .probe = ssd1307fb_probe, .remove = ssd1307fb_remove, .id_table = ssd1307fb_i2c_id, .driver = { From 30bc32c7c1f975cc3c14e1c7dc437266311282cf Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 24 May 2023 16:39:32 +0200 Subject: [PATCH 192/934] wifi: mt76: mt7615: fix possible race in mt7615_mac_sta_poll Grab sta_poll_lock spinlock in mt7615_mac_sta_poll routine in order to avoid possible races with mt7615_mac_add_txs() or mt7615_mac_fill_rx() removing msta pointer from sta_poll_list. Fixes: a621372a04ac ("mt76: mt7615: rework mt7615_mac_sta_poll for usb code") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/48b23404b759de4f1db2ef85975c72a4aeb1097c.1684938695.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index da1d17b73a25..64002484ccad 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -914,7 +914,10 @@ void mt7615_mac_sta_poll(struct mt7615_dev *dev) msta = list_first_entry(&sta_poll_list, struct mt7615_sta, poll_list); + + spin_lock_bh(&dev->sta_poll_lock); list_del_init(&msta->poll_list); + spin_unlock_bh(&dev->sta_poll_lock); addr = mt7615_mac_wtbl_addr(dev, msta->wcid.idx) + 19 * 4; From fdd7d1fff4e3b97c4706f4c0e000a38b134b7ae5 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 27 May 2023 03:33:23 -0500 Subject: [PATCH 193/934] cifs: address unused variable warning Fix trivial unused variable warning (when SMB1 support disabled) "ioctl.c:324:17: warning: variable 'caps' set but not used [-Wunused-but-set-variable]" Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202305250056.oZhsJmdD-lkp@intel.com/ Signed-off-by: Steve French --- fs/smb/client/ioctl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index cb3be58cd55e..fff092bbc7a3 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -321,7 +321,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) struct tcon_link *tlink; struct cifs_sb_info *cifs_sb; __u64 ExtAttrBits = 0; +#ifdef CONFIG_CIFS_POSIX +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY __u64 caps; +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ +#endif /* CONFIG_CIFS_POSIX */ xid = get_xid(); @@ -331,9 +335,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) if (pSMBFile == NULL) break; tcon = tlink_tcon(pSMBFile->tlink); - caps = le64_to_cpu(tcon->fsUnixInfo.Capability); #ifdef CONFIG_CIFS_POSIX #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + caps = le64_to_cpu(tcon->fsUnixInfo.Capability); if (CIFS_UNIX_EXTATTR_CAP & caps) { __u64 ExtAttrMask = 0; rc = CIFSGetExtAttr(xid, tcon, From 700581ede41d029403feec935df4616309696fd7 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 15 May 2023 15:48:59 +0800 Subject: [PATCH 194/934] soundwire: dmi-quirks: add new mapping for HP Spectre x360 A BIOS/DMI update seems to have broken some devices, let's add a new mapping. Link: https://github.com/thesofproject/linux/issues/4323 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230515074859.3097-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/dmi-quirks.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c index 58ea013fa918..2a1096dab63d 100644 --- a/drivers/soundwire/dmi-quirks.c +++ b/drivers/soundwire/dmi-quirks.c @@ -99,6 +99,13 @@ static const struct dmi_system_id adr_remap_quirk_table[] = { }, .driver_data = (void *)intel_tgl_bios, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8709"), + }, + .driver_data = (void *)intel_tgl_bios, + }, { /* quirk used for NUC15 'Bishop County' LAPBC510 and LAPBC710 skews */ .matches = { From 99e09b9c0ab43346c52f2787ca4e5c4b1798362e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 17 May 2023 18:37:36 +0200 Subject: [PATCH 195/934] soundwire: qcom: add proper error paths in qcom_swrm_startup() Reverse actions in qcom_swrm_startup() error paths to avoid leaking stream memory and keeping runtime PM unbalanced. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230517163736.997553-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul --- drivers/soundwire/qcom.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index c296e0bf897b..280455f047a3 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -1099,8 +1099,10 @@ static int qcom_swrm_startup(struct snd_pcm_substream *substream, } sruntime = sdw_alloc_stream(dai->name); - if (!sruntime) - return -ENOMEM; + if (!sruntime) { + ret = -ENOMEM; + goto err_alloc; + } ctrl->sruntime[dai->id] = sruntime; @@ -1110,12 +1112,19 @@ static int qcom_swrm_startup(struct snd_pcm_substream *substream, if (ret < 0 && ret != -ENOTSUPP) { dev_err(dai->dev, "Failed to set sdw stream on %s\n", codec_dai->name); - sdw_release_stream(sruntime); - return ret; + goto err_set_stream; } } return 0; + +err_set_stream: + sdw_release_stream(sruntime); +err_alloc: + pm_runtime_mark_last_busy(ctrl->dev); + pm_runtime_put_autosuspend(ctrl->dev); + + return ret; } static void qcom_swrm_shutdown(struct snd_pcm_substream *substream, From 2b28fc688cdff225c41cdd22857500e187453ed7 Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Thu, 18 May 2023 08:54:21 -0500 Subject: [PATCH 196/934] arm64: dts: imx8qm-mek: correct GPIOs for USDHC2 CD and WP signals The USDHC2 CD and WP sginal should be on LSIO_GPIO5. Fixes: 307fd14d4b14 ("arm64: dts: imx: add imx8qm mek support") Signed-off-by: Shenwei Wang Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8qm-mek.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts index ce9d3f0b98fc..607cd6b4e972 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts @@ -82,8 +82,8 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; vmmc-supply = <®_usdhc2_vmmc>; - cd-gpios = <&lsio_gpio4 22 GPIO_ACTIVE_LOW>; - wp-gpios = <&lsio_gpio4 21 GPIO_ACTIVE_HIGH>; + cd-gpios = <&lsio_gpio5 22 GPIO_ACTIVE_LOW>; + wp-gpios = <&lsio_gpio5 21 GPIO_ACTIVE_HIGH>; status = "okay"; }; From ca50d7765587fe0a8351a6e8d9742cfd4811d925 Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Fri, 26 May 2023 10:38:54 -0500 Subject: [PATCH 197/934] arm64: dts: imx8-ss-dma: assign default clock rate for lpuarts Add the assigned-clocks and assigned-clock-rates properties for the LPUARTx nodes. Without these properties, the default clock rate used would be 0, which can cause the UART ports to fail when open. Fixes: 35f4e9d7530f ("arm64: dts: imx8: split adma ss into dma and audio ss") Signed-off-by: Shenwei Wang Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi index 2dce8f2ee3ea..adb98a72bdfd 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi @@ -90,6 +90,8 @@ dma_subsys: bus@5a000000 { clocks = <&uart0_lpcg IMX_LPCG_CLK_4>, <&uart0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "baud"; + assigned-clocks = <&clk IMX_SC_R_UART_0 IMX_SC_PM_CLK_PER>; + assigned-clock-rates = <80000000>; power-domains = <&pd IMX_SC_R_UART_0>; status = "disabled"; }; @@ -100,6 +102,8 @@ dma_subsys: bus@5a000000 { clocks = <&uart1_lpcg IMX_LPCG_CLK_4>, <&uart1_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "baud"; + assigned-clocks = <&clk IMX_SC_R_UART_1 IMX_SC_PM_CLK_PER>; + assigned-clock-rates = <80000000>; power-domains = <&pd IMX_SC_R_UART_1>; status = "disabled"; }; @@ -110,6 +114,8 @@ dma_subsys: bus@5a000000 { clocks = <&uart2_lpcg IMX_LPCG_CLK_4>, <&uart2_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "baud"; + assigned-clocks = <&clk IMX_SC_R_UART_2 IMX_SC_PM_CLK_PER>; + assigned-clock-rates = <80000000>; power-domains = <&pd IMX_SC_R_UART_2>; status = "disabled"; }; @@ -120,6 +126,8 @@ dma_subsys: bus@5a000000 { clocks = <&uart3_lpcg IMX_LPCG_CLK_4>, <&uart3_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "baud"; + assigned-clocks = <&clk IMX_SC_R_UART_3 IMX_SC_PM_CLK_PER>; + assigned-clock-rates = <80000000>; power-domains = <&pd IMX_SC_R_UART_3>; status = "disabled"; }; From 5cf9a090a39c97f4506b7b53739d469b1c05a7e9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 27 May 2023 11:28:36 +0200 Subject: [PATCH 198/934] fbdev: imsttfb: Release framebuffer and dealloc cmap on error path Add missing cleanups in error path. Signed-off-by: Helge Deller --- drivers/video/fbdev/imsttfb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index 975dd682fae4..075f11991281 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1452,9 +1452,13 @@ static int init_imstt(struct fb_info *info) FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_YPAN; - fb_alloc_cmap(&info->cmap, 0, 0); + if (fb_alloc_cmap(&info->cmap, 0, 0)) { + framebuffer_release(info); + return -ENODEV; + } if (register_framebuffer(info) < 0) { + fb_dealloc_cmap(&info->cmap); framebuffer_release(info); return -ENODEV; } From 518ecb6a209f6ff678aeadf9f2bf870c0982ca85 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 27 May 2023 11:37:29 +0200 Subject: [PATCH 199/934] fbdev: imsttfb: Fix error path of imsttfb_probe() Release ressources when init_imstt() returns failure. Signed-off-by: Helge Deller --- drivers/video/fbdev/imsttfb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index 075f11991281..ee7d01ad1406 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1535,8 +1535,10 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto error; info->pseudo_palette = par->palette; ret = init_imstt(info); - if (!ret) - pci_set_drvdata(pdev, info); + if (ret) + goto error; + + pci_set_drvdata(pdev, info); return ret; error: From d78bd6cc68276bd57f766f7cb98bfe32c23ab327 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 27 May 2023 08:41:09 +0200 Subject: [PATCH 200/934] fbcon: Fix null-ptr-deref in soft_cursor syzbot repored this bug in the softcursor code: BUG: KASAN: null-ptr-deref in soft_cursor+0x384/0x6b4 drivers/video/fbdev/core/softcursor.c:70 Read of size 16 at addr 0000000000000200 by task kworker/u4:1/12 CPU: 0 PID: 12 Comm: kworker/u4:1 Not tainted 6.4.0-rc3-syzkaller-geb0f1697d729 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/28/2023 Workqueue: events_power_efficient fb_flashcursor Call trace: dump_backtrace+0x1b8/0x1e4 arch/arm64/kernel/stacktrace.c:233 show_stack+0x2c/0x44 arch/arm64/kernel/stacktrace.c:240 __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd0/0x124 lib/dump_stack.c:106 print_report+0xe4/0x514 mm/kasan/report.c:465 kasan_report+0xd4/0x130 mm/kasan/report.c:572 kasan_check_range+0x264/0x2a4 mm/kasan/generic.c:187 __asan_memcpy+0x3c/0x84 mm/kasan/shadow.c:105 soft_cursor+0x384/0x6b4 drivers/video/fbdev/core/softcursor.c:70 bit_cursor+0x113c/0x1a64 drivers/video/fbdev/core/bitblit.c:377 fb_flashcursor+0x35c/0x54c drivers/video/fbdev/core/fbcon.c:380 process_one_work+0x788/0x12d4 kernel/workqueue.c:2405 worker_thread+0x8e0/0xfe8 kernel/workqueue.c:2552 kthread+0x288/0x310 kernel/kthread.c:379 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:853 This fix let bit_cursor() bail out early when a font bitmap isn't available yet. Signed-off-by: Helge Deller Reported-by: syzbot+d910bd780e6efac35869@syzkaller.appspotmail.com Acked-by: Sam Ravnborg Cc: stable@kernel.org --- drivers/video/fbdev/core/bitblit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c index f98e8f298bc1..8587c9da0670 100644 --- a/drivers/video/fbdev/core/bitblit.c +++ b/drivers/video/fbdev/core/bitblit.c @@ -247,6 +247,9 @@ static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode, cursor.set = 0; + if (!vc->vc_font.data) + return; + c = scr_readw((u16 *) vc->vc_pos); attribute = get_attribute(info, c); src = vc->vc_font.data + ((c & charmask) * (w * vc->vc_font.height)); From b3e6bcb94590dea45396b9481e47b809b1be4afa Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 23 May 2023 23:49:48 -0400 Subject: [PATCH 201/934] ext4: add EA_INODE checking to ext4_iget() Add a new flag, EXT4_IGET_EA_INODE which indicates whether the inode is expected to have the EA_INODE flag or not. If the flag is not set/clear as expected, then fail the iget() operation and mark the file system as corrupted. This commit also makes the ext4_iget() always perform the is_bad_inode() check even when the inode is already inode cache. This allows us to remove the is_bad_inode() check from the callers of ext4_iget() in the ea_inode code. Reported-by: syzbot+cbb68193bdb95af4340a@syzkaller.appspotmail.com Reported-by: syzbot+62120febbd1ee3c3c860@syzkaller.appspotmail.com Reported-by: syzbot+edce54daffee36421b4c@syzkaller.appspotmail.com Cc: stable@kernel.org Signed-off-by: Theodore Ts'o Link: https://lore.kernel.org/r/20230524034951.779531-2-tytso@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 3 ++- fs/ext4/inode.c | 31 ++++++++++++++++++++++++++----- fs/ext4/xattr.c | 36 +++++++----------------------------- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6948d673bba2..9525c52b78dc 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2901,7 +2901,8 @@ typedef enum { EXT4_IGET_NORMAL = 0, EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */ EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */ - EXT4_IGET_BAD = 0x0004 /* Allow to iget a bad inode */ + EXT4_IGET_BAD = 0x0004, /* Allow to iget a bad inode */ + EXT4_IGET_EA_INODE = 0x0008 /* Inode should contain an EA value */ } ext4_iget_flags; extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ce5f21b6c2b3..258f3cbed347 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4641,6 +4641,21 @@ static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val) inode_set_iversion_queried(inode, val); } +static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags) + +{ + if (flags & EXT4_IGET_EA_INODE) { + if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) + return "missing EA_INODE flag"; + } else { + if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) + return "unexpected EA_INODE flag"; + } + if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) + return "unexpected bad inode w/o EXT4_IGET_BAD"; + return NULL; +} + struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ext4_iget_flags flags, const char *function, unsigned int line) @@ -4650,6 +4665,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, struct ext4_inode_info *ei; struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct inode *inode; + const char *err_str; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; loff_t size; @@ -4677,8 +4693,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, inode = iget_locked(sb, ino); if (!inode) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + if (!(inode->i_state & I_NEW)) { + if ((err_str = check_igot_inode(inode, flags)) != NULL) { + ext4_error_inode(inode, function, line, 0, err_str); + iput(inode); + return ERR_PTR(-EFSCORRUPTED); + } return inode; + } ei = EXT4_I(inode); iloc.bh = NULL; @@ -4944,10 +4966,9 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) ext4_error_inode(inode, function, line, 0, "casefold flag without casefold feature"); - if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) { - ext4_error_inode(inode, function, line, 0, - "bad inode without EXT4_IGET_BAD flag"); - ret = -EUCLEAN; + if ((err_str = check_igot_inode(inode, flags)) != NULL) { + ext4_error_inode(inode, function, line, 0, err_str); + ret = -EFSCORRUPTED; goto bad_inode; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index dfc2e223bd10..a27208129a80 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -433,7 +433,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, return -EFSCORRUPTED; } - inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL); + inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_EA_INODE); if (IS_ERR(inode)) { err = PTR_ERR(inode); ext4_error(parent->i_sb, @@ -441,23 +441,6 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, err); return err; } - - if (is_bad_inode(inode)) { - ext4_error(parent->i_sb, - "error while reading EA inode %lu is_bad_inode", - ea_ino); - err = -EIO; - goto error; - } - - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { - ext4_error(parent->i_sb, - "EA inode %lu does not have EXT4_EA_INODE_FL flag", - ea_ino); - err = -EINVAL; - goto error; - } - ext4_xattr_inode_set_class(inode); /* @@ -478,9 +461,6 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, *ea_inode = inode; return 0; -error: - iput(inode); - return err; } /* Remove entry from mbcache when EA inode is getting evicted */ @@ -1556,11 +1536,10 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value, while (ce) { ea_inode = ext4_iget(inode->i_sb, ce->e_value, - EXT4_IGET_NORMAL); - if (!IS_ERR(ea_inode) && - !is_bad_inode(ea_inode) && - (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) && - i_size_read(ea_inode) == value_len && + EXT4_IGET_EA_INODE); + if (IS_ERR(ea_inode)) + goto next_entry; + if (i_size_read(ea_inode) == value_len && !ext4_xattr_inode_read(ea_inode, ea_data, value_len) && !ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data, value_len) && @@ -1570,9 +1549,8 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value, kvfree(ea_data); return ea_inode; } - - if (!IS_ERR(ea_inode)) - iput(ea_inode); + iput(ea_inode); + next_entry: ce = mb_cache_entry_find_next(ea_inode_cache, ce); } kvfree(ea_data); From 36e4fc57fc1619f462e669e939209c45763bc8f5 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Sun, 28 May 2023 19:36:02 +0200 Subject: [PATCH 202/934] efi: Bump stub image version for macOS HVF compatibility The macOS hypervisor framework includes a host-side VMM called VZLinuxBootLoader [1] which implements native support for booting the Linux kernel inside a guest directly (instead of, e.g., via GRUB installed inside the guest). On x86, it incorporates a BIOS style loader that does not implement or expose EFI to the loaded kernel. However, this loader appears to fail when the 'image minor version' field in the kernel image's PE/COFF header (which is generally only used by EFI based bootloaders) is set to any value other than 0x0. [2] Commit e346bebbd36b1576 ("efi: libstub: Always enable initrd command line loader and bump version") incremented the EFI stub image minor version to convey that all EFI stub kernels now implement support for the initrd= command line option, and do so in a way where it can load initrd images from any filesystem known to the EFI firmware (as opposed to prior implementations that could only load initrds from the same volume that the kernel image was loaded from). Unfortunately, bumping the version to v1.1 triggers this issue in VZLinuxBootLoader, breaking the boot on x86. So let's keep the image minor version at 0x0, and bump the image major version instead. While at it, convert this field to a bit field, so that individual features are discoverable from it, as suggested by Linus. So let's bump the major version to v3, and document the initrd= command line loading feature as being represented by bit 1 in the mask. Note that, due to the prior interpretation as a monotonically increasing version field, loaders are still permitted to assume that the LoadFile2 initrd loading feature is supported for any major version value >= 1, even if bit 0 is not set. [1] https://developer.apple.com/documentation/virtualization/vzlinuxbootloader [2] https://lore.kernel.org/linux-efi/CAG8fp8Teu4G9JuenQrqGndFt2Gy+V4YgJ=hN1xX7AD940YKf3A@mail.gmail.com/ Fixes: e346bebbd36b1576 ("efi: libstub: Always enable initrd command ...") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217485 Signed-off-by: Akihiro Suda [ardb: rewrite comment and commit log] Signed-off-by: Ard Biesheuvel --- include/linux/pe.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/include/linux/pe.h b/include/linux/pe.h index 5e1e11540870..fdf9c95709ba 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -11,25 +11,26 @@ #include /* - * Linux EFI stub v1.0 adds the following functionality: - * - Loading initrd from the LINUX_EFI_INITRD_MEDIA_GUID device path, - * - Loading/starting the kernel from firmware that targets a different - * machine type, via the entrypoint exposed in the .compat PE/COFF section. + * Starting from version v3.0, the major version field should be interpreted as + * a bit mask of features supported by the kernel's EFI stub: + * - 0x1: initrd loading from the LINUX_EFI_INITRD_MEDIA_GUID device path, + * - 0x2: initrd loading using the initrd= command line option, where the file + * may be specified using device path notation, and is not required to + * reside on the same volume as the loaded kernel image. * * The recommended way of loading and starting v1.0 or later kernels is to use * the LoadImage() and StartImage() EFI boot services, and expose the initrd * via the LINUX_EFI_INITRD_MEDIA_GUID device path. * - * Versions older than v1.0 support initrd loading via the image load options - * (using initrd=, limited to the volume from which the kernel itself was - * loaded), or via arch specific means (bootparams, DT, etc). + * Versions older than v1.0 may support initrd loading via the image load + * options (using initrd=, limited to the volume from which the kernel itself + * was loaded), or only via arch specific means (bootparams, DT, etc). * - * On x86, LoadImage() and StartImage() can be omitted if the EFI handover - * protocol is implemented, which can be inferred from the version, - * handover_offset and xloadflags fields in the bootparams structure. + * The minor version field must remain 0x0. + * (https://lore.kernel.org/all/efd6f2d4-547c-1378-1faa-53c044dbd297@gmail.com/) */ -#define LINUX_EFISTUB_MAJOR_VERSION 0x1 -#define LINUX_EFISTUB_MINOR_VERSION 0x1 +#define LINUX_EFISTUB_MAJOR_VERSION 0x3 +#define LINUX_EFISTUB_MINOR_VERSION 0x0 /* * LINUX_PE_MAGIC appears at offset 0x38 into the MS-DOS header of EFI bootable From b6d572aeb58a5e0be86bd51ea514c4feba996cc4 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 24 May 2023 10:41:57 +0300 Subject: [PATCH 203/934] thunderbolt: Increase DisplayPort Connection Manager handshake timeout It turns out that when plugging in VGA cable through USB-C to VGA/DVI dongle the Connection Manager handshake can take longer time, at least on Intel Titan Ridge based docks such as Dell WD91TB. This leads to following error in the dmesg: thunderbolt 0000:00:0d.3: 3:10: DP tunnel activation failed, aborting and the display stays blank (because we failed to establish the tunnel). For this reason increase the timeout to 3s. Reported-by: Koba Ko Cc: stable@vger.kernel.org Acked-By: Yehezkel Bernat Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 9099ae73e78f..4f222673d651 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -526,7 +526,7 @@ static int tb_dp_xchg_caps(struct tb_tunnel *tunnel) * Perform connection manager handshake between IN and OUT ports * before capabilities exchange can take place. */ - ret = tb_dp_cm_handshake(in, out, 1500); + ret = tb_dp_cm_handshake(in, out, 3000); if (ret) return ret; From 3fe95742af29b8b4eccab2ba94bc521805c6e10c Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 24 May 2023 13:47:04 +0300 Subject: [PATCH 204/934] thunderbolt: Do not touch CL state configuration during discovery If the boot firmware has already established tunnels, especially ones that have special requirements from the link such as DisplayPort, we should not blindly enable CL states (nor change the TMU configuration). Otherwise the existing tunnels may not work as expected. For this reason, skip the CL state enabling when we go over the existing topology. This will also keep the TMU settings untouched because we do not change the TMU configuration when CL states are not enabled. Reported-by: Koba Ko Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7831 Cc: stable@vger.kernel.org # v6.0+ Acked-By: Yehezkel Bernat Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 7bfbc9ca9ba4..c1af712ca728 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -737,6 +737,7 @@ static void tb_scan_port(struct tb_port *port) { struct tb_cm *tcm = tb_priv(port->sw->tb); struct tb_port *upstream_port; + bool discovery = false; struct tb_switch *sw; int ret; @@ -804,8 +805,10 @@ static void tb_scan_port(struct tb_port *port) * tunnels and know which switches were authorized already by * the boot firmware. */ - if (!tcm->hotplug_active) + if (!tcm->hotplug_active) { dev_set_uevent_suppress(&sw->dev, true); + discovery = true; + } /* * At the moment Thunderbolt 2 and beyond (devices with LC) we @@ -835,10 +838,14 @@ static void tb_scan_port(struct tb_port *port) * CL0s and CL1 are enabled and supported together. * Silently ignore CLx enabling in case CLx is not supported. */ - ret = tb_switch_enable_clx(sw, TB_CL1); - if (ret && ret != -EOPNOTSUPP) - tb_sw_warn(sw, "failed to enable %s on upstream port\n", - tb_switch_clx_name(TB_CL1)); + if (discovery) { + tb_sw_dbg(sw, "discovery, not touching CL states\n"); + } else { + ret = tb_switch_enable_clx(sw, TB_CL1); + if (ret && ret != -EOPNOTSUPP) + tb_sw_warn(sw, "failed to enable %s on upstream port\n", + tb_switch_clx_name(TB_CL1)); + } if (tb_switch_is_clx_enabled(sw, TB_CL1)) /* From ed309ce522185583b163bd0c74f0d9f299fe1826 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 26 May 2023 11:59:08 +0100 Subject: [PATCH 205/934] RISC-V: mark hibernation as nonportable Hibernation support depends on firmware marking its reserved/PMP protected regions as not accessible from Linux. The latest versions of the de-facto SBI implementation (OpenSBI) do not do this, having dropped the no-map property to enable 1 GiB huge page mappings by the kernel. This was exposed by commit 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping"), which made the first 2 MiB of DRAM (where SBI typically resides) accessible by the kernel. Attempting to hibernate with either OpenSBI, or other implementations following its lead, will lead to a kernel panic ([1], [2]) as the hibernation process will attempt to save/restore any mapped regions, including the PMP protected regions in use by the SBI implementation. Mark hibernation as depending on "NONPORTABLE", as only a small subset of systems are capable of supporting it, until such time that an SBI implementation independent way to communicate what regions are in use has been agreed on. As hibernation support landed in v6.4-rc1, disabling it for most platforms does not constitute a regression. The alternative would have been reverting commit 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping"). Doing so would permit hibernation on platforms with these SBI implementations, but would limit the options we have to solve the protection of the region without causing a regression in hibernation support. Reported-by: Song Shuai Link: https://lore.kernel.org/all/CAAYs2=gQvkhTeioMmqRDVGjdtNF_vhB+vm_1dHJxPNi75YDQ_Q@mail.gmail.com/ [1] Reported-by: JeeHeng Sia Link: https://groups.google.com/a/groups.riscv.org/g/sw-dev/c/ITXwaKfA6z8 [2] Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230526-astride-detonator-9ae120051159@wendy Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 348c0fa1fc8c..2bb0c38419ff 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -799,8 +799,11 @@ menu "Power management options" source "kernel/power/Kconfig" +# Hibernation is only possible on systems where the SBI implementation has +# marked its reserved memory as not accessible from, or does not run +# from the same memory as, Linux config ARCH_HIBERNATION_POSSIBLE - def_bool y + def_bool NONPORTABLE config ARCH_HIBERNATION_HEADER def_bool HIBERNATION From a6e766dea0a22918735176e4af862d535962f11e Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Tue, 23 May 2023 16:25:47 +0100 Subject: [PATCH 206/934] misc: fastrpc: Pass proper scm arguments for secure map request If a map request is made with securemap attribute, the memory ownership needs to be reassigned to new VMID to allow access from protection domain. Currently only DSP VMID is passed to the reassign call which is incorrect as only a combination of HLOS and DSP VMID is allowed for memory ownership reassignment and passing only DSP VMID will cause assign call failure. Also pass proper restoring permissions to HLOS as the source permission will now carry both HLOS and DSP VMID permission. Change is also made to get valid physical address from scatter/gather for this allocation request. Fixes: e90d91190619 ("misc: fastrpc: Add support to secure memory map") Cc: stable Tested-by: Ekansh Gupta Signed-off-by: Ekansh Gupta Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230523152550.438363-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index f48466960f1b..32a5415624bf 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -316,12 +316,14 @@ static void fastrpc_free_map(struct kref *ref) if (map->table) { if (map->attr & FASTRPC_ATTR_SECUREMAP) { struct qcom_scm_vmperm perm; + int vmid = map->fl->cctx->vmperms[0].vmid; + u64 src_perms = BIT(QCOM_SCM_VMID_HLOS) | BIT(vmid); int err = 0; perm.vmid = QCOM_SCM_VMID_HLOS; perm.perm = QCOM_SCM_PERM_RWX; err = qcom_scm_assign_mem(map->phys, map->size, - &map->fl->cctx->perms, &perm, 1); + &src_perms, &perm, 1); if (err) { dev_err(map->fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d", map->phys, map->size, err); @@ -787,8 +789,12 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd, goto map_err; } - map->phys = sg_dma_address(map->table->sgl); - map->phys += ((u64)fl->sctx->sid << 32); + if (attr & FASTRPC_ATTR_SECUREMAP) { + map->phys = sg_phys(map->table->sgl); + } else { + map->phys = sg_dma_address(map->table->sgl); + map->phys += ((u64)fl->sctx->sid << 32); + } map->size = len; map->va = sg_virt(map->table->sgl); map->len = len; @@ -798,9 +804,15 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd, * If subsystem VMIDs are defined in DTSI, then do * hyp_assign from HLOS to those VM(s) */ + u64 src_perms = BIT(QCOM_SCM_VMID_HLOS); + struct qcom_scm_vmperm dst_perms[2] = {0}; + + dst_perms[0].vmid = QCOM_SCM_VMID_HLOS; + dst_perms[0].perm = QCOM_SCM_PERM_RW; + dst_perms[1].vmid = fl->cctx->vmperms[0].vmid; + dst_perms[1].perm = QCOM_SCM_PERM_RWX; map->attr = attr; - err = qcom_scm_assign_mem(map->phys, (u64)map->size, &fl->cctx->perms, - fl->cctx->vmperms, fl->cctx->vmcount); + err = qcom_scm_assign_mem(map->phys, (u64)map->size, &src_perms, dst_perms, 2); if (err) { dev_err(sess->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d", map->phys, map->size, err); From 3c7d0079a1831118ef232bd9c2f34d058a1f31c2 Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Tue, 23 May 2023 16:25:48 +0100 Subject: [PATCH 207/934] misc: fastrpc: Reassign memory ownership only for remote heap The userspace map request for remote heap allocates CMA memory. The ownership of this memory needs to be reassigned to proper owners to allow access from the protection domain running on DSP. This reassigning of ownership is not correct if done for any other supported flags. When any other flag is requested from userspace, fastrpc is trying to reassign the ownership of memory and this reassignment is getting skipped for remote heap request which is incorrect. Add proper flag check to reassign the memory only if remote heap is requested. Fixes: 532ad70c6d44 ("misc: fastrpc: Add mmap request assigning for static PD pool") Cc: stable Tested-by: Ekansh Gupta Signed-off-by: Ekansh Gupta Reviewed-by: Srinivas Kandagatla Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230523152550.438363-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 32a5415624bf..a654dc416480 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1904,7 +1904,7 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) req.vaddrout = rsp_msg.vaddr; /* Add memory to static PD pool, protection thru hypervisor */ - if (req.flags != ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) { + if (req.flags == ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) { struct qcom_scm_vmperm perm; perm.vmid = QCOM_SCM_VMID_HLOS; From b6a062853ddf6b4f653af2d8b75ba45bb9a036ad Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Tue, 23 May 2023 16:25:49 +0100 Subject: [PATCH 208/934] misc: fastrpc: return -EPIPE to invocations on device removal The return value is initialized as -1, or -EPERM. The completion of an invocation implies that the return value is set appropriately, but "Permission denied" does not accurately describe the outcome of the invocation. Set the invocation's return value to a more appropriate "Broken pipe", as the cleanup breaks the driver's connection with rpmsg. Fixes: c68cfb718c8f ("misc: fastrpc: Add support for context Invoke method") Cc: stable Signed-off-by: Richard Acayan Reviewed-by: Srinivas Kandagatla Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230523152550.438363-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index a654dc416480..964f67dad2f9 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2349,8 +2349,10 @@ static void fastrpc_notify_users(struct fastrpc_user *user) struct fastrpc_invoke_ctx *ctx; spin_lock(&user->lock); - list_for_each_entry(ctx, &user->pending, node) + list_for_each_entry(ctx, &user->pending, node) { + ctx->retval = -EPIPE; complete(&ctx->work); + } spin_unlock(&user->lock); } From 46248400d81e2aa0b65cd659d6f40188192a58b6 Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Tue, 23 May 2023 16:25:50 +0100 Subject: [PATCH 209/934] misc: fastrpc: reject new invocations during device removal The channel's rpmsg object allows new invocations to be made. After old invocations are already interrupted, the driver shouldn't try to invoke anymore. Invalidating the rpmsg at the end of the driver removal function makes it easy to cause a race condition in userspace. Even closing a file descriptor before the driver finishes its cleanup can cause an invocation via fastrpc_release_current_dsp_process() and subsequent timeout. Invalidate the channel before the invocations are interrupted to make sure that no invocations can be created to hang after the device closes. Fixes: c68cfb718c8f ("misc: fastrpc: Add support for context Invoke method") Cc: stable Signed-off-by: Richard Acayan Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230523152550.438363-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 964f67dad2f9..30d4d0476248 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2363,7 +2363,9 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev) struct fastrpc_user *user; unsigned long flags; + /* No invocations past this point */ spin_lock_irqsave(&cctx->lock, flags); + cctx->rpdev = NULL; list_for_each_entry(user, &cctx->users, user) fastrpc_notify_users(user); spin_unlock_irqrestore(&cctx->lock, flags); @@ -2382,7 +2384,6 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev) of_platform_depopulate(&rpdev->dev); - cctx->rpdev = NULL; fastrpc_channel_ctx_put(cctx); } From dbe678f6192f27879ac9ff6bc7a1036aad85aae9 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Thu, 18 May 2023 11:49:45 -0400 Subject: [PATCH 210/934] usb: cdns3: fix NCM gadget RX speed 20x slow than expection at iMX8QM At iMX8QM platform, enable NCM gadget and run 'iperf3 -s'. At host, run 'iperf3 -V -c fe80::6863:98ff:feef:3e0%enxc6e147509498' [ 5] 0.00-1.00 sec 1.55 MBytes 13.0 Mbits/sec 90 4.18 KBytes [ 5] 1.00-2.00 sec 1.44 MBytes 12.0 Mbits/sec 75 4.18 KBytes [ 5] 2.00-3.00 sec 1.48 MBytes 12.4 Mbits/sec 75 4.18 KBytes Expected speed should be bigger than 300Mbits/sec. The root cause of this performance drop was found to be data corruption happening at 4K borders in some Ethernet packets, leading to TCP checksum errors. This corruption occurs from the position (4K - (address & 0x7F)) to 4K. The u_ether function's allocation of skb_buff reserves 64B, meaning all RX addresses resemble 0xXXXX0040. Force trb_burst_size to 16 can fix this problem. Cc: stable@vger.kernel.org Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20230518154946.3666662-1-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index ccfaebca6faa..1dcadef933e3 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -2097,6 +2097,19 @@ int cdns3_ep_config(struct cdns3_endpoint *priv_ep, bool enable) else priv_ep->trb_burst_size = 16; + /* + * In versions preceding DEV_VER_V2, for example, iMX8QM, there exit the bugs + * in the DMA. These bugs occur when the trb_burst_size exceeds 16 and the + * address is not aligned to 128 Bytes (which is a product of the 64-bit AXI + * and AXI maximum burst length of 16 or 0xF+1, dma_axi_ctrl0[3:0]). This + * results in data corruption when it crosses the 4K border. The corruption + * specifically occurs from the position (4K - (address & 0x7F)) to 4K. + * + * So force trb_burst_size to 16 at such platform. + */ + if (priv_dev->dev_ver < DEV_VER_V2) + priv_ep->trb_burst_size = 16; + mult = min_t(u8, mult, EP_CFG_MULT_MAX); buffering = min_t(u8, buffering, EP_CFG_BUFFERING_MAX); maxburst = min_t(u8, maxburst, EP_CFG_MAXBURST_MAX); From efb6b535207395a5c7317993602e2503ca8cb4b3 Mon Sep 17 00:00:00 2001 From: Uttkarsh Aggarwal Date: Thu, 25 May 2023 14:58:54 +0530 Subject: [PATCH 211/934] usb: gadget: f_fs: Add unbind event before functionfs_unbind While exercising the unbind path, with the current implementation the functionfs_unbind would be calling which waits for the ffs->mutex to be available, however within the same time ffs_ep0_read is invoked & if no setup packets are pending, it will invoke function wait_event_interruptible_exclusive_locked_irq which by definition waits for the ev.count to be increased inside the same mutex for which functionfs_unbind is waiting. This creates deadlock situation because the functionfs_unbind won't get the lock until ev.count is increased which can only happen if the caller ffs_func_unbind can proceed further. Following is the illustration: CPU1 CPU2 ffs_func_unbind() ffs_ep0_read() mutex_lock(ffs->mutex) wait_event(ffs->ev.count) functionfs_unbind() mutex_lock(ffs->mutex) mutex_unlock(ffs->mutex) ffs_event_add() Fix this by moving the event unbind before functionfs_unbind to ensure the ev.count is incrased properly. Fixes: 6a19da111057 ("usb: gadget: f_fs: Prevent race during ffs_ep0_queue_wait") Cc: stable Signed-off-by: Uttkarsh Aggarwal Link: https://lore.kernel.org/r/20230525092854.7992-1-quic_uaggarwa@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index a13c946e0663..f41a385a5c42 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3535,6 +3535,7 @@ static void ffs_func_unbind(struct usb_configuration *c, /* Drain any pending AIO completions */ drain_workqueue(ffs->io_completion_wq); + ffs_event_add(ffs, FUNCTIONFS_UNBIND); if (!--opts->refcnt) functionfs_unbind(ffs); @@ -3559,7 +3560,6 @@ static void ffs_func_unbind(struct usb_configuration *c, func->function.ssp_descriptors = NULL; func->interfaces_nums = NULL; - ffs_event_add(ffs, FUNCTIONFS_UNBIND); } static struct usb_function *ffs_alloc(struct usb_function_instance *fi) From 016da9c65fec9f0e78c4909ed9a0f2d567af6775 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 25 May 2023 18:38:37 +0300 Subject: [PATCH 212/934] usb: gadget: udc: fix NULL dereference in remove() The "udc" pointer was never set in the probe() function so it will lead to a NULL dereference in udc_pci_remove() when we do: usb_del_gadget_udc(&udc->gadget); Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/ZG+A/dNpFWAlCChk@kili Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/amd5536udc_pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/udc/amd5536udc_pci.c b/drivers/usb/gadget/udc/amd5536udc_pci.c index c80f9bd51b75..a36913ae31f9 100644 --- a/drivers/usb/gadget/udc/amd5536udc_pci.c +++ b/drivers/usb/gadget/udc/amd5536udc_pci.c @@ -170,6 +170,9 @@ static int udc_pci_probe( retval = -ENODEV; goto err_probe; } + + udc = dev; + return 0; err_probe: From 47fe1c3064c6bc1bfa3c032ff78e603e5dd6e5bc Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 29 May 2023 16:32:37 +0900 Subject: [PATCH 213/934] block: fix revalidate performance regression The scsi driver function sd_read_block_characteristics() always calls disk_set_zoned() to a disk zoned model correctly, in case the device model changed. This is done even for regular disks to set the zoned model to BLK_ZONED_NONE and free any zone related resources if the drive previously was zoned. This behavior significantly impact the time it takes to revalidate disks on a large system as the call to disk_clear_zone_settings() done from disk_set_zoned() for the BLK_ZONED_NONE case results in the device request queued to be frozen, even if there are no zone resources to free. Avoid this overhead for non-zoned devices by not calling disk_clear_zone_settings() in disk_set_zoned() if the device model was already set to BLK_ZONED_NONE, which is always the case for regular devices. Reported by: Brian Bunker Fixes: 508aebb80527 ("block: introduce blk_queue_clear_zone_settings()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230529073237.1339862-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-settings.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 896b4654ab00..4dd59059b788 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -915,6 +915,7 @@ static bool disk_has_partitions(struct gendisk *disk) void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model) { struct request_queue *q = disk->queue; + unsigned int old_model = q->limits.zoned; switch (model) { case BLK_ZONED_HM: @@ -952,7 +953,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model) */ blk_queue_zone_write_granularity(q, queue_logical_block_size(q)); - } else { + } else if (old_model != BLK_ZONED_NONE) { disk_clear_zone_settings(disk); } } From 7b32040f6d7f885ffc09a6df7c17992d56d2eab8 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 15 May 2023 19:24:56 +0200 Subject: [PATCH 214/934] dt-bindings: usb: snps,dwc3: Fix "snps,hsphy_interface" type The "snps,hsphy_interface" is string, not u8. Fix the type. Fixes: 389d77658801 ("dt-bindings: usb: Convert DWC USB3 bindings to DT schema") Cc: stable Reviewed-by: Krzysztof Kozlowski Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20230515172456.179049-1-marex@denx.de Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/snps,dwc3.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/snps,dwc3.yaml b/Documentation/devicetree/bindings/usb/snps,dwc3.yaml index 50edc4da780e..4f7625955ccc 100644 --- a/Documentation/devicetree/bindings/usb/snps,dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/snps,dwc3.yaml @@ -287,7 +287,7 @@ properties: description: High-Speed PHY interface selection between UTMI+ and ULPI when the DWC_USB3_HSPHY_INTERFACE has value 3. - $ref: /schemas/types.yaml#/definitions/uint8 + $ref: /schemas/types.yaml#/definitions/string enum: [utmi, ulpi] snps,quirk-frame-length-adjustment: From 0143d148d1e882fb1538dc9974c94d63961719b9 Mon Sep 17 00:00:00 2001 From: Ruihan Li Date: Mon, 15 May 2023 21:09:55 +0800 Subject: [PATCH 215/934] usb: usbfs: Enforce page requirements for mmap The current implementation of usbdev_mmap uses usb_alloc_coherent to allocate memory pages that will later be mapped into the user space. Meanwhile, usb_alloc_coherent employs three different methods to allocate memory, as outlined below: * If hcd->localmem_pool is non-null, it uses gen_pool_dma_alloc to allocate memory; * If DMA is not available, it uses kmalloc to allocate memory; * Otherwise, it uses dma_alloc_coherent. However, it should be noted that gen_pool_dma_alloc does not guarantee that the resulting memory will be page-aligned. Furthermore, trying to map slab pages (i.e., memory allocated by kmalloc) into the user space is not resonable and can lead to problems, such as a type confusion bug when PAGE_TABLE_CHECK=y [1]. To address these issues, this patch introduces hcd_alloc_coherent_pages, which addresses the above two problems. Specifically, hcd_alloc_coherent_pages uses gen_pool_dma_alloc_align instead of gen_pool_dma_alloc to ensure that the memory is page-aligned. To replace kmalloc, hcd_alloc_coherent_pages directly allocates pages by calling __get_free_pages. Reported-by: syzbot+fcf1a817ceb50935ce99@syzkaller.appspotmail.comm Closes: https://lore.kernel.org/lkml/000000000000258e5e05fae79fc1@google.com/ [1] Fixes: f7d34b445abc ("USB: Add support for usbfs zerocopy.") Fixes: ff2437befd8f ("usb: host: Fix excessive alignment restriction for local memory allocations") Cc: stable@vger.kernel.org Signed-off-by: Ruihan Li Acked-by: Alan Stern Link: https://lore.kernel.org/r/20230515130958.32471-2-lrh2000@pku.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/buffer.c | 41 +++++++++++++++++++++++++++++++++++++++ drivers/usb/core/devio.c | 9 +++++---- include/linux/usb/hcd.h | 5 +++++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index fbb087b728dc..268ccbec88f9 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -172,3 +172,44 @@ void hcd_buffer_free( } dma_free_coherent(hcd->self.sysdev, size, addr, dma); } + +void *hcd_buffer_alloc_pages(struct usb_hcd *hcd, + size_t size, gfp_t mem_flags, dma_addr_t *dma) +{ + if (size == 0) + return NULL; + + if (hcd->localmem_pool) + return gen_pool_dma_alloc_align(hcd->localmem_pool, + size, dma, PAGE_SIZE); + + /* some USB hosts just use PIO */ + if (!hcd_uses_dma(hcd)) { + *dma = DMA_MAPPING_ERROR; + return (void *)__get_free_pages(mem_flags, + get_order(size)); + } + + return dma_alloc_coherent(hcd->self.sysdev, + size, dma, mem_flags); +} + +void hcd_buffer_free_pages(struct usb_hcd *hcd, + size_t size, void *addr, dma_addr_t dma) +{ + if (!addr) + return; + + if (hcd->localmem_pool) { + gen_pool_free(hcd->localmem_pool, + (unsigned long)addr, size); + return; + } + + if (!hcd_uses_dma(hcd)) { + free_pages((unsigned long)addr, get_order(size)); + return; + } + + dma_free_coherent(hcd->self.sysdev, size, addr, dma); +} diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e501a03d6c70..3936ca7f7d2f 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -186,6 +186,7 @@ static int connected(struct usb_dev_state *ps) static void dec_usb_memory_use_count(struct usb_memory *usbm, int *count) { struct usb_dev_state *ps = usbm->ps; + struct usb_hcd *hcd = bus_to_hcd(ps->dev->bus); unsigned long flags; spin_lock_irqsave(&ps->lock, flags); @@ -194,8 +195,8 @@ static void dec_usb_memory_use_count(struct usb_memory *usbm, int *count) list_del(&usbm->memlist); spin_unlock_irqrestore(&ps->lock, flags); - usb_free_coherent(ps->dev, usbm->size, usbm->mem, - usbm->dma_handle); + hcd_buffer_free_pages(hcd, usbm->size, + usbm->mem, usbm->dma_handle); usbfs_decrease_memory_usage( usbm->size + sizeof(struct usb_memory)); kfree(usbm); @@ -247,8 +248,8 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) goto error_decrease_mem; } - mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN, - &dma_handle); + mem = hcd_buffer_alloc_pages(hcd, + size, GFP_USER | __GFP_NOWARN, &dma_handle); if (!mem) { ret = -ENOMEM; goto error_free_usbm; diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 094c77eaf455..0c7eff91adf4 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -501,6 +501,11 @@ void *hcd_buffer_alloc(struct usb_bus *bus, size_t size, void hcd_buffer_free(struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma); +void *hcd_buffer_alloc_pages(struct usb_hcd *hcd, + size_t size, gfp_t mem_flags, dma_addr_t *dma); +void hcd_buffer_free_pages(struct usb_hcd *hcd, + size_t size, void *addr, dma_addr_t dma); + /* generic bus glue, needed for host controllers that don't use PCI */ extern irqreturn_t usb_hcd_irq(int irq, void *__hcd); From d0b861653f8c16839c3035875b556afc4472f941 Mon Sep 17 00:00:00 2001 From: Ruihan Li Date: Mon, 15 May 2023 21:09:56 +0800 Subject: [PATCH 216/934] usb: usbfs: Use consistent mmap functions When hcd->localmem_pool is non-null, localmem_pool is used to allocate DMA memory. In this case, the dma address will be properly returned (in dma_handle), and dma_mmap_coherent should be used to map this memory into the user space. However, the current implementation uses pfn_remap_range, which is supposed to map normal pages. Instead of repeating the logic in the memory allocation function, this patch introduces a more robust solution. Here, the type of allocated memory is checked by testing whether dma_handle is properly set. If dma_handle is properly returned, it means some DMA pages are allocated and dma_mmap_coherent should be used to map them. Otherwise, normal pages are allocated and pfn_remap_range should be called. This ensures that the correct mmap functions are used consistently, independently with logic details that determine which type of memory gets allocated. Fixes: a0e710a7def4 ("USB: usbfs: fix mmap dma mismatch") Cc: stable@vger.kernel.org Signed-off-by: Ruihan Li Link: https://lore.kernel.org/r/20230515130958.32471-3-lrh2000@pku.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 3936ca7f7d2f..fcf68818e999 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -235,7 +235,7 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) size_t size = vma->vm_end - vma->vm_start; void *mem; unsigned long flags; - dma_addr_t dma_handle; + dma_addr_t dma_handle = DMA_MAPPING_ERROR; int ret; ret = usbfs_increase_memory_usage(size + sizeof(struct usb_memory)); @@ -265,7 +265,14 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) usbm->vma_use_count = 1; INIT_LIST_HEAD(&usbm->memlist); - if (hcd->localmem_pool || !hcd_uses_dma(hcd)) { + /* + * In DMA-unavailable cases, hcd_buffer_alloc_pages allocates + * normal pages and assigns DMA_MAPPING_ERROR to dma_handle. Check + * whether we are in such cases, and then use remap_pfn_range (or + * dma_mmap_coherent) to map normal (or DMA) pages into the user + * space, respectively. + */ + if (dma_handle == DMA_MAPPING_ERROR) { if (remap_pfn_range(vma, vma->vm_start, virt_to_phys(usbm->mem) >> PAGE_SHIFT, size, vma->vm_page_prot) < 0) { From 81a31a860bb61d54eb688af2568d9332ed9b8942 Mon Sep 17 00:00:00 2001 From: Ruihan Li Date: Mon, 15 May 2023 21:09:57 +0800 Subject: [PATCH 217/934] mm: page_table_check: Make it dependent on EXCLUSIVE_SYSTEM_RAM Without EXCLUSIVE_SYSTEM_RAM, users are allowed to map arbitrary physical memory regions into the userspace via /dev/mem. At the same time, pages may change their properties (e.g., from anonymous pages to named pages) while they are still being mapped in the userspace, leading to "corruption" detected by the page table check. To avoid these false positives, this patch makes PAGE_TABLE_CHECK depends on EXCLUSIVE_SYSTEM_RAM. This dependency is understandable because PAGE_TABLE_CHECK is a hardening technique but /dev/mem without STRICT_DEVMEM (i.e., !EXCLUSIVE_SYSTEM_RAM) is itself a security problem. Even with EXCLUSIVE_SYSTEM_RAM, I/O pages may be still allowed to be mapped via /dev/mem. However, these pages are always considered as named pages, so they won't break the logic used in the page table check. Cc: # 5.17 Signed-off-by: Ruihan Li Acked-by: David Hildenbrand Acked-by: Pasha Tatashin Link: https://lore.kernel.org/r/20230515130958.32471-4-lrh2000@pku.edu.cn Signed-off-by: Greg Kroah-Hartman --- Documentation/mm/page_table_check.rst | 19 +++++++++++++++++++ mm/Kconfig.debug | 1 + 2 files changed, 20 insertions(+) diff --git a/Documentation/mm/page_table_check.rst b/Documentation/mm/page_table_check.rst index cfd8f4117cf3..c12838ce6b8d 100644 --- a/Documentation/mm/page_table_check.rst +++ b/Documentation/mm/page_table_check.rst @@ -52,3 +52,22 @@ Build kernel with: Optionally, build kernel with PAGE_TABLE_CHECK_ENFORCED in order to have page table support without extra kernel parameter. + +Implementation notes +==================== + +We specifically decided not to use VMA information in order to avoid relying on +MM states (except for limited "struct page" info). The page table check is a +separate from Linux-MM state machine that verifies that the user accessible +pages are not falsely shared. + +PAGE_TABLE_CHECK depends on EXCLUSIVE_SYSTEM_RAM. The reason is that without +EXCLUSIVE_SYSTEM_RAM, users are allowed to map arbitrary physical memory +regions into the userspace via /dev/mem. At the same time, pages may change +their properties (e.g., from anonymous pages to named pages) while they are +still being mapped in the userspace, leading to "corruption" detected by the +page table check. + +Even with EXCLUSIVE_SYSTEM_RAM, I/O pages may be still allowed to be mapped via +/dev/mem. However, these pages are always considered as named pages, so they +won't break the logic used in the page table check. diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index a925415b4d10..018a5bd2f576 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -98,6 +98,7 @@ config PAGE_OWNER config PAGE_TABLE_CHECK bool "Check for invalid mappings in user page tables" depends on ARCH_SUPPORTS_PAGE_TABLE_CHECK + depends on EXCLUSIVE_SYSTEM_RAM select PAGE_EXTENSION help Check that anonymous page is not being mapped twice with read write From 44d0fb387b53e56c8a050bac5c7d460e21eb226f Mon Sep 17 00:00:00 2001 From: Ruihan Li Date: Mon, 15 May 2023 21:09:58 +0800 Subject: [PATCH 218/934] mm: page_table_check: Ensure user pages are not slab pages The current uses of PageAnon in page table check functions can lead to type confusion bugs between struct page and slab [1], if slab pages are accidentally mapped into the user space. This is because slab reuses the bits in struct page to store its internal states, which renders PageAnon ineffective on slab pages. Since slab pages are not expected to be mapped into the user space, this patch adds BUG_ON(PageSlab(page)) checks to make sure that slab pages are not inadvertently mapped. Otherwise, there must be some bugs in the kernel. Reported-by: syzbot+fcf1a817ceb50935ce99@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/000000000000258e5e05fae79fc1@google.com/ [1] Fixes: df4e817b7108 ("mm: page table check") Cc: # 5.17 Signed-off-by: Ruihan Li Acked-by: Pasha Tatashin Link: https://lore.kernel.org/r/20230515130958.32471-5-lrh2000@pku.edu.cn Signed-off-by: Greg Kroah-Hartman --- include/linux/page-flags.h | 6 ++++++ mm/page_table_check.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1c68d67b832f..92a2063a0a23 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -617,6 +617,12 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" * refers to user virtual address space into which the page is mapped. + * + * For slab pages, since slab reuses the bits in struct page to store its + * internal states, the page->mapping does not exist as such, nor do these + * flags below. So in order to avoid testing non-existent bits, please + * make sure that PageSlab(page) actually evaluates to false before calling + * the following functions (e.g., PageAnon). See mm/slab.h. */ #define PAGE_MAPPING_ANON 0x1 #define PAGE_MAPPING_MOVABLE 0x2 diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 25d8610c0042..f2baf97d5f38 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -71,6 +71,8 @@ static void page_table_check_clear(struct mm_struct *mm, unsigned long addr, page = pfn_to_page(pfn); page_ext = page_ext_get(page); + + BUG_ON(PageSlab(page)); anon = PageAnon(page); for (i = 0; i < pgcnt; i++) { @@ -107,6 +109,8 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr, page = pfn_to_page(pfn); page_ext = page_ext_get(page); + + BUG_ON(PageSlab(page)); anon = PageAnon(page); for (i = 0; i < pgcnt; i++) { @@ -133,6 +137,8 @@ void __page_table_check_zero(struct page *page, unsigned int order) struct page_ext *page_ext; unsigned long i; + BUG_ON(PageSlab(page)); + page_ext = page_ext_get(page); BUG_ON(!page_ext); for (i = 0; i < (1ul << order); i++) { From ffe14de983252862c91ad23bd5ca72fd9398d0e6 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Thu, 25 May 2023 09:44:44 +0000 Subject: [PATCH 219/934] MAINTAINERS: Update maintainer of Amazon EFA driver Change EFA driver maintainer from Gal Pressman to myself. Keep Gal as a reviewer at his request. Link: https://lore.kernel.org/r/20230525094444.12570-1-mrgolin@amazon.com Signed-off-by: Michael Margolin Acked-by: Gal Pressman Signed-off-by: Jason Gunthorpe --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c56043737848..dc8c4708733b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -956,7 +956,8 @@ F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst F: drivers/net/ethernet/amazon/ AMAZON RDMA EFA DRIVER -M: Gal Pressman +M: Michael Margolin +R: Gal Pressman R: Yossi Leybovich L: linux-rdma@vger.kernel.org S: Supported From c8f304d75f6c6cc679a73f89591f9a915da38f09 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 22 May 2023 10:56:53 -0500 Subject: [PATCH 220/934] RDMA/irdma: Prevent QP use after free There is a window where the poll cq may use a QP that has been freed. This can happen if a CQE is polled before irdma_clean_cqes() can clear the CQE's related to the QP and the destroy QP races to free the QP memory. then the QP structures are used in irdma_poll_cq. Fix this by moving the clearing of CQE's before the reference is removed and the QP is destroyed. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20230522155654.1309-3-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index ab5cdf782785..68ce3cd40002 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -522,11 +522,6 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (!iwqp->user_mode) cancel_delayed_work_sync(&iwqp->dwork_flush); - irdma_qp_rem_ref(&iwqp->ibqp); - wait_for_completion(&iwqp->free_qp); - irdma_free_lsmm_rsrc(iwqp); - irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); - if (!iwqp->user_mode) { if (iwqp->iwscq) { irdma_clean_cqes(iwqp, iwqp->iwscq); @@ -534,6 +529,12 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) irdma_clean_cqes(iwqp, iwqp->iwrcq); } } + + irdma_qp_rem_ref(&iwqp->ibqp); + wait_for_completion(&iwqp->free_qp); + irdma_free_lsmm_rsrc(iwqp); + irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); + irdma_remove_push_mmap_entries(iwqp); irdma_free_qp_rsrc(iwqp); From 5842d1d9c1b0d17e0c29eae65ae1f245f83682dd Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Mon, 22 May 2023 10:56:54 -0500 Subject: [PATCH 221/934] RDMA/irdma: Fix Local Invalidate fencing If the local invalidate fence is indicated in the WR, only the read fence is currently being set in WQE. Fix this to set both the read and local fence in the WQE. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20230522155654.1309-4-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 68ce3cd40002..eaa12c124598 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3292,6 +3292,7 @@ static int irdma_post_send(struct ib_qp *ibqp, break; case IB_WR_LOCAL_INV: info.op_type = IRDMA_OP_TYPE_INV_STAG; + info.local_fence = info.read_fence; info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; err = irdma_uk_stag_local_invalidate(ukqp, &info, true); break; From 7f875850f20a42f488840c9df7af91ef7db2d576 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 22 May 2023 20:09:57 +0900 Subject: [PATCH 222/934] ata: libata-scsi: Use correct device no in ata_find_dev() For devices not attached to a port multiplier and managed directly by libata, the device number passed to ata_find_dev() must always be lower than the maximum number of devices returned by ata_link_max_devices(). That is 1 for SATA devices or 2 for an IDE link with master+slave devices. This device number is the SCSI device ID which matches these constraints as the IDs are generated per port and so never exceed the maximum number of devices for the link being used. However, for libsas managed devices, SCSI device IDs are assigned per struct scsi_host, leading to device IDs for SATA devices that can be well in excess of libata per-link maximum number of devices. This results in ata_find_dev() to always return NULL for libsas managed devices except for the first device of the target scsi_host with ID (device number) equal to 0. This issue is visible by executing the hdparm utility, which fails. E.g.: hdparm -i /dev/sdX /dev/sdX: HDIO_GET_IDENTITY failed: No message of desired type Fix this by rewriting ata_find_dev() to ignore the device number for non-PMP attached devices with a link with at most 1 device, that is SATA devices. For these, the device number 0 is always used to return the correct pointer to the struct ata_device of the port link. This change excludes IDE master/slave setups (maximum number of devices per link is 2) and port-multiplier attached devices. Also, to be consistant with the fact that SCSI device IDs and channel numbers used as device numbers are both unsigned int, change the devno argument of ata_find_dev() to unsigned int. Reported-by: Xingui Yang Fixes: 41bda9c98035 ("libata-link: update hotplug to handle PMP links") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Jason Yan --- drivers/ata/libata-scsi.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7bb12deab70c..8ce90284eb34 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2694,18 +2694,36 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) return 0; } -static struct ata_device *ata_find_dev(struct ata_port *ap, int devno) +static struct ata_device *ata_find_dev(struct ata_port *ap, unsigned int devno) { - if (!sata_pmp_attached(ap)) { - if (likely(devno >= 0 && - devno < ata_link_max_devices(&ap->link))) + /* + * For the non-PMP case, ata_link_max_devices() returns 1 (SATA case), + * or 2 (IDE master + slave case). However, the former case includes + * libsas hosted devices which are numbered per scsi host, leading + * to devno potentially being larger than 0 but with each struct + * ata_device having its own struct ata_port and struct ata_link. + * To accommodate these, ignore devno and always use device number 0. + */ + if (likely(!sata_pmp_attached(ap))) { + int link_max_devices = ata_link_max_devices(&ap->link); + + if (link_max_devices == 1) + return &ap->link.device[0]; + + if (devno < link_max_devices) return &ap->link.device[devno]; - } else { - if (likely(devno >= 0 && - devno < ap->nr_pmp_links)) - return &ap->pmp_link[devno].device[0]; + + return NULL; } + /* + * For PMP-attached devices, the device number corresponds to C + * (channel) of SCSI [H:C:I:L], indicating the port pmp link + * for the device. + */ + if (devno < ap->nr_pmp_links) + return &ap->pmp_link[devno].device[0]; + return NULL; } From 36936a56e1814f6c526fe71fbf980beab4f5577a Mon Sep 17 00:00:00 2001 From: Sebastian Krzyszkowiak Date: Fri, 26 May 2023 16:38:11 +0200 Subject: [PATCH 223/934] net: usb: qmi_wwan: Set DTR quirk for BroadMobi BM818 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BM818 is based on Qualcomm MDM9607 chipset. Fixes: 9a07406b00cd ("net: usb: qmi_wwan: Add the BroadMobi BM818 card") Cc: stable@vger.kernel.org Signed-off-by: Sebastian Krzyszkowiak Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20230526-bm818-dtr-v1-1-64bbfa6ba8af@puri.sm Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 571e37e67f9c..f1865d047971 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1325,7 +1325,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2001, 0x7e3d, 4)}, /* D-Link DWM-222 A2 */ {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */ - {QMI_FIXED_INTF(0x2020, 0x2060, 4)}, /* BroadMobi BM818 */ + {QMI_QUIRK_SET_DTR(0x2020, 0x2060, 4)}, /* BroadMobi BM818 */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ From 0684f29a89e58944a9bfae3a8b5c1a217e44c960 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 May 2023 15:06:53 -0700 Subject: [PATCH 224/934] netlink: specs: correct types of legacy arrays ethtool has some attrs which dump multiple scalars into an attribute. The spec currently expects one attr per entry. Fixes: a353318ebf24 ("tools: ynl: populate most of the ethtool spec") Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230526220653.65538-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 32 ++++++------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 129f413ea349..3abc576ff797 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -60,22 +60,6 @@ attribute-sets: type: nest nested-attributes: bitset-bits - - - name: u64-array - attributes: - - - name: u64 - type: nest - multi-attr: true - nested-attributes: u64 - - - name: s32-array - attributes: - - - name: s32 - type: nest - multi-attr: true - nested-attributes: s32 - name: string attributes: @@ -705,16 +689,16 @@ attribute-sets: type: u8 - name: corrected - type: nest - nested-attributes: u64-array + type: binary + sub-type: u64 - name: uncorr - type: nest - nested-attributes: u64-array + type: binary + sub-type: u64 - name: corr-bits - type: nest - nested-attributes: u64-array + type: binary + sub-type: u64 - name: fec attributes: @@ -827,8 +811,8 @@ attribute-sets: type: u32 - name: index - type: nest - nested-attributes: s32-array + type: binary + sub-type: s32 - name: module attributes: From 6ffc57ea004234d9373c57b204fd10370a69f392 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 May 2023 15:43:42 +0000 Subject: [PATCH 225/934] af_packet: do not use READ_ONCE() in packet_bind() A recent patch added READ_ONCE() in packet_bind() and packet_bind_spkt() This is better handled by reading pkt_sk(sk)->num later in packet_do_bind() while appropriate lock is held. READ_ONCE() in writers are often an evidence of something being wrong. Fixes: 822b5a1c17df ("af_packet: Fix data-races of pkt_sk(sk)->num.") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Jiri Pirko Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230526154342.2533026-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a1f9a0e9f3c8..a2dbeb264f26 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3201,6 +3201,9 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, lock_sock(sk); spin_lock(&po->bind_lock); + if (!proto) + proto = po->num; + rcu_read_lock(); if (po->fanout) { @@ -3299,7 +3302,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); name[sizeof(uaddr->sa_data_min)] = 0; - return packet_do_bind(sk, name, 0, READ_ONCE(pkt_sk(sk)->num)); + return packet_do_bind(sk, name, 0, 0); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) @@ -3316,8 +3319,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_family != AF_PACKET) return -EINVAL; - return packet_do_bind(sk, NULL, sll->sll_ifindex, - sll->sll_protocol ? : READ_ONCE(pkt_sk(sk)->num)); + return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol); } static struct proto packet_proto = { From 4faeee0cf8a5d88d63cdbc3bab124fb0e6aed08c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 May 2023 16:34:58 +0000 Subject: [PATCH 226/934] tcp: deny tcp_disconnect() when threads are waiting Historically connect(AF_UNSPEC) has been abused by syzkaller and other fuzzers to trigger various bugs. A recent one triggers a divide-by-zero [1], and Paolo Abeni was able to diagnose the issue. tcp_recvmsg_locked() has tests about sk_state being not TCP_LISTEN and TCP REPAIR mode being not used. Then later if socket lock is released in sk_wait_data(), another thread can call connect(AF_UNSPEC), then make this socket a TCP listener. When recvmsg() is resumed, it can eventually call tcp_cleanup_rbuf() and attempt a divide by 0 in tcp_rcv_space_adjust() [1] This patch adds a new socket field, counting number of threads blocked in sk_wait_event() and inet_wait_for_connect(). If this counter is not zero, tcp_disconnect() returns an error. This patch adds code in blocking socket system calls, thus should not hurt performance of non blocking ones. Note that we probably could revert commit 499350a5a6e7 ("tcp: initialize rcv_mss to TCP_MIN_MSS instead of 0") to restore original tcpi_rcv_mss meaning (was 0 if no payload was ever received on a socket) [1] divide error: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 13832 Comm: syz-executor.5 Not tainted 6.3.0-rc4-syzkaller-00224-g00c7b5f4ddc5 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023 RIP: 0010:tcp_rcv_space_adjust+0x36e/0x9d0 net/ipv4/tcp_input.c:740 Code: 00 00 00 00 fc ff df 4c 89 64 24 48 8b 44 24 04 44 89 f9 41 81 c7 80 03 00 00 c1 e1 04 44 29 f0 48 63 c9 48 01 e9 48 0f af c1 <49> f7 f6 48 8d 04 41 48 89 44 24 40 48 8b 44 24 30 48 c1 e8 03 48 RSP: 0018:ffffc900033af660 EFLAGS: 00010206 RAX: 4a66b76cbade2c48 RBX: ffff888076640cc0 RCX: 00000000c334e4ac RDX: 0000000000000000 RSI: dffffc0000000000 RDI: 0000000000000001 RBP: 00000000c324e86c R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8880766417f8 R13: ffff888028fbb980 R14: 0000000000000000 R15: 0000000000010344 FS: 00007f5bffbfe700(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32f25000 CR3: 000000007ced0000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_recvmsg_locked+0x100e/0x22e0 net/ipv4/tcp.c:2616 tcp_recvmsg+0x117/0x620 net/ipv4/tcp.c:2681 inet6_recvmsg+0x114/0x640 net/ipv6/af_inet6.c:670 sock_recvmsg_nosec net/socket.c:1017 [inline] sock_recvmsg+0xe2/0x160 net/socket.c:1038 ____sys_recvmsg+0x210/0x5a0 net/socket.c:2720 ___sys_recvmsg+0xf2/0x180 net/socket.c:2762 do_recvmmsg+0x25e/0x6e0 net/socket.c:2856 __sys_recvmmsg net/socket.c:2935 [inline] __do_sys_recvmmsg net/socket.c:2958 [inline] __se_sys_recvmmsg net/socket.c:2951 [inline] __x64_sys_recvmmsg+0x20f/0x260 net/socket.c:2951 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f5c0108c0f9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f5bffbfe168 EFLAGS: 00000246 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 00007f5c011ac050 RCX: 00007f5c0108c0f9 RDX: 0000000000000001 RSI: 0000000020000bc0 RDI: 0000000000000003 RBP: 00007f5c010e7b39 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000122 R11: 0000000000000246 R12: 0000000000000000 R13: 00007f5c012cfb1f R14: 00007f5bffbfe300 R15: 0000000000022000 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Reported-by: Paolo Abeni Diagnosed-by: Paolo Abeni Signed-off-by: Eric Dumazet Tested-by: Paolo Abeni Link: https://lore.kernel.org/r/20230526163458.2880232-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 4 ++++ net/ipv4/af_inet.c | 2 ++ net/ipv4/inet_connection_sock.c | 1 + net/ipv4/tcp.c | 6 ++++++ 4 files changed, 13 insertions(+) diff --git a/include/net/sock.h b/include/net/sock.h index 656ea89f60ff..b418425d7230 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -336,6 +336,7 @@ struct sk_filter; * @sk_cgrp_data: cgroup data for this cgroup * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start + * @sk_wait_pending: number of threads blocked on this socket * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed * @sk_write_space: callback to indicate there is bf sending space available @@ -428,6 +429,7 @@ struct sock { unsigned int sk_napi_id; #endif int sk_rcvbuf; + int sk_wait_pending; struct sk_filter __rcu *sk_filter; union { @@ -1174,6 +1176,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) #define sk_wait_event(__sk, __timeo, __condition, __wait) \ ({ int __rc; \ + __sk->sk_wait_pending++; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ @@ -1183,6 +1186,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } \ sched_annotate_sleep(); \ lock_sock(__sk); \ + __sk->sk_wait_pending--; \ __rc = __condition; \ __rc; \ }) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c4aab3aacbd8..4a76ebf793b8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -586,6 +586,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; + sk->sk_wait_pending++; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -601,6 +602,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) } remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; + sk->sk_wait_pending--; return timeo; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 65ad4251f6fd..1386787eaf1a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1142,6 +1142,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); + newsk->sk_wait_pending = 0; inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; newicsk->icsk_bind2_hash = NULL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a60f6f4e7cd9..635607ac37e4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3081,6 +3081,12 @@ int tcp_disconnect(struct sock *sk, int flags) int old_state = sk->sk_state; u32 seq; + /* Deny disconnect if other threads are blocked in sk_wait_event() + * or inet_wait_for_connect(). + */ + if (sk->sk_wait_pending) + return -EBUSY; + if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); From 34dfde4ad87b84d21278a7e19d92b5b2c68e6c4d Mon Sep 17 00:00:00 2001 From: Cambda Zhu Date: Sat, 27 May 2023 12:03:17 +0800 Subject: [PATCH 227/934] tcp: Return user_mss for TCP_MAXSEG in CLOSE/LISTEN state if user_mss set This patch replaces the tp->mss_cache check in getting TCP_MAXSEG with tp->rx_opt.user_mss check for CLOSE/LISTEN sock. Since tp->mss_cache is initialized with TCP_MSS_DEFAULT, checking if it's zero is probably a bug. With this change, getting TCP_MAXSEG before connecting will return default MSS normally, and return user_mss if user_mss is set. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Jack Yang Suggested-by: Eric Dumazet Link: https://lore.kernel.org/netdev/CANn89i+3kL9pYtkxkwxwNMzvC_w3LNUum_2=3u+UyLBmGmifHA@mail.gmail.com/#t Signed-off-by: Cambda Zhu Link: https://lore.kernel.org/netdev/14D45862-36EA-4076-974C-EA67513C92F6@linux.alibaba.com/ Reviewed-by: Jason Xing Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230527040317.68247-1-cambda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 635607ac37e4..8d20d9221238 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4078,7 +4078,8 @@ int do_tcp_getsockopt(struct sock *sk, int level, switch (optname) { case TCP_MAXSEG: val = tp->mss_cache; - if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + if (tp->rx_opt.user_mss && + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) val = tp->rx_opt.user_mss; if (tp->repair) val = tp->rx_opt.mss_clamp; From 81d358b118dc364bd147432db569d4d400a5a4f2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 25 May 2023 12:43:21 +1000 Subject: [PATCH 228/934] powerpc/crypto: Fix aes-gcm-p10 link errors The recently added P10 AES/GCM code added some files containing CRYPTOGAMS perl-asm code which are near duplicates of the p8 files found in drivers/crypto/vmx. In particular the newly added files produce functions with identical names to the existing code. When the kernel is built with CONFIG_CRYPTO_AES_GCM_P10=y and CONFIG_CRYPTO_DEV_VMX_ENCRYPT=y that leads to link errors, eg: ld: drivers/crypto/vmx/aesp8-ppc.o: in function `aes_p8_set_encrypt_key': (.text+0xa0): multiple definition of `aes_p8_set_encrypt_key'; arch/powerpc/crypto/aesp8-ppc.o:(.text+0xa0): first defined here ... ld: drivers/crypto/vmx/ghashp8-ppc.o: in function `gcm_ghash_p8': (.text+0x140): multiple definition of `gcm_ghash_p8'; arch/powerpc/crypto/ghashp8-ppc.o:(.text+0x2e4): first defined here Fix it for now by renaming the newly added files and functions to use "p10" instead of "p8" in the names. Fixes: 45a4672b9a6e ("crypto: p10-aes-gcm - Update Kconfig and Makefile") Tested-by: Vishal Chourasia Signed-off-by: Michael Ellerman Link: https://msgid.link/20230525150501.37081-1-mpe@ellerman.id.au --- arch/powerpc/crypto/Makefile | 10 +++++----- arch/powerpc/crypto/aes-gcm-p10-glue.c | 18 +++++++++--------- .../crypto/{aesp8-ppc.pl => aesp10-ppc.pl} | 2 +- .../crypto/{ghashp8-ppc.pl => ghashp10-ppc.pl} | 12 ++++++------ 4 files changed, 21 insertions(+), 21 deletions(-) rename arch/powerpc/crypto/{aesp8-ppc.pl => aesp10-ppc.pl} (99%) rename arch/powerpc/crypto/{ghashp8-ppc.pl => ghashp10-ppc.pl} (97%) diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 05c7486f42c5..7b4f516abec1 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -22,15 +22,15 @@ sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o -aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp8-ppc.o aesp8-ppc.o +aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@ -targets += aesp8-ppc.S ghashp8-ppc.S +targets += aesp10-ppc.S ghashp10-ppc.S -$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE +$(obj)/aesp10-ppc.S $(obj)/ghashp10-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE $(call if_changed,perl) -OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y -OBJECT_FILES_NON_STANDARD_ghashp8-ppc.o := y +OBJECT_FILES_NON_STANDARD_aesp10-ppc.o := y +OBJECT_FILES_NON_STANDARD_ghashp10-ppc.o := y diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c index bd3475f5348d..4b6e899895e7 100644 --- a/arch/powerpc/crypto/aes-gcm-p10-glue.c +++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c @@ -30,15 +30,15 @@ MODULE_AUTHOR("Danny Tsen aadLen = alen; i = alen & ~0xf; if (i) { - gcm_ghash_p8(nXi, hash->Htable+32, aad, i); + gcm_ghash_p10(nXi, hash->Htable+32, aad, i); aad += i; alen -= i; } @@ -102,7 +102,7 @@ static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash, nXi[i] ^= aad[i]; memset(gctx->aad_hash, 0, 16); - gcm_ghash_p8(gctx->aad_hash, hash->Htable+32, nXi, 16); + gcm_ghash_p10(gctx->aad_hash, hash->Htable+32, nXi, 16); } else { memcpy(gctx->aad_hash, nXi, 16); } @@ -115,7 +115,7 @@ static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey, { __be32 counter = cpu_to_be32(1); - aes_p8_encrypt(hash->H, hash->H, rdkey); + aes_p10_encrypt(hash->H, hash->H, rdkey); set_subkey(hash->H); gcm_init_htable(hash->Htable+32, hash->H); @@ -126,7 +126,7 @@ static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey, /* * Encrypt counter vector as iv tag and increment counter. */ - aes_p8_encrypt(iv, gctx->ivtag, rdkey); + aes_p10_encrypt(iv, gctx->ivtag, rdkey); counter = cpu_to_be32(2); *((__be32 *)(iv+12)) = counter; @@ -160,7 +160,7 @@ static void finish_tag(struct gcm_ctx *gctx, struct Hash_ctx *hash, int len) /* * hash (AAD len and len) */ - gcm_ghash_p8(hash->Htable, hash->Htable+32, aclen, 16); + gcm_ghash_p10(hash->Htable, hash->Htable+32, aclen, 16); for (i = 0; i < 16; i++) hash->Htable[i] ^= gctx->ivtag[i]; @@ -192,7 +192,7 @@ static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key, int ret; vsx_begin(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); + ret = aes_p10_set_encrypt_key(key, keylen * 8, &ctx->enc_key); vsx_end(); return ret ? -EINVAL : 0; diff --git a/arch/powerpc/crypto/aesp8-ppc.pl b/arch/powerpc/crypto/aesp10-ppc.pl similarity index 99% rename from arch/powerpc/crypto/aesp8-ppc.pl rename to arch/powerpc/crypto/aesp10-ppc.pl index 1f22aec27d79..2c06ce2a2c7c 100644 --- a/arch/powerpc/crypto/aesp8-ppc.pl +++ b/arch/powerpc/crypto/aesp10-ppc.pl @@ -110,7 +110,7 @@ die "can't locate ppc-xlate.pl"; open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; $FRAME=8*$SIZE_T; -$prefix="aes_p8"; +$prefix="aes_p10"; $sp="r1"; $vrsave="r12"; diff --git a/arch/powerpc/crypto/ghashp8-ppc.pl b/arch/powerpc/crypto/ghashp10-ppc.pl similarity index 97% rename from arch/powerpc/crypto/ghashp8-ppc.pl rename to arch/powerpc/crypto/ghashp10-ppc.pl index b56603b4a893..27a6b0bec645 100644 --- a/arch/powerpc/crypto/ghashp8-ppc.pl +++ b/arch/powerpc/crypto/ghashp10-ppc.pl @@ -64,7 +64,7 @@ $code=<<___; .text -.globl .gcm_init_p8 +.globl .gcm_init_p10 lis r0,0xfff0 li r8,0x10 mfspr $vrsave,256 @@ -110,7 +110,7 @@ $code=<<___; .long 0 .byte 0,12,0x14,0,0,0,2,0 .long 0 -.size .gcm_init_p8,.-.gcm_init_p8 +.size .gcm_init_p10,.-.gcm_init_p10 .globl .gcm_init_htable lis r0,0xfff0 @@ -237,7 +237,7 @@ $code=<<___; .long 0 .size .gcm_init_htable,.-.gcm_init_htable -.globl .gcm_gmult_p8 +.globl .gcm_gmult_p10 lis r0,0xfff8 li r8,0x10 mfspr $vrsave,256 @@ -283,9 +283,9 @@ $code=<<___; .long 0 .byte 0,12,0x14,0,0,0,2,0 .long 0 -.size .gcm_gmult_p8,.-.gcm_gmult_p8 +.size .gcm_gmult_p10,.-.gcm_gmult_p10 -.globl .gcm_ghash_p8 +.globl .gcm_ghash_p10 lis r0,0xfff8 li r8,0x10 mfspr $vrsave,256 @@ -350,7 +350,7 @@ Loop: .long 0 .byte 0,12,0x14,0,0,0,4,0 .long 0 -.size .gcm_ghash_p8,.-.gcm_ghash_p8 +.size .gcm_ghash_p10,.-.gcm_ghash_p10 .asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by " .align 2 From 9d2ccf00bddc268045e3d65a8108d61ada0e4b4e Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Thu, 25 May 2023 09:34:54 -0500 Subject: [PATCH 229/934] powerpc/iommu: Limit number of TCEs to 512 for H_STUFF_TCE hcall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently in tce_freemulti_pSeriesLP() there is no limit on how many TCEs are passed to the H_STUFF_TCE hcall. This has not caused an issue until now, but newer firmware releases have started enforcing a limit of 512 TCEs per call. The limit is correct per the specification (PAPR v2.12 § 14.5.4.2.3). The code has been in it's current form since it was initially merged. Cc: stable@vger.kernel.org Signed-off-by: Gaurav Batra Reviewed-by: Brian King [mpe: Tweak change log wording & add PAPR reference] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230525143454.56878-1-gbatra@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/iommu.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 918f511837db..d59e8a98a200 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -317,13 +317,22 @@ static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) { u64 rc; + long rpages = npages; + unsigned long limit; if (!firmware_has_feature(FW_FEATURE_STUFF_TCE)) return tce_free_pSeriesLP(tbl->it_index, tcenum, tbl->it_page_shift, npages); - rc = plpar_tce_stuff((u64)tbl->it_index, - (u64)tcenum << tbl->it_page_shift, 0, npages); + do { + limit = min_t(unsigned long, rpages, 512); + + rc = plpar_tce_stuff((u64)tbl->it_index, + (u64)tcenum << tbl->it_page_shift, 0, limit); + + rpages -= limit; + tcenum += limit; + } while (rpages > 0 && !rc); if (rc && printk_ratelimit()) { printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); From 719dfd5925e186e09a2a6f23016936ac436f3d78 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Mon, 29 May 2023 16:43:37 +0530 Subject: [PATCH 230/934] powerpc/xmon: Use KSYM_NAME_LEN in array size kallsyms_lookup() which in turn calls kallsyms_lookup_buildid() writes to index "KSYM_NAME_LEN - 1". Thus the array passed as namebuf to kallsyms_lookup() should be KSYM_NAME_LEN in size. In xmon.c the array was defined to be "128" bytes directly, without using KSYM_NAME_LEN. Commit b8a94bfb3395 ("kallsyms: increase maximum kernel symbol length to 512") changed the value to 512, but missed updating the xmon code. Fixes: b8a94bfb3395 ("kallsyms: increase maximum kernel symbol length to 512") Cc: stable@vger.kernel.org # v6.1+ Co-developed-by: Onkarnath Signed-off-by: Onkarnath Signed-off-by: Maninder Singh [mpe: Tweak change log wording and fix commit reference] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230529111337.352990-2-maninder1.s@samsung.com --- arch/powerpc/xmon/xmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 728d3c257e4a..70c4c59a1a8f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -88,7 +88,7 @@ static unsigned long ndump = 64; static unsigned long nidump = 16; static unsigned long ncsum = 4096; static int termch; -static char tmpstr[128]; +static char tmpstr[KSYM_NAME_LEN]; static int tracing_enabled; static long bus_error_jmp[JMP_BUF_LEN]; From 811154e234db72f0a11557a84ba9640f8b3bc823 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 30 May 2023 11:46:51 +0900 Subject: [PATCH 231/934] KVM: arm64: Populate fault info for watchpoint When handling ESR_ELx_EC_WATCHPT_LOW, far_el2 member of struct kvm_vcpu_fault_info will be copied to far member of struct kvm_debug_exit_arch and exposed to the userspace. The userspace will see stale values from older faults if the fault info does not get populated. Fixes: 8fb2046180a0 ("KVM: arm64: Move early handlers to per-EC handlers") Suggested-by: Marc Zyngier Signed-off-by: Akihiko Odaki Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230530024651.10014-1-akihiko.odaki@daynix.com Cc: stable@vger.kernel.org --- arch/arm64/kvm/hyp/include/hyp/switch.h | 8 ++++++-- arch/arm64/kvm/hyp/nvhe/switch.c | 2 ++ arch/arm64/kvm/hyp/vhe/switch.c | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index e78a08a72a3c..5c15c58f90cc 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -412,17 +412,21 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code) { if (!__populate_fault_info(vcpu)) return true; return false; } +static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) + __alias(kvm_hyp_handle_memory_fault); +static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code) + __alias(kvm_hyp_handle_memory_fault); static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) { - if (!__populate_fault_info(vcpu)) + if (kvm_hyp_handle_memory_fault(vcpu, exit_code)) return true; if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 71fa16a0dc77..77791495c995 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -186,6 +186,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; @@ -196,6 +197,7 @@ static const exit_handler_fn pvm_exit_handlers[] = { [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 3d868e84c7a0..7a1aa511e7da 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -110,6 +110,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; From 020c69c1a793ed29d28793808eddd75210c858dd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 May 2023 12:34:54 +0100 Subject: [PATCH 232/934] rxrpc: Truncate UTS_RELEASE for rxrpc version UTS_RELEASE has a maximum length of 64 which can cause rxrpc_version to exceed the 65 byte message limit. Per the rx spec[1]: "If a server receives a packet with a type value of 13, and the client-initiated flag set, it should respond with a 65-byte payload containing a string that identifies the version of AFS software it is running." The current implementation causes a compile error when WERROR is turned on and/or UTS_RELEASE exceeds the length of 49 (making the version string more than 64 characters). Fix this by generating the string during module initialisation and limiting the UTS_RELEASE segment of the string does not exceed 49 chars. We need to make sure that the 64 bytes includes "linux-" at the front and " AF_RXRPC" at the back as this may be used in pattern matching. Fixes: 44ba06987c0b ("RxRPC: Handle VERSION Rx protocol packets") Reported-by: Kenny Ho Link: https://lore.kernel.org/r/20230523223944.691076-1-Kenny.Ho@amd.com/ Signed-off-by: David Howells Acked-by: Kenny Ho cc: Marc Dionne cc: Andrew Lunn cc: David Laight cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: linux-afs@lists.infradead.org cc: netdev@vger.kernel.org Link: https://web.mit.edu/kolya/afs/rx/rx-spec [1] Reviewed-by: Simon Horman Reviewed-by: Jeffrey Altman Link: https://lore.kernel.org/r/654974.1685100894@warthog.procyon.org.uk Signed-off-by: Paolo Abeni --- net/rxrpc/af_rxrpc.c | 1 + net/rxrpc/ar-internal.h | 1 + net/rxrpc/local_event.c | 11 ++++++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 31f738d65f1c..da0b3b5157d5 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -980,6 +980,7 @@ static int __init af_rxrpc_init(void) BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb)); ret = -ENOMEM; + rxrpc_gen_version_string(); rxrpc_call_jar = kmem_cache_create( "rxrpc_call_jar", sizeof(struct rxrpc_call), 0, SLAB_HWCACHE_ALIGN, NULL); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5d44dc08f66d..e8e14c6f904d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1068,6 +1068,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t, /* * local_event.c */ +void rxrpc_gen_version_string(void); void rxrpc_send_version_request(struct rxrpc_local *local, struct rxrpc_host_header *hdr, struct sk_buff *skb); diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 5e69ea6b233d..993c69f97488 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -16,7 +16,16 @@ #include #include "ar-internal.h" -static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC"; +static char rxrpc_version_string[65]; // "linux-" UTS_RELEASE " AF_RXRPC"; + +/* + * Generate the VERSION packet string. + */ +void rxrpc_gen_version_string(void) +{ + snprintf(rxrpc_version_string, sizeof(rxrpc_version_string), + "linux-%.49s AF_RXRPC", UTS_RELEASE); +} /* * Reply to a version request From aafbb1eeabdc4e9241b400146f77369f041ec11b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 29 May 2023 04:40:02 -0400 Subject: [PATCH 233/934] tracing/rv/rtla: Update MAINTAINERS file to point to proper mailing list The mailing list that goes to linux-trace-devel is for the tracing libraries, and the patchwork associated to the tracing libraries keys off of that mailing list. For anything that lives in the Linux kernel proper (including the tools directory) must go through linux-trace-kernel, as the patchwork to that list keys off of the Linux kernel proper. Update the MAINTAINERS file to reflect the proper mailing lists. Link: https://lore.kernel.org/linux-trace-kernel/20230529044002.0481452b@rorschach.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) Acked-by: Daniel Bristot de Oliveira --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 27ef11624748..725d35b5d328 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17806,7 +17806,7 @@ F: tools/testing/selftests/rtc/ Real-time Linux Analysis (RTLA) tools M: Daniel Bristot de Oliveira M: Steven Rostedt -L: linux-trace-devel@vger.kernel.org +L: linux-trace-kernel@vger.kernel.org S: Maintained F: Documentation/tools/rtla/ F: tools/tracing/rtla/ @@ -18368,7 +18368,7 @@ F: drivers/infiniband/ulp/rtrs/ RUNTIME VERIFICATION (RV) M: Daniel Bristot de Oliveira M: Steven Rostedt -L: linux-trace-devel@vger.kernel.org +L: linux-trace-kernel@vger.kernel.org S: Maintained F: Documentation/trace/rv/ F: include/linux/rv.h From ed08d937eaa4f18aa26e47fe6b937205a4745045 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 25 May 2023 22:50:41 +0200 Subject: [PATCH 234/934] platform/surface: aggregator: Make to_ssam_device_driver() respect constness Make to_ssam_device_driver() a bit safer by replacing container_of() with container_of_const() to respect the constness of the passed in pointer, instead of silently discarding any const specifications. This change also makes it more similar to to_ssam_device(), which already uses container_of_const(). Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20230525205041.2774947-1-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- include/linux/surface_aggregator/device.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index df81043b9e71..42b249b4c24b 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -243,11 +243,7 @@ static inline bool is_ssam_device(struct device *d) * Return: Returns the pointer to the &struct ssam_device_driver wrapping the * given device driver @d. */ -static inline -struct ssam_device_driver *to_ssam_device_driver(struct device_driver *d) -{ - return container_of(d, struct ssam_device_driver, driver); -} +#define to_ssam_device_driver(d) container_of_const(d, struct ssam_device_driver, driver) const struct ssam_device_id *ssam_device_id_match(const struct ssam_device_id *table, const struct ssam_device_uid uid); From 539e0a7f9105d19c00629c3f4da00330488e8c60 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 25 May 2023 23:01:10 +0200 Subject: [PATCH 235/934] platform/surface: aggregator: Allow completion work-items to be executed in parallel Currently, event completion work-items are restricted to be run strictly in non-parallel fashion by the respective workqueue. However, this has lead to some problems: In some instances, the event notifier function called inside this completion workqueue takes a non-negligible amount of time to execute. One such example is the battery event handling code (surface_battery.c), which can result in a full battery information refresh, involving further synchronous communication with the EC inside the event handler. This is made worse if the communication fails spuriously, generally incurring a multi-second timeout. Since the event completions are run strictly non-parallel, this blocks other events from being propagated to the respective subsystems. This becomes especially noticeable for keyboard and touchpad input, which also funnel their events through this system. Here, users have reported occasional multi-second "freezes". Note, however, that the event handling system was never intended to run purely sequentially. Instead, we have one work struct per EC/SAM subsystem, processing the event queue for that subsystem. These work structs were intended to run in parallel, allowing sequential processing of work items for each subsystem but parallel processing of work items across subsystems. The only restriction to this is the way the workqueue is created. Therefore, replace create_workqueue() with alloc_workqueue() and do not restrict the maximum number of parallel work items to be executed on that queue, resolving any cross-subsystem blockage. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Link: https://github.com/linux-surface/linux-surface/issues/1026 Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20230525210110.2785470-1-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/surface/aggregator/controller.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 535581c0471c..7fc602e01487 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -825,7 +825,7 @@ static int ssam_cplt_init(struct ssam_cplt *cplt, struct device *dev) cplt->dev = dev; - cplt->wq = create_workqueue(SSAM_CPLT_WQ_NAME); + cplt->wq = alloc_workqueue(SSAM_CPLT_WQ_NAME, WQ_UNBOUND | WQ_MEM_RECLAIM, 0); if (!cplt->wq) return -ENOMEM; From 9bed667033e66083d363a11e9414ad401ecc242c Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 25 May 2023 23:32:17 +0200 Subject: [PATCH 236/934] platform/surface: aggregator_tabletsw: Add support for book mode in KIP subsystem Devices with a type-cover have an additional "book" mode, deactivating type-cover input and turning off its backlight. This is currently unsupported, leading to the warning surface_aggregator_tablet_mode_switch 01:0e:01:00:01: unknown KIP cover state: 6 Therefore, add support for this state and map it to enable tablet-mode. Fixes: 9f794056db5b ("platform/surface: Add KIP/POS tablet-mode switch driver") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20230525213218.2797480-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/surface/surface_aggregator_tabletsw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_tabletsw.c b/drivers/platform/surface/surface_aggregator_tabletsw.c index 8f52b62d1c19..4a029f5db20a 100644 --- a/drivers/platform/surface/surface_aggregator_tabletsw.c +++ b/drivers/platform/surface/surface_aggregator_tabletsw.c @@ -210,6 +210,7 @@ enum ssam_kip_cover_state { SSAM_KIP_COVER_STATE_LAPTOP = 0x03, SSAM_KIP_COVER_STATE_FOLDED_CANVAS = 0x04, SSAM_KIP_COVER_STATE_FOLDED_BACK = 0x05, + SSAM_KIP_COVER_STATE_BOOK = 0x06, }; static const char *ssam_kip_cover_state_name(struct ssam_tablet_sw *sw, @@ -231,6 +232,9 @@ static const char *ssam_kip_cover_state_name(struct ssam_tablet_sw *sw, case SSAM_KIP_COVER_STATE_FOLDED_BACK: return "folded-back"; + case SSAM_KIP_COVER_STATE_BOOK: + return "book"; + default: dev_warn(&sw->sdev->dev, "unknown KIP cover state: %u\n", state->state); return ""; @@ -244,6 +248,7 @@ static bool ssam_kip_cover_state_is_tablet_mode(struct ssam_tablet_sw *sw, case SSAM_KIP_COVER_STATE_DISCONNECTED: case SSAM_KIP_COVER_STATE_FOLDED_CANVAS: case SSAM_KIP_COVER_STATE_FOLDED_BACK: + case SSAM_KIP_COVER_STATE_BOOK: return true; case SSAM_KIP_COVER_STATE_CLOSED: From 061c228967f0e3f7aecdd32ee370ee745d96168d Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Thu, 25 May 2023 23:32:18 +0200 Subject: [PATCH 237/934] platform/surface: aggregator_tabletsw: Add support for book mode in POS subsystem Devices with a type-cover have an additional "book" mode, deactivating type-cover input and turning off its backlight. This is currently unsupported, leading to the warning surface_aggregator_tablet_mode_switch 01:26:01:00:01: unknown device posture for type-cover: 6 Therefore, add support for this state and map it to enable tablet-mode. Fixes: 37ff64cd81ff ("platform/surface: aggregator_tabletsw: Add support for Type-Cover posture source") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20230525213218.2797480-3-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/surface/surface_aggregator_tabletsw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_tabletsw.c b/drivers/platform/surface/surface_aggregator_tabletsw.c index 4a029f5db20a..c0a1a5869246 100644 --- a/drivers/platform/surface/surface_aggregator_tabletsw.c +++ b/drivers/platform/surface/surface_aggregator_tabletsw.c @@ -340,6 +340,7 @@ enum ssam_pos_state_cover { SSAM_POS_COVER_LAPTOP = 0x03, SSAM_POS_COVER_FOLDED_CANVAS = 0x04, SSAM_POS_COVER_FOLDED_BACK = 0x05, + SSAM_POS_COVER_BOOK = 0x06, }; enum ssam_pos_state_sls { @@ -372,6 +373,9 @@ static const char *ssam_pos_state_name_cover(struct ssam_tablet_sw *sw, u32 stat case SSAM_POS_COVER_FOLDED_BACK: return "folded-back"; + case SSAM_POS_COVER_BOOK: + return "book"; + default: dev_warn(&sw->sdev->dev, "unknown device posture for type-cover: %u\n", state); return ""; @@ -421,6 +425,7 @@ static bool ssam_pos_state_is_tablet_mode_cover(struct ssam_tablet_sw *sw, u32 s case SSAM_POS_COVER_DISCONNECTED: case SSAM_POS_COVER_FOLDED_CANVAS: case SSAM_POS_COVER_FOLDED_BACK: + case SSAM_POS_COVER_BOOK: return true; case SSAM_POS_COVER_CLOSED: From b24aa141c2ff26c919237aee61ea1818fc6780d9 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Fri, 26 May 2023 19:49:00 +0800 Subject: [PATCH 238/934] net/smc: Scan from current RMB list when no position specified When finding the first RMB of link group, it should start from the current RMB list whose index is 0. So fix it. Fixes: b4ba4652b3f8 ("net/smc: extend LLC layer for SMC-Rv2") Signed-off-by: Wen Gu Signed-off-by: Paolo Abeni --- net/smc/smc_llc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a0840b8c935b..8423e8e0063f 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -578,7 +578,10 @@ static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr, { struct smc_buf_desc *buf_next; - if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) { + if (!buf_pos) + return _smc_llc_get_next_rmb(lgr, buf_lst); + + if (list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) { (*buf_lst)++; return _smc_llc_get_next_rmb(lgr, buf_lst); } From 71c6aa0305e3d2365d3bfd0134b4025d9e7ba388 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Fri, 26 May 2023 19:49:01 +0800 Subject: [PATCH 239/934] net/smc: Don't use RMBs not mapped to new link in SMCRv2 ADD LINK We encountered a crash when using SMCRv2. It is caused by a logical error in smc_llc_fill_ext_v2(). BUG: kernel NULL pointer dereference, address: 0000000000000014 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 453 Comm: kworker/7:4 Kdump: loaded Tainted: G W E 6.4.0-rc3+ #44 Workqueue: events smc_llc_add_link_work [smc] RIP: 0010:smc_llc_fill_ext_v2+0x117/0x280 [smc] RSP: 0018:ffffacb5c064bd88 EFLAGS: 00010282 RAX: ffff9a6bc1c3c02c RBX: ffff9a6be3558000 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 0000000000000002 RDI: 000000000000000a RBP: ffffacb5c064bdb8 R08: 0000000000000040 R09: 000000000000000c R10: ffff9a6bc0910300 R11: 0000000000000002 R12: 0000000000000000 R13: 0000000000000002 R14: ffff9a6bc1c3c02c R15: ffff9a6be3558250 FS: 0000000000000000(0000) GS:ffff9a6eefdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000014 CR3: 000000010b078003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: smc_llc_send_add_link+0x1ae/0x2f0 [smc] smc_llc_srv_add_link+0x2c9/0x5a0 [smc] ? cc_mkenc+0x40/0x60 smc_llc_add_link_work+0xb8/0x140 [smc] process_one_work+0x1e5/0x3f0 worker_thread+0x4d/0x2f0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 When an alernate RNIC is available in system, SMC will try to add a new link based on the RNIC for resilience. All the RMBs in use will be mapped to the new link. Then the RMBs' MRs corresponding to the new link will be filled into SMCRv2 LLC ADD LINK messages. However, smc_llc_fill_ext_v2() mistakenly accesses to unused RMBs which haven't been mapped to the new link and have no valid MRs, thus causing a crash. So this patch fixes the logic. Fixes: b4ba4652b3f8 ("net/smc: extend LLC layer for SMC-Rv2") Signed-off-by: Wen Gu Signed-off-by: Paolo Abeni --- net/smc/smc_llc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 8423e8e0063f..7a8d9163d186 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -617,6 +617,8 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext, goto out; buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst); for (i = 0; i < ext->num_rkeys; i++) { + while (buf_pos && !(buf_pos)->used) + buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos); if (!buf_pos) break; rmb = buf_pos; @@ -626,8 +628,6 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext, cpu_to_be64((uintptr_t)rmb->cpu_addr) : cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl)); buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos); - while (buf_pos && !(buf_pos)->used) - buf_pos = smc_llc_get_next_rmb(lgr, &buf_lst, buf_pos); } len += i * sizeof(ext->rt[0]); out: From 929f4e7c06ca40c8e58b418c94708d880518d9b4 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Tue, 30 May 2023 21:49:35 +0800 Subject: [PATCH 240/934] MAINTAINERS: update Microchip MPF FPGA reviewers As I'm leaving Metrotek, hand over reviewing duty of Microchip MPF FPGA driver to Vladimir. Signed-off-by: Ivan Bornyakov Acked-by: Conor Dooley Acked-by: Vladimir Georgiev Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20230429104838.5064-2-i.bornyakov@metrotek.ru Signed-off-by: Xu Yilun Link: https://lore.kernel.org/r/20230530134936.634370-2-yilun.xu@intel.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 250518fc70ff..205b545d0a4a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13836,7 +13836,7 @@ F: drivers/tty/serial/8250/8250_pci1xxxx.c MICROCHIP POLARFIRE FPGA DRIVERS M: Conor Dooley -R: Ivan Bornyakov +R: Vladimir Georgiev L: linux-fpga@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml From 9da6225bc7377941acff4476493d515b9fdfea48 Mon Sep 17 00:00:00 2001 From: Ivan Bornyakov Date: Tue, 30 May 2023 21:49:36 +0800 Subject: [PATCH 241/934] dt-bindings: fpga: replace Ivan Bornyakov maintainership As I'm leaving Metrotek, hand over Lattice Slave SPI sysCONFIG FPGA manager and Microchip Polarfire FPGA manager maintainership duties to Vladimir. Signed-off-by: Ivan Bornyakov Acked-by: Conor Dooley Acked-by: Vladimir Georgiev Acked-by: Rob Herring Link: https://lore.kernel.org/r/20230429104838.5064-3-i.bornyakov@metrotek.ru Signed-off-by: Xu Yilun Link: https://lore.kernel.org/r/20230530134936.634370-3-yilun.xu@intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml | 2 +- .../devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml b/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml index 4fb05eb84e2a..164331eb6275 100644 --- a/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml +++ b/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Lattice Slave SPI sysCONFIG FPGA manager maintainers: - - Ivan Bornyakov + - Vladimir Georgiev description: | Lattice sysCONFIG port, which is used for FPGA configuration, among others, diff --git a/Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml b/Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml index 527532f039ce..a157eecfb5fc 100644 --- a/Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml +++ b/Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Microchip Polarfire FPGA manager. maintainers: - - Ivan Bornyakov + - Vladimir Georgiev description: Device Tree Bindings for Microchip Polarfire FPGA Manager using slave SPI to From 71698da37ce70c457751baea507a122851a2d481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= Date: Tue, 30 May 2023 10:35:46 +0200 Subject: [PATCH 242/934] MAINTAINERS: Vaibhav Gupta is the new ipack maintainer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I have no longer access to the HW, nor time to properly maintain it. Adding Vaibhav as maintainer as he currently has access to the HW, he is working at CERN (user of these drivers) and he is maintaining them internally there. Signed-off-by: Samuel Iglesias Gonsálvez Acked-by: Vaibhav Gupta Link: https://lore.kernel.org/r/20230530083546.4831-1-vaibhavgupta40@gmail.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 205b545d0a4a..4d623e78ac2f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10114,7 +10114,7 @@ S: Maintained F: Documentation/process/kernel-docs.rst INDUSTRY PACK SUBSYSTEM (IPACK) -M: Samuel Iglesias Gonsalvez +M: Vaibhav Gupta M: Jens Taprogge M: Greg Kroah-Hartman L: industrypack-devel@lists.sourceforge.net From 91539341a3b6e9c868024a4292455dae36e6f58c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 30 May 2023 11:01:22 +0100 Subject: [PATCH 243/934] irqchip/gic: Correctly validate OF quirk descriptors When checking for OF quirks, make sure either 'compatible' or 'property' is set, and give up otherwise. This avoids non-OF quirks being randomly applied as they don't have any of the OF data that need checking. Cc: Douglas Anderson Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Fixes: 44bd78dd2b88 ("irqchip/gic-v3: Disable pseudo NMIs on Mediatek devices w/ firmware issues") Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index de47b51cdadb..afd6a1841715 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -16,6 +16,8 @@ void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data) { for (; quirks->desc; quirks++) { + if (!quirks->compatible && !quirks->property) + continue; if (quirks->compatible && !of_device_is_compatible(np, quirks->compatible)) continue; From 1919b39fc6eabb9a6f9a51706ff6d03865f5df29 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 26 May 2023 08:38:57 -0700 Subject: [PATCH 244/934] net: mana: Fix perf regression: remove rx_cqes, tx_cqes counters The apc->eth_stats.rx_cqes is one per NIC (vport), and it's on the frequent and parallel code path of all queues. So, r/w into this single shared variable by many threads on different CPUs creates a lot caching and memory overhead, hence perf regression. And, it's not accurate due to the high volume concurrent r/w. For example, a workload is iperf with 128 threads, and with RPS enabled. We saw perf regression of 25% with the previous patch adding the counters. And this patch eliminates the regression. Since the error path of mana_poll_rx_cq() already has warnings, so keeping the counter and convert it to a per-queue variable is not necessary. So, just remove this counter from this high frequency code path. Also, remove the tx_cqes counter for the same reason. We have warnings & other counters for errors on that path, and don't need to count every normal cqe processing. Cc: stable@vger.kernel.org Fixes: bd7fc6e1957c ("net: mana: Add new MANA VF performance counters for easier troubleshooting") Signed-off-by: Haiyang Zhang Reviewed-by: Horatiu Vultur Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/1685115537-31675-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ---------- drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 2 -- include/net/mana/mana.h | 2 -- 3 files changed, 14 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 06d6292e09b3..d907727c7b7a 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1279,8 +1279,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq) if (comp_read < 1) return; - apc->eth_stats.tx_cqes = comp_read; - for (i = 0; i < comp_read; i++) { struct mana_tx_comp_oob *cqe_oob; @@ -1363,8 +1361,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq) WARN_ON_ONCE(1); cq->work_done = pkt_transmitted; - - apc->eth_stats.tx_cqes -= pkt_transmitted; } static void mana_post_pkt_rxq(struct mana_rxq *rxq) @@ -1626,15 +1622,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq) { struct gdma_comp *comp = cq->gdma_comp_buf; struct mana_rxq *rxq = cq->rxq; - struct mana_port_context *apc; int comp_read, i; - apc = netdev_priv(rxq->ndev); - comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); - apc->eth_stats.rx_cqes = comp_read; rxq->xdp_flush = false; for (i = 0; i < comp_read; i++) { @@ -1646,8 +1638,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) return; mana_process_rx_cqe(rxq, cq, &comp[i]); - - apc->eth_stats.rx_cqes--; } if (rxq->xdp_flush) diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index a64c81410dc1..0dc78679f620 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -13,11 +13,9 @@ static const struct { } mana_eth_stats[] = { {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, - {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)}, {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)}, {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, tx_cqe_unknown_type)}, - {"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)}, {"rx_coalesced_err", offsetof(struct mana_ethtool_stats, rx_coalesced_err)}, {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index cd386aa7c7cc..9eef19972845 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -347,10 +347,8 @@ struct mana_tx_qp { struct mana_ethtool_stats { u64 stop_queue; u64 wake_queue; - u64 tx_cqes; u64 tx_cqe_err; u64 tx_cqe_unknown_type; - u64 rx_cqes; u64 rx_coalesced_err; u64 rx_cqe_unknown_type; }; From fb109fba728407fa4a84d659b5cb87cd8399d7b3 Mon Sep 17 00:00:00 2001 From: Hao Yao Date: Wed, 24 May 2023 11:51:33 +0800 Subject: [PATCH 245/934] platform/x86: int3472: Avoid crash in unregistering regulator gpio When int3472 is loaded before GPIO driver, acpi_get_and_request_gpiod() failed but the returned gpio descriptor is not NULL, it will cause panic in later gpiod_put(), so set the gpio_desc to NULL in register error handling to avoid such crash. Signed-off-by: Hao Yao Signed-off-by: Bingbu Cao Link: https://lore.kernel.org/r/20230524035135.90315-1-bingbu.cao@intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../platform/x86/intel/int3472/clk_and_regulator.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c index 1086c3d83494..399f0623ca1b 100644 --- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c +++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c @@ -101,9 +101,11 @@ int skl_int3472_register_clock(struct int3472_discrete_device *int3472, int3472->clock.ena_gpio = acpi_get_and_request_gpiod(path, agpio->pin_table[0], "int3472,clk-enable"); - if (IS_ERR(int3472->clock.ena_gpio)) - return dev_err_probe(int3472->dev, PTR_ERR(int3472->clock.ena_gpio), - "getting clk-enable GPIO\n"); + if (IS_ERR(int3472->clock.ena_gpio)) { + ret = PTR_ERR(int3472->clock.ena_gpio); + int3472->clock.ena_gpio = NULL; + return dev_err_probe(int3472->dev, ret, "getting clk-enable GPIO\n"); + } if (polarity == GPIO_ACTIVE_LOW) gpiod_toggle_active_low(int3472->clock.ena_gpio); @@ -199,8 +201,9 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, int3472->regulator.gpio = acpi_get_and_request_gpiod(path, agpio->pin_table[0], "int3472,regulator"); if (IS_ERR(int3472->regulator.gpio)) { - dev_err(int3472->dev, "Failed to get regulator GPIO line\n"); - return PTR_ERR(int3472->regulator.gpio); + ret = PTR_ERR(int3472->regulator.gpio); + int3472->regulator.gpio = NULL; + return dev_err_probe(int3472->dev, ret, "getting regulator GPIO\n"); } /* Ensure the pin is in output mode and non-active state */ From d328fe87067480cf2bd0b58dab428a98d31dbb7e Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sun, 28 May 2023 19:35:26 +0200 Subject: [PATCH 246/934] selftests: mptcp: join: avoid using 'cmp --bytes' BusyBox's 'cmp' command doesn't support the '--bytes' parameter. Some CIs -- i.e. LKFT -- use BusyBox and have the mptcp_join.sh test failing [1] because their 'cmp' command doesn't support this '--bytes' option: cmp: unrecognized option '--bytes=1024' BusyBox v1.35.0 () multi-call binary. Usage: cmp [-ls] [-n NUM] FILE1 [FILE2] Instead, 'head --bytes' can be used as this option is supported by BusyBox. A temporary file is needed for this operation. Because it is apparently quite common to use BusyBox, it is certainly better to backport this fix to impacted kernels. Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases") Cc: stable@vger.kernel.org Link: https://qa-reports.linaro.org/lkft/linux-mainline-master/build/v6.3-rc5-5-g148341f0a2f5/testrun/16088933/suite/kselftest-net-mptcp/test/net_mptcp_userspace_pm_sh/log [1] Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 26310c17b4c6..4f3fe45f8f71 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -17,6 +17,7 @@ sout="" cin="" cinfail="" cinsent="" +tmpfile="" cout="" capout="" ns1="" @@ -175,6 +176,7 @@ cleanup() { rm -f "$cin" "$cout" "$sinfail" rm -f "$sin" "$sout" "$cinsent" "$cinfail" + rm -f "$tmpfile" rm -rf $evts_ns1 $evts_ns2 cleanup_partial } @@ -383,9 +385,16 @@ check_transfer() fail_test return 1 fi - bytes="--bytes=${bytes}" + + # note: BusyBox's "cmp" command doesn't support --bytes + tmpfile=$(mktemp) + head --bytes="$bytes" "$in" > "$tmpfile" + mv "$tmpfile" "$in" + head --bytes="$bytes" "$out" > "$tmpfile" + mv "$tmpfile" "$out" + tmpfile="" fi - cmp -l "$in" "$out" ${bytes} | while read -r i a b; do + cmp -l "$in" "$out" | while read -r i a b; do local sum=$((0${a} + 0${b})) if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then echo "[ FAIL ] $what does not match (in, out):" From d83013bdf90a7994a474b0e650a7fc94b0d4ded6 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sun, 28 May 2023 19:35:27 +0200 Subject: [PATCH 247/934] selftests: mptcp: connect: skip if MPTCP is not supported Selftests are supposed to run on any kernels, including the old ones not supporting MPTCP. A new check is then added to make sure MPTCP is supported. If not, the test stops and is marked as "skipped". Note that this check can also mark the test as failed if 'SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES' env var is set to 1: by doing that, we can make sure a test is not being skipped by mistake. A new shared file is added here to be able to re-used the same check in the different selftests we have. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/Makefile | 2 +- .../selftests/net/mptcp/mptcp_connect.sh | 4 ++ .../testing/selftests/net/mptcp/mptcp_lib.sh | 40 +++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/mptcp/mptcp_lib.sh diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 43a723626126..7b936a926859 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -9,7 +9,7 @@ TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq -TEST_FILES := settings +TEST_FILES := mptcp_lib.sh settings EXTRA_CLEAN := *.pcap diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index a43d3e2f59bb..c1f7bac19942 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + time_start=$(date +%s) optstring="S:R:d:e:l:r:h4cm:f:tC" @@ -141,6 +143,8 @@ cleanup() done } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh new file mode 100644 index 000000000000..3286536b79d5 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -0,0 +1,40 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly KSFT_FAIL=1 +readonly KSFT_SKIP=4 + +# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all +# features using the last version of the kernel and the selftests to make sure +# a test is not being skipped by mistake. +mptcp_lib_expect_all_features() { + [ "${SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES:-}" = "1" ] +} + +# $1: msg +mptcp_lib_fail_if_expected_feature() { + if mptcp_lib_expect_all_features; then + echo "ERROR: missing feature: ${*}" + exit ${KSFT_FAIL} + fi + + return 1 +} + +# $1: file +mptcp_lib_has_file() { + local f="${1}" + + if [ -f "${f}" ]; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "${f} file not found" +} + +mptcp_lib_check_mptcp() { + if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then + echo "SKIP: MPTCP support is not available" + exit ${KSFT_SKIP} + fi +} From 0f4955a40dafe18a1122e3714d8173e4b018e869 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sun, 28 May 2023 19:35:28 +0200 Subject: [PATCH 248/934] selftests: mptcp: pm nl: skip if MPTCP is not supported Selftests are supposed to run on any kernels, including the old ones not supporting MPTCP. A new check is then added to make sure MPTCP is supported. If not, the test stops and is marked as "skipped". Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: eedbc685321b ("selftests: add PM netlink functional tests") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 89839d1ff9d8..32f7533e0919 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + ksft_skip=4 ret=0 @@ -34,6 +36,8 @@ cleanup() ip netns del $ns1 } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" From 715c78a82e00f848f99ef76e6f6b89216ccba268 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sun, 28 May 2023 19:35:29 +0200 Subject: [PATCH 249/934] selftests: mptcp: join: skip if MPTCP is not supported Selftests are supposed to run on any kernels, including the old ones not supporting MPTCP. A new check is then added to make sure MPTCP is supported. If not, the test stops and is marked as "skipped". Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: b08fbf241064 ("selftests: add test-cases for MPTCP MP_JOIN") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 4f3fe45f8f71..96f63172b8fe 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -10,6 +10,8 @@ # because it's invoked by variable name, see how the "tests" array is used #shellcheck disable=SC2317 +. "$(dirname "${0}")/mptcp_lib.sh" + ret=0 sin="" sinfail="" @@ -137,6 +139,8 @@ cleanup_partial() check_tools() { + mptcp_lib_check_mptcp + if ! ip -Version &> /dev/null; then echo "SKIP: Could not run test without ip tool" exit $ksft_skip From 46565acdd29facbf418a11e4a3791b3c8967308d Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sun, 28 May 2023 19:35:30 +0200 Subject: [PATCH 250/934] selftests: mptcp: diag: skip if MPTCP is not supported Selftests are supposed to run on any kernels, including the old ones not supporting MPTCP. A new check is then added to make sure MPTCP is supported. If not, the test stops and is marked as "skipped". Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: df62f2ec3df6 ("selftests/mptcp: add diag interface tests") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/diag.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index ef628b16fe9b..4eacdb1ab962 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" @@ -31,6 +33,8 @@ cleanup() ip netns del $ns } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" From 9161f21c74a1a0e7bb39eb84ea0c86b23c92fc87 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sun, 28 May 2023 19:35:31 +0200 Subject: [PATCH 251/934] selftests: mptcp: simult flows: skip if MPTCP is not supported Selftests are supposed to run on any kernels, including the old ones not supporting MPTCP. A new check is then added to make sure MPTCP is supported. If not, the test stops and is marked as "skipped". Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/simult_flows.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 9f22f7e5027d..36a3c9d92e20 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" @@ -34,6 +36,8 @@ cleanup() done } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" From cf6f0fda7af7e8e016070bfee6b189e671a0c776 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sun, 28 May 2023 19:35:32 +0200 Subject: [PATCH 252/934] selftests: mptcp: sockopt: skip if MPTCP is not supported Selftests are supposed to run on any kernels, including the old ones not supporting MPTCP. A new check is then added to make sure MPTCP is supported. If not, the test stops and is marked as "skipped". Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: dc65fe82fb07 ("selftests: mptcp: add packet mark test case") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 1b70c0a304ce..ff5adbb9c7f2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + ret=0 sin="" sout="" @@ -84,6 +86,8 @@ cleanup() rm -f "$sin" "$sout" } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" From 63212608a92a1ff10ae56dbb14e9fb685f7e4ffa Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sun, 28 May 2023 19:35:33 +0200 Subject: [PATCH 253/934] selftests: mptcp: userspace pm: skip if MPTCP is not supported Selftests are supposed to run on any kernels, including the old ones not supporting MPTCP. A new check is then added to make sure MPTCP is supported. If not, the test stops and is marked as "skipped". Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 259a834fadda ("selftests: mptcp: functional tests for the userspace PM type") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index b1eb7bce599d..8092399d911f 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -1,6 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Cannot not run test without ip tool" From 134f49dec0b6aca3259cd8259de4c572048bd207 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 May 2023 13:25:42 +0200 Subject: [PATCH 254/934] serial: 8250_tegra: Fix an error handling path in tegra_uart_probe() If an error occurs after reset_control_deassert(), it must be re-asserted, as already done in the .remove() function. Fixes: c6825c6395b7 ("serial: 8250_tegra: Create Tegra specific 8250 driver") Cc: stable Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/f8130f35339cc80edc6b9aac4bb2a60b60a226bf.1684063511.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_tegra.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_tegra.c b/drivers/tty/serial/8250/8250_tegra.c index 2509e7f74ccf..89956bbf34d9 100644 --- a/drivers/tty/serial/8250/8250_tegra.c +++ b/drivers/tty/serial/8250/8250_tegra.c @@ -113,13 +113,15 @@ static int tegra_uart_probe(struct platform_device *pdev) ret = serial8250_register_8250_port(&port8250); if (ret < 0) - goto err_clkdisable; + goto err_ctrl_assert; platform_set_drvdata(pdev, uart); uart->line = ret; return 0; +err_ctrl_assert: + reset_control_assert(uart->rst); err_clkdisable: clk_disable_unprepare(uart->clk); From 2474e05467c00f7d51af3039b664de6886325257 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Fri, 19 May 2023 17:47:51 +0800 Subject: [PATCH 255/934] tty: serial: fsl_lpuart: use UARTCTRL_TXINV to send break instead of UARTCTRL_SBK LPUART IP now has two known bugs, one is that CTS has higher priority than the break signal, which causes the break signal sending through UARTCTRL_SBK may impacted by the CTS input if the HW flow control is enabled. It exists on all platforms we support in this driver. So we add a workaround patch for this issue: commit c4c81db5cf8b ("tty: serial: fsl_lpuart: disable the CTS when send break signal"). Another IP bug is i.MX8QM LPUART may have an additional break character being sent after SBK was cleared. It may need to add some delay between clearing SBK and re-enabling CTS to ensure that the SBK latch are completely cleared. But we found that during the delay period before CTS is enabled, there is still a risk that Bluetooth data in TX FIFO may be sent out during this period because of break off and CTS disabled(even if BT sets CTS line deasserted, data is still sent to BT). Due to this risk, we have to drop the CTS-disabling workaround for SBK bugs, use TXINV seems to be a better way to replace SBK feature and avoid above risk. Also need to disable the transmitter to prevent any data from being sent out during break, then invert the TX line to send break. Then disable the TXINV when turn off break and re-enable transmitter. Fixes: c4c81db5cf8b ("tty: serial: fsl_lpuart: disable the CTS when send break signal") Cc: stable Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20230519094751.28948-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 44 +++++++++++++++++---------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index c91916e13648..7486a2b8556c 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -1495,34 +1495,36 @@ static void lpuart_break_ctl(struct uart_port *port, int break_state) static void lpuart32_break_ctl(struct uart_port *port, int break_state) { - unsigned long temp, modem; - struct tty_struct *tty; - unsigned int cflag = 0; + unsigned long temp; - tty = tty_port_tty_get(&port->state->port); - if (tty) { - cflag = tty->termios.c_cflag; - tty_kref_put(tty); - } - - temp = lpuart32_read(port, UARTCTRL) & ~UARTCTRL_SBK; - modem = lpuart32_read(port, UARTMODIR); + temp = lpuart32_read(port, UARTCTRL); + /* + * LPUART IP now has two known bugs, one is CTS has higher priority than the + * break signal, which causes the break signal sending through UARTCTRL_SBK + * may impacted by the CTS input if the HW flow control is enabled. It + * exists on all platforms we support in this driver. + * Another bug is i.MX8QM LPUART may have an additional break character + * being sent after SBK was cleared. + * To avoid above two bugs, we use Transmit Data Inversion function to send + * the break signal instead of UARTCTRL_SBK. + */ if (break_state != 0) { - temp |= UARTCTRL_SBK; /* - * LPUART CTS has higher priority than SBK, need to disable CTS before - * asserting SBK to avoid any interference if flow control is enabled. + * Disable the transmitter to prevent any data from being sent out + * during break, then invert the TX line to send break. */ - if (cflag & CRTSCTS && modem & UARTMODIR_TXCTSE) - lpuart32_write(port, modem & ~UARTMODIR_TXCTSE, UARTMODIR); + temp &= ~UARTCTRL_TE; + lpuart32_write(port, temp, UARTCTRL); + temp |= UARTCTRL_TXINV; + lpuart32_write(port, temp, UARTCTRL); } else { - /* Re-enable the CTS when break off. */ - if (cflag & CRTSCTS && !(modem & UARTMODIR_TXCTSE)) - lpuart32_write(port, modem | UARTMODIR_TXCTSE, UARTMODIR); + /* Disable the TXINV to turn off break and re-enable transmitter. */ + temp &= ~UARTCTRL_TXINV; + lpuart32_write(port, temp, UARTCTRL); + temp |= UARTCTRL_TE; + lpuart32_write(port, temp, UARTCTRL); } - - lpuart32_write(port, temp, UARTCTRL); } static void lpuart_setup_watermark(struct lpuart_port *sport) From 192d43e91e0fdbd51b64ba36e773cbdf38dda505 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Tue, 23 May 2023 10:59:01 +0200 Subject: [PATCH 256/934] soc: fsl: cpm1: Fix TSA and QMC dependencies in case of COMPILE_TEST In order to compile tsa.c and qmc.c, CONFIG_CPM must be set. Without this dependency, the linker fails with some missing symbols for COMPILE_TEST configurations that need QMC without enabling CPM. Signed-off-by: Herve Codina Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202305160221.9XgweObz-lkp@intel.com/ Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20230523085902.75837-2-herve.codina@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/soc/fsl/qe/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/fsl/qe/Kconfig b/drivers/soc/fsl/qe/Kconfig index 7268c2fbcbc1..e0d096607fef 100644 --- a/drivers/soc/fsl/qe/Kconfig +++ b/drivers/soc/fsl/qe/Kconfig @@ -36,7 +36,7 @@ config UCC config CPM_TSA tristate "CPM TSA support" depends on OF && HAS_IOMEM - depends on CPM1 || COMPILE_TEST + depends on CPM1 || (CPM && COMPILE_TEST) help Freescale CPM Time Slot Assigner (TSA) controller. @@ -47,7 +47,7 @@ config CPM_TSA config CPM_QMC tristate "CPM QMC support" depends on OF && HAS_IOMEM - depends on CPM1 || (FSL_SOC && COMPILE_TEST) + depends on CPM1 || (FSL_SOC && CPM && COMPILE_TEST) depends on CPM_TSA help Freescale CPM QUICC Multichannel Controller From 7183c37fd53eee1e795206e625da12a5d7ec1e1a Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Tue, 23 May 2023 10:59:02 +0200 Subject: [PATCH 257/934] serial: cpm_uart: Fix a COMPILE_TEST dependency In a COMPILE_TEST configuration, the cpm_uart driver uses symbols from the cpm_uart_cpm2.c file. This file is compiled only when CONFIG_CPM2 is set. Without this dependency, the linker fails with some missing symbols for COMPILE_TEST configuration that needs SERIAL_CPM without enabling CPM2. This lead to: depends on CPM2 || CPM1 || (PPC32 && CPM2 && COMPILE_TEST) This dependency does not make sense anymore and can be simplified removing all the COMPILE_TEST part. Signed-off-by: Herve Codina Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202305160221.9XgweObz-lkp@intel.com/ Fixes: e3e7b13bffae ("serial: allow COMPILE_TEST for some drivers") Cc: stable Link: https://lore.kernel.org/r/20230523085902.75837-3-herve.codina@bootlin.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 2 +- drivers/tty/serial/cpm_uart/cpm_uart.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 398e5aac2e77..3e3fb377d90d 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -762,7 +762,7 @@ config SERIAL_PMACZILOG_CONSOLE config SERIAL_CPM tristate "CPM SCC/SMC serial port support" - depends on CPM2 || CPM1 || (PPC32 && COMPILE_TEST) + depends on CPM2 || CPM1 select SERIAL_CORE help This driver supports the SCC and SMC serial ports on Motorola diff --git a/drivers/tty/serial/cpm_uart/cpm_uart.h b/drivers/tty/serial/cpm_uart/cpm_uart.h index 0577618e78c0..46c03ed71c31 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart.h +++ b/drivers/tty/serial/cpm_uart/cpm_uart.h @@ -19,8 +19,6 @@ struct gpio_desc; #include "cpm_uart_cpm2.h" #elif defined(CONFIG_CPM1) #include "cpm_uart_cpm1.h" -#elif defined(CONFIG_COMPILE_TEST) -#include "cpm_uart_cpm2.h" #endif #define SERIAL_CPM_MAJOR 204 From b6b5c6426efe27cbd954409a50604d99c79bd42b Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 25 May 2023 16:59:55 +0530 Subject: [PATCH 258/934] ASoC: amd: ps: fix for acp_lock access in pdm driver Sending the mutex address(acp_lock) as platform data during ACP PDM platform driver register sequence, its creating copy of the platform data. Referencing this platform data in ACP PDM driver results incorrect reference to the common lock usage. Instead of directly passing the lock address as platform data, retrieve it from parent driver data structure and use the same lock reference in ACP PDM driver. Fixes: 45aa83cb9388 ("ASoC: amd: ps: use acp_lock to protect common registers in pdm driver") Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20230525113000.1290758-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/ps/pci-ps.c | 3 +-- sound/soc/amd/ps/ps-pdm-dma.c | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c index afddb9a77ba4..b1337b96ea8d 100644 --- a/sound/soc/amd/ps/pci-ps.c +++ b/sound/soc/amd/ps/pci-ps.c @@ -211,8 +211,7 @@ static int create_acp63_platform_devs(struct pci_dev *pci, struct acp63_dev_data case ACP63_PDM_DEV_MASK: adata->pdm_dev_index = 0; acp63_fill_platform_dev_info(&pdevinfo[0], parent, NULL, "acp_ps_pdm_dma", - 0, adata->res, 1, &adata->acp_lock, - sizeof(adata->acp_lock)); + 0, adata->res, 1, NULL, 0); acp63_fill_platform_dev_info(&pdevinfo[1], parent, NULL, "dmic-codec", 0, NULL, 0, NULL, 0); acp63_fill_platform_dev_info(&pdevinfo[2], parent, NULL, "acp_ps_mach", diff --git a/sound/soc/amd/ps/ps-pdm-dma.c b/sound/soc/amd/ps/ps-pdm-dma.c index 46b91327168f..3a83dc178e7d 100644 --- a/sound/soc/amd/ps/ps-pdm-dma.c +++ b/sound/soc/amd/ps/ps-pdm-dma.c @@ -361,12 +361,12 @@ static int acp63_pdm_audio_probe(struct platform_device *pdev) { struct resource *res; struct pdm_dev_data *adata; + struct acp63_dev_data *acp_data; + struct device *parent; int status; - if (!pdev->dev.platform_data) { - dev_err(&pdev->dev, "platform_data not retrieved\n"); - return -ENODEV; - } + parent = pdev->dev.parent; + acp_data = dev_get_drvdata(parent); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(&pdev->dev, "IORESOURCE_MEM FAILED\n"); @@ -382,7 +382,7 @@ static int acp63_pdm_audio_probe(struct platform_device *pdev) return -ENOMEM; adata->capture_stream = NULL; - adata->acp_lock = pdev->dev.platform_data; + adata->acp_lock = &acp_data->acp_lock; dev_set_drvdata(&pdev->dev, adata); status = devm_snd_soc_register_component(&pdev->dev, &acp63_pdm_component, From e384dba03e3294ce7ea69e4da558e9bf8f0e8946 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Mon, 29 May 2023 15:19:11 -0300 Subject: [PATCH 259/934] ASoC: nau8824: Add quirk to active-high jack-detect Add entries for Positivo laptops: CW14Q01P, K1424G, N14ZP74G to the DMI table, so that active-high jack-detect will work properly on these laptops. Signed-off-by: Edson Juliano Drosdeck Link: https://lore.kernel.org/r/20230529181911.632851-1-edson.drosdeck@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/nau8824.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 4f19fd9b65d1..5a4db8944d06 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -1903,6 +1903,30 @@ static const struct dmi_system_id nau8824_quirk_table[] = { }, .driver_data = (void *)(NAU8824_MONO_SPEAKER), }, + { + /* Positivo CW14Q01P */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"), + DMI_MATCH(DMI_BOARD_NAME, "CW14Q01P"), + }, + .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH), + }, + { + /* Positivo K1424G */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"), + DMI_MATCH(DMI_BOARD_NAME, "K1424G"), + }, + .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH), + }, + { + /* Positivo N14ZP74G */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"), + DMI_MATCH(DMI_BOARD_NAME, "N14ZP74G"), + }, + .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH), + }, {} }; From 0c331fd1dccfba657129380ee084b95c1cedfbef Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 18 May 2023 15:04:25 +0200 Subject: [PATCH 260/934] spi: qup: Request DMA before enabling clocks It is usually better to request all necessary resources (clocks, regulators, ...) before starting to make use of them. That way they do not change state in case one of the resources is not available yet and probe deferral (-EPROBE_DEFER) is necessary. This is particularly important for DMA channels and IOMMUs which are not enforced by fw_devlink yet (unless you use fw_devlink.strict=1). spi-qup does this in the wrong order, the clocks are enabled and disabled again when the DMA channels are not available yet. This causes issues in some cases: On most SoCs one of the SPI QUP clocks is shared with the UART controller. When using earlycon UART is actively used during boot but might not have probed yet, usually for the same reason (waiting for the DMA controller). In this case, the brief enable/disable cycle ends up gating the clock and further UART console output will halt the system completely. Avoid this by requesting the DMA channels before changing the clock state. Fixes: 612762e82ae6 ("spi: qup: Add DMA capabilities") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20230518-spi-qup-clk-defer-v1-1-f49fc9ca4e02@gerhold.net Signed-off-by: Mark Brown --- drivers/spi/spi-qup.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index 944ef6b42bce..00e5e88e72c4 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -1028,23 +1028,8 @@ static int spi_qup_probe(struct platform_device *pdev) return -ENXIO; } - ret = clk_prepare_enable(cclk); - if (ret) { - dev_err(dev, "cannot enable core clock\n"); - return ret; - } - - ret = clk_prepare_enable(iclk); - if (ret) { - clk_disable_unprepare(cclk); - dev_err(dev, "cannot enable iface clock\n"); - return ret; - } - master = spi_alloc_master(dev, sizeof(struct spi_qup)); if (!master) { - clk_disable_unprepare(cclk); - clk_disable_unprepare(iclk); dev_err(dev, "cannot allocate master\n"); return -ENOMEM; } @@ -1092,6 +1077,19 @@ static int spi_qup_probe(struct platform_device *pdev) spin_lock_init(&controller->lock); init_completion(&controller->done); + ret = clk_prepare_enable(cclk); + if (ret) { + dev_err(dev, "cannot enable core clock\n"); + goto error_dma; + } + + ret = clk_prepare_enable(iclk); + if (ret) { + clk_disable_unprepare(cclk); + dev_err(dev, "cannot enable iface clock\n"); + goto error_dma; + } + iomode = readl_relaxed(base + QUP_IO_M_MODES); size = QUP_IO_M_OUTPUT_BLOCK_SIZE(iomode); @@ -1121,7 +1119,7 @@ static int spi_qup_probe(struct platform_device *pdev) ret = spi_qup_set_state(controller, QUP_STATE_RESET); if (ret) { dev_err(dev, "cannot set RESET state\n"); - goto error_dma; + goto error_clk; } writel_relaxed(0, base + QUP_OPERATIONAL); @@ -1145,7 +1143,7 @@ static int spi_qup_probe(struct platform_device *pdev) ret = devm_request_irq(dev, irq, spi_qup_qup_irq, IRQF_TRIGGER_HIGH, pdev->name, controller); if (ret) - goto error_dma; + goto error_clk; pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC); pm_runtime_use_autosuspend(dev); @@ -1160,11 +1158,12 @@ static int spi_qup_probe(struct platform_device *pdev) disable_pm: pm_runtime_disable(&pdev->dev); +error_clk: + clk_disable_unprepare(cclk); + clk_disable_unprepare(iclk); error_dma: spi_qup_release_dma(master); error: - clk_disable_unprepare(cclk); - clk_disable_unprepare(iclk); spi_master_put(master); return ret; } From fcfe84236ec0974fe92f0578d1d58ed805c4b70f Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 30 May 2023 09:59:26 +0300 Subject: [PATCH 261/934] usb: typec: tps6598x: Fix broken polling mode after system suspend/resume During system resume we need to resume the polling workqueue if client->irq is not set else polling will no longer work. Fixes: 0d6a119cecd7 ("usb: typec: tps6598x: Add support for polling interrupts status") Signed-off-by: Roger Quadros Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230530065926.6161-1-rogerq@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 438cc40660a1..603dbd44deba 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -920,7 +920,7 @@ static int __maybe_unused tps6598x_resume(struct device *dev) enable_irq(client->irq); } - if (client->irq) + if (!client->irq) queue_delayed_work(system_power_efficient_wq, &tps->wq_poll, msecs_to_jiffies(POLL_INTERVAL)); From 31a5978243d24d77be4bacca56c78a0fbc43b00d Mon Sep 17 00:00:00 2001 From: "min15.li" Date: Fri, 26 May 2023 17:06:56 +0000 Subject: [PATCH 262/934] nvme: fix miss command type check In the function nvme_passthru_end(), only the value of the command opcode is checked, without checking the command type (IO command or Admin command). When we send a Dataset Management command (The opcode of the Dataset Management command is the same as the Set Feature command), kernel thinks it is a set feature command, then sets the controller's keep alive interval, and calls nvme_keep_alive_work(). Signed-off-by: min15.li Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 4 +++- drivers/nvme/host/ioctl.c | 2 +- drivers/nvme/host/nvme.h | 2 +- drivers/nvme/target/passthru.c | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1f0cbb77b249..c6cba1cc101c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1115,7 +1115,7 @@ u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) } EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU); -void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, +void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, struct nvme_command *cmd, int status) { if (effects & NVME_CMD_EFFECTS_CSE_MASK) { @@ -1132,6 +1132,8 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, nvme_queue_scan(ctrl); flush_work(&ctrl->scan_work); } + if (ns) + return; switch (cmd->common.opcode) { case nvme_admin_set_features: diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 81c5c9e38477..f15e7330b75a 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -254,7 +254,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, blk_mq_free_request(req); if (effects) - nvme_passthru_end(ctrl, effects, cmd, ret); + nvme_passthru_end(ctrl, ns, effects, cmd, ret); return ret; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a2d4f59e0535..2dd52739236e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1077,7 +1077,7 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); int nvme_execute_rq(struct request *rq, bool at_head); -void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, +void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects, struct nvme_command *cmd, int status); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 511c980d538d..71a9c1cc57f5 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -243,7 +243,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) blk_mq_free_request(rq); if (effects) - nvme_passthru_end(ctrl, effects, req->cmd, status); + nvme_passthru_end(ctrl, ns, effects, req->cmd, status); } static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq, From ea4d453b9ec9ea279c39744cd0ecb47ef48ede35 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Thu, 25 May 2023 12:22:02 -0600 Subject: [PATCH 263/934] nvme: double KA polling frequency to avoid KATO with TBKAS on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With TBKAS on, the completion of one command can defer sending a keep alive for up to twice the delay between successive runs of nvme_keep_alive_work. The current delay of KATO / 2 thus makes it possible for one command to defer sending a keep alive for up to KATO, which can result in the controller detecting a KATO. The following trace demonstrates the issue, taking KATO = 8 for simplicity: 1. t = 0: run nvme_keep_alive_work, no keep-alive sent 2. t = ε: I/O completion seen, set comp_seen = true 3. t = 4: run nvme_keep_alive_work, see comp_seen == true, skip sending keep-alive, set comp_seen = false 4. t = 8: run nvme_keep_alive_work, see comp_seen == false, send a keep-alive command. Here, there is a delay of 8 - ε between receiving a command completion and sending the next command. With ε small, the controller is likely to detect a keep alive timeout. Fix this by running nvme_keep_alive_work with a delay of KATO / 4 whenever TBKAS is on. Going through the above trace now gives us a worst-case delay of 4 - ε, which is in line with the recommendation of sending a command every KATO / 2 in the NVMe specification. Reported-by: Costa Sapuntzakis Reported-by: Randy Jennings Signed-off-by: Uday Shankar Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c6cba1cc101c..6417c7928fa3 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1163,9 +1163,25 @@ EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU); * The host should send Keep Alive commands at half of the Keep Alive Timeout * accounting for transport roundtrip times [..]. */ +static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl) +{ + unsigned long delay = ctrl->kato * HZ / 2; + + /* + * When using Traffic Based Keep Alive, we need to run + * nvme_keep_alive_work at twice the normal frequency, as one + * command completion can postpone sending a keep alive command + * by up to twice the delay between runs. + */ + if (ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) + delay /= 2; + return delay; +} + static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) { - queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2); + queue_delayed_work(nvme_wq, &ctrl->ka_work, + nvme_keep_alive_work_period(ctrl)); } static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, From 774a9636514764ddc0d072ae0d1d1c01a47e6ddd Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Thu, 25 May 2023 12:22:03 -0600 Subject: [PATCH 264/934] nvme: check IO start time when deciding to defer KA When a command completes, we set a flag which will skip sending a keep alive at the next run of nvme_keep_alive_work when TBKAS is on. However, if the command was submitted long ago, it's possible that the controller may have also restarted its keep alive timer (as a result of receiving the command) long ago. The following trace demonstrates the issue, assuming TBKAS is on and KATO = 8 for simplicity: 1. t = 0: submit I/O commands A, B, C, D, E 2. t = 0.5: commands A, B, C, D, E reach controller, restart its keep alive timer 3. t = 1: A completes 4. t = 2: run nvme_keep_alive_work, see recent completion, do nothing 5. t = 3: B completes 6. t = 4: run nvme_keep_alive_work, see recent completion, do nothing 7. t = 5: C completes 8. t = 6: run nvme_keep_alive_work, see recent completion, do nothing 9. t = 7: D completes 10. t = 8: run nvme_keep_alive_work, see recent completion, do nothing 11. t = 9: E completes At this point, 8.5 seconds have passed without restarting the controller's keep alive timer, so the controller will detect a keep alive timeout. Fix this by checking the IO start time when deciding to defer sending a keep alive command. Only set comp_seen if the command started after the most recent run of nvme_keep_alive_work. With this change, the completions of B, C, and D will not set comp_seen and the run of nvme_keep_alive_work at t = 4 will send a keep alive. Reported-by: Costa Sapuntzakis Reported-by: Randy Jennings Signed-off-by: Uday Shankar Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 14 +++++++++++++- drivers/nvme/host/nvme.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6417c7928fa3..dbfca1b925f8 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -397,7 +397,16 @@ void nvme_complete_rq(struct request *req) trace_nvme_complete_rq(req); nvme_cleanup_cmd(req); - if (ctrl->kas) + /* + * Completions of long-running commands should not be able to + * defer sending of periodic keep alives, since the controller + * may have completed processing such commands a long time ago + * (arbitrarily close to command submission time). + * req->deadline - req->timeout is the command submission time + * in jiffies. + */ + if (ctrl->kas && + req->deadline - req->timeout >= ctrl->ka_last_check_time) ctrl->comp_seen = true; switch (nvme_decide_disposition(req)) { @@ -1200,6 +1209,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, return RQ_END_IO_NONE; } + ctrl->ka_last_check_time = jiffies; ctrl->comp_seen = false; spin_lock_irqsave(&ctrl->lock, flags); if (ctrl->state == NVME_CTRL_LIVE || @@ -1218,6 +1228,8 @@ static void nvme_keep_alive_work(struct work_struct *work) bool comp_seen = ctrl->comp_seen; struct request *rq; + ctrl->ka_last_check_time = jiffies; + if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) { dev_dbg(ctrl->device, "reschedule traffic based keep-alive timer\n"); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2dd52739236e..8657811f8b88 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -328,6 +328,7 @@ struct nvme_ctrl { struct delayed_work ka_work; struct delayed_work failfast_work; struct nvme_command ka_cmd; + unsigned long ka_last_check_time; struct work_struct fw_act_work; unsigned long events; From db3e33dd8bd956f165436afdbdbf1c653fb3c8e6 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 28 May 2023 16:00:41 -0700 Subject: [PATCH 265/934] module: fix module load for ia64 Frank reported boot regression in ia64 as: ELILO v3.16 for EFI/IA-64 .. Uncompressing Linux... done Loading file AC100221.initrd.img...done [ 0.000000] Linux version 6.4.0-rc3 (root@x4270) (ia64-linux-gcc (GCC) 12.2.0, GNU ld (GNU Binutils) 2.39) #1 SMP Thu May 25 15:52:20 CEST 2023 [ 0.000000] efi: EFI v1.1 by HP [ 0.000000] efi: SALsystab=0x3ee7a000 ACPI 2.0=0x3fe2a000 ESI=0x3ee7b000 SMBIOS=0x3ee7c000 HCDP=0x3fe28000 [ 0.000000] PCDP: v3 at 0x3fe28000 [ 0.000000] earlycon: uart8250 at MMIO 0x00000000f4050000 (options '9600n8') [ 0.000000] printk: bootconsole [uart8250] enabled [ 0.000000] ACPI: Early table checksum verification disabled [ 0.000000] ACPI: RSDP 0x000000003FE2A000 000028 (v02 HP ) [ 0.000000] ACPI: XSDT 0x000000003FE2A02C 0000CC (v01 HP rx2620 00000000 HP 00000000) [...] [ 3.793350] Run /init as init process Loading, please wait... Starting systemd-udevd version 252.6-1 [ 3.951100] ------------[ cut here ]------------ [ 3.951100] WARNING: CPU: 6 PID: 140 at kernel/module/main.c:1547 __layout_sections+0x370/0x3c0 [ 3.949512] Unable to handle kernel paging request at virtual address 1000000000000000 [ 3.951100] Modules linked in: [ 3.951100] CPU: 6 PID: 140 Comm: (udev-worker) Not tainted 6.4.0-rc3 #1 [ 3.956161] (udev-worker)[142]: Oops 11003706212352 [1] [ 3.951774] Hardware name: hp server rx2620 , BIOS 04.29 11/30/2007 [ 3.951774] [ 3.951774] Call Trace: [ 3.958339] Unable to handle kernel paging request at virtual address 1000000000000000 [ 3.956161] Modules linked in: [ 3.951774] [] show_stack.part.0+0x30/0x60 [ 3.951774] sp=e000000183a67b20 bsp=e000000183a61628 [ 3.956161] [ 3.956161] which bisect to module_memory change [1]. Debug showed that ia64 uses some special sections: __layout_sections: section .got (sh_flags 10000002) matched to MOD_INVALID __layout_sections: section .sdata (sh_flags 10000003) matched to MOD_INVALID __layout_sections: section .sbss (sh_flags 10000003) matched to MOD_INVALID All these sections are loaded to module core memory before [1]. Fix ia64 boot by loading these sections to MOD_DATA (core rw data). [1] commit ac3b43283923 ("module: replace module_layout with module_memory") Fixes: ac3b43283923 ("module: replace module_layout with module_memory") Reported-by: Frank Scheiner Closes: https://lists.debian.org/debian-ia64/2023/05/msg00010.html Closes: https://marc.info/?l=linux-ia64&m=168509859125505 Cc: Linus Torvalds Signed-off-by: Song Liu Tested-by: John Paul Adrian Glaubitz Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/module/main.c b/kernel/module/main.c index 044aa2c9e3cb..4e2cf784cf8c 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1521,14 +1521,14 @@ static void __layout_sections(struct module *mod, struct load_info *info, bool i MOD_RODATA, MOD_RO_AFTER_INIT, MOD_DATA, - MOD_INVALID, /* This is needed to match the masks array */ + MOD_DATA, }; static const int init_m_to_mem_type[] = { MOD_INIT_TEXT, MOD_INIT_RODATA, MOD_INVALID, MOD_INIT_DATA, - MOD_INVALID, /* This is needed to match the masks array */ + MOD_INIT_DATA, }; for (m = 0; m < ARRAY_SIZE(masks); ++m) { From d00394e1395be9e27f58d58a61642a2a5df10405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 19 Mar 2023 00:53:38 +0100 Subject: [PATCH 266/934] fbdev: au1100fb: Drop if with an always false condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver core never calls a remove callback with the platform_device pointer being NULL. So the check for this condition can just be dropped. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/au1100fb.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c index 519313b8bb00..cd27e3b81bb8 100644 --- a/drivers/video/fbdev/au1100fb.c +++ b/drivers/video/fbdev/au1100fb.c @@ -524,9 +524,6 @@ int au1100fb_drv_remove(struct platform_device *dev) { struct au1100fb_device *fbdev = NULL; - if (!dev) - return -ENODEV; - fbdev = platform_get_drvdata(dev); #if !defined(CONFIG_FRAMEBUFFER_CONSOLE) && defined(CONFIG_LOGO) From 4369e38a6f1978b48f9200d2a69ce4658db76047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 19 Mar 2023 00:53:39 +0100 Subject: [PATCH 267/934] fbdev: arcfb: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/arcfb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/arcfb.c b/drivers/video/fbdev/arcfb.c index 024d0ee4f04f..08d15e408413 100644 --- a/drivers/video/fbdev/arcfb.c +++ b/drivers/video/fbdev/arcfb.c @@ -590,7 +590,7 @@ err_fb_alloc: return retval; } -static int arcfb_remove(struct platform_device *dev) +static void arcfb_remove(struct platform_device *dev) { struct fb_info *info = platform_get_drvdata(dev); @@ -601,12 +601,11 @@ static int arcfb_remove(struct platform_device *dev) vfree((void __force *)info->screen_base); framebuffer_release(info); } - return 0; } static struct platform_driver arcfb_driver = { .probe = arcfb_probe, - .remove = arcfb_remove, + .remove_new = arcfb_remove, .driver = { .name = "arcfb", }, From bf76544d4515ac543cb9e5c666eebf3866e662a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 19 Mar 2023 00:53:40 +0100 Subject: [PATCH 268/934] fbdev: au1100fb: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/au1100fb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c index cd27e3b81bb8..648d6cac86e8 100644 --- a/drivers/video/fbdev/au1100fb.c +++ b/drivers/video/fbdev/au1100fb.c @@ -520,7 +520,7 @@ failed: return -ENODEV; } -int au1100fb_drv_remove(struct platform_device *dev) +void au1100fb_drv_remove(struct platform_device *dev) { struct au1100fb_device *fbdev = NULL; @@ -540,8 +540,6 @@ int au1100fb_drv_remove(struct platform_device *dev) clk_disable_unprepare(fbdev->lcdclk); clk_put(fbdev->lcdclk); } - - return 0; } #ifdef CONFIG_PM @@ -590,9 +588,9 @@ static struct platform_driver au1100fb_driver = { .name = "au1100-lcd", }, .probe = au1100fb_drv_probe, - .remove = au1100fb_drv_remove, + .remove_new = au1100fb_drv_remove, .suspend = au1100fb_drv_suspend, - .resume = au1100fb_drv_resume, + .resume = au1100fb_drv_resume, }; module_platform_driver(au1100fb_driver); From 4b88b6e1c4ec832b02a65b51489692b58d2c6f84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 19 Mar 2023 00:53:41 +0100 Subject: [PATCH 269/934] fbdev: au1200fb: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/au1200fb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c index b6b22fa4a8a0..aed88ce45bf0 100644 --- a/drivers/video/fbdev/au1200fb.c +++ b/drivers/video/fbdev/au1200fb.c @@ -1765,7 +1765,7 @@ failed: return ret; } -static int au1200fb_drv_remove(struct platform_device *dev) +static void au1200fb_drv_remove(struct platform_device *dev) { struct au1200fb_platdata *pd = platform_get_drvdata(dev); struct fb_info *fbi; @@ -1788,8 +1788,6 @@ static int au1200fb_drv_remove(struct platform_device *dev) } free_irq(platform_get_irq(dev, 0), (void *)dev); - - return 0; } #ifdef CONFIG_PM @@ -1840,7 +1838,7 @@ static struct platform_driver au1200fb_driver = { .pm = AU1200FB_PMOPS, }, .probe = au1200fb_drv_probe, - .remove = au1200fb_drv_remove, + .remove_new = au1200fb_drv_remove, }; module_platform_driver(au1200fb_driver); From a3ce8c8ffb6fdee842b703da78bd81eac0f16d71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 19 Mar 2023 00:53:42 +0100 Subject: [PATCH 270/934] fbdev: broadsheetfb: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/broadsheetfb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/broadsheetfb.c b/drivers/video/fbdev/broadsheetfb.c index 55e62dd96f9b..b518cacbf7cd 100644 --- a/drivers/video/fbdev/broadsheetfb.c +++ b/drivers/video/fbdev/broadsheetfb.c @@ -1193,7 +1193,7 @@ err: } -static int broadsheetfb_remove(struct platform_device *dev) +static void broadsheetfb_remove(struct platform_device *dev) { struct fb_info *info = platform_get_drvdata(dev); @@ -1209,12 +1209,11 @@ static int broadsheetfb_remove(struct platform_device *dev) module_put(par->board->owner); framebuffer_release(info); } - return 0; } static struct platform_driver broadsheetfb_driver = { .probe = broadsheetfb_probe, - .remove = broadsheetfb_remove, + .remove_new = broadsheetfb_remove, .driver = { .name = "broadsheetfb", }, From d19663edc91de65ae85eea9902addc9d04b0ceb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 19 Mar 2023 00:53:43 +0100 Subject: [PATCH 271/934] fbdev: bw2: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Signed-off-by: Uwe Kleine-König Signed-off-by: Helge Deller --- drivers/video/fbdev/bw2.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/bw2.c b/drivers/video/fbdev/bw2.c index 9cbadcd18b25..025d663dc6fd 100644 --- a/drivers/video/fbdev/bw2.c +++ b/drivers/video/fbdev/bw2.c @@ -352,7 +352,7 @@ out_err: return err; } -static int bw2_remove(struct platform_device *op) +static void bw2_remove(struct platform_device *op) { struct fb_info *info = dev_get_drvdata(&op->dev); struct bw2_par *par = info->par; @@ -363,8 +363,6 @@ static int bw2_remove(struct platform_device *op) of_iounmap(&op->resource[0], info->screen_base, info->fix.smem_len); framebuffer_release(info); - - return 0; } static const struct of_device_id bw2_match[] = { @@ -381,7 +379,7 @@ static struct platform_driver bw2_driver = { .of_match_table = bw2_match, }, .probe = bw2_probe, - .remove = bw2_remove, + .remove_new = bw2_remove, }; static int __init bw2_init(void) From b928dfdcb27d8fa59917b794cfba53052a2f050f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 23 May 2023 23:49:49 -0400 Subject: [PATCH 272/934] ext4: set lockdep subclass for the ea_inode in ext4_xattr_inode_cache_find() If the ea_inode has been pushed out of the inode cache while there is still a reference in the mb_cache, the lockdep subclass will not be set on the inode, which can lead to some lockdep false positives. Fixes: 33d201e0277b ("ext4: fix lockdep warning about recursive inode locking") Cc: stable@kernel.org Reported-by: syzbot+d4b971e744b1f5439336@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Link: https://lore.kernel.org/r/20230524034951.779531-3-tytso@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index a27208129a80..ff7ab63c5b4f 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1539,6 +1539,7 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value, EXT4_IGET_EA_INODE); if (IS_ERR(ea_inode)) goto next_entry; + ext4_xattr_inode_set_class(ea_inode); if (i_size_read(ea_inode) == value_len && !ext4_xattr_inode_read(ea_inode, ea_data, value_len) && !ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data, From 2bc7e7c1a3bc9bd0cbf0f71006f6fe7ef24a00c2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 23 May 2023 23:49:50 -0400 Subject: [PATCH 273/934] ext4: disallow ea_inodes with extended attributes An ea_inode stores the value of an extended attribute; it can not have extended attributes itself, or this will cause recursive nightmares. Add a check in ext4_iget() to make sure this is the case. Cc: stable@kernel.org Reported-by: syzbot+e44749b6ba4d0434cd47@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Link: https://lore.kernel.org/r/20230524034951.779531-4-tytso@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 258f3cbed347..02de439bf1f0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4647,6 +4647,9 @@ static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags) if (flags & EXT4_IGET_EA_INODE) { if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) return "missing EA_INODE flag"; + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) || + EXT4_I(inode)->i_file_acl) + return "ea_inode with extended attributes"; } else { if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) return "unexpected EA_INODE flag"; From aff3bea95388299eec63440389b4545c8041b357 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 23 May 2023 23:49:51 -0400 Subject: [PATCH 274/934] ext4: add lockdep annotations for i_data_sem for ea_inode's Treat i_data_sem for ea_inodes as being in their own lockdep class to avoid lockdep complaints about ext4_setattr's use of inode_lock() on normal inodes potentially causing lock ordering with i_data_sem on ea_inodes in ext4_xattr_inode_write(). However, ea_inodes will be operated on by ext4_setattr(), so this isn't a problem. Cc: stable@kernel.org Link: https://syzkaller.appspot.com/bug?extid=298c5d8fb4a128bc27b0 Reported-by: syzbot+298c5d8fb4a128bc27b0@syzkaller.appspotmail.com Signed-off-by: Theodore Ts'o Link: https://lore.kernel.org/r/20230524034951.779531-5-tytso@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 ++ fs/ext4/xattr.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9525c52b78dc..8104a21b001a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -918,11 +918,13 @@ do { \ * where the second inode has larger inode number * than the first * I_DATA_SEM_QUOTA - Used for quota inodes only + * I_DATA_SEM_EA - Used for ea_inodes only */ enum { I_DATA_SEM_NORMAL = 0, I_DATA_SEM_OTHER, I_DATA_SEM_QUOTA, + I_DATA_SEM_EA }; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index ff7ab63c5b4f..13d7f17a9c8c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -121,7 +121,11 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array, #ifdef CONFIG_LOCKDEP void ext4_xattr_inode_set_class(struct inode *ea_inode) { + struct ext4_inode_info *ei = EXT4_I(ea_inode); + lockdep_set_subclass(&ea_inode->i_rwsem, 1); + (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */ + lockdep_set_subclass(&ei->i_data_sem, I_DATA_SEM_EA); } #endif From 1077b2d53ef53629c14106aecf633bebd286c04c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 24 May 2023 12:44:53 +0200 Subject: [PATCH 275/934] ext4: fix fsync for non-directories Commit e360c6ed7274 ("ext4: Drop special handling of journalled data from ext4_sync_file()") simplified ext4_sync_file() by dropping special handling of journalled data mode as it was not needed anymore. However that branch was also used for directories and symlinks and since the fastcommit code does not track metadata changes to non-regular files, the change has caused e.g. fsync(2) on directories to not commit transaction as it should. Fix the problem by adding handling for non-regular files. Fixes: e360c6ed7274 ("ext4: Drop special handling of journalled data from ext4_sync_file()") Reported-by: Eric Whitney Link: https://lore.kernel.org/all/ZFqO3xVnmhL7zv1x@debian-BULLSEYE-live-builder-AMD64 Signed-off-by: Jan Kara Tested-by: Eric Whitney Link: https://lore.kernel.org/r/20230524104453.8734-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/fsync.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index f65fdb27ce14..2a143209aa0c 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -108,6 +108,13 @@ static int ext4_fsync_journal(struct inode *inode, bool datasync, journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; + /* + * Fastcommit does not really support fsync on directories or other + * special files. Force a full commit. + */ + if (!S_ISREG(inode->i_mode)) + return ext4_force_commit(inode->i_sb); + if (journal->j_flags & JBD2_BARRIER && !jbd2_trans_will_send_data_barrier(journal, commit_tid)) *needs_barrier = true; From eb1f822c76beeaa76ab8b6737ab9dc9f9798408c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 26 May 2023 23:57:29 -0400 Subject: [PATCH 276/934] ext4: enable the lazy init thread when remounting read/write In commit a44be64bbecb ("ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled") we defer clearing tyhe SB_RDONLY flag in struct super. However, we didn't defer when we checked sb_rdonly() to determine the lazy itable init thread should be enabled, with the next result that the lazy inode table initialization would not be properly started. This can cause generic/231 to fail in ext4's nojournal mode. Fix this by moving when we decide to start or stop the lazy itable init thread to after we clear the SB_RDONLY flag when we are remounting the file system read/write. Fixes a44be64bbecb ("ext4: don't clear SB_RDONLY when remounting r/w until...") Signed-off-by: Theodore Ts'o Link: https://lore.kernel.org/r/20230527035729.1001605-1-tytso@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9680fe753e59..56a5d1c469fc 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6588,18 +6588,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) } } - /* - * Reinitialize lazy itable initialization thread based on - * current settings - */ - if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE)) - ext4_unregister_li_request(sb); - else { - ext4_group_t first_not_zeroed; - first_not_zeroed = ext4_has_uninit_itable(sb); - ext4_register_li_request(sb, first_not_zeroed); - } - /* * Handle creation of system zone data early because it can fail. * Releasing of existing data is done when we are sure remount will @@ -6637,6 +6625,18 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) if (enable_rw) sb->s_flags &= ~SB_RDONLY; + /* + * Reinitialize lazy itable initialization thread based on + * current settings + */ + if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE)) + ext4_unregister_li_request(sb); + else { + ext4_group_t first_not_zeroed; + first_not_zeroed = ext4_has_uninit_itable(sb); + ext4_register_li_request(sb, first_not_zeroed); + } + if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) ext4_stop_mmpd(sbi); From c7275ce6a5fd32ca9f5a6294ed89cf0523181af9 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Thu, 25 May 2023 12:22:04 -0600 Subject: [PATCH 277/934] nvme: improve handling of long keep alives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upon keep alive completion, nvme_keep_alive_work is scheduled with the same delay every time. If keep alive commands are completing slowly, this may cause a keep alive timeout. The following trace illustrates the issue, taking KATO = 8 and TBKAS off for simplicity: 1. t = 0: run nvme_keep_alive_work, send keep alive 2. t = ε: keep alive reaches controller, controller restarts its keep alive timer 3. t = 4: host receives keep alive completion, schedules nvme_keep_alive_work with delay 4 4. t = 8: run nvme_keep_alive_work, send keep alive Here, a keep alive having RTT of 4 causes a delay of at least 8 - ε between the controller receiving successive keep alives. With ε small, the controller is likely to detect a keep alive timeout. Fix this by calculating the RTT of the keep alive command, and adjusting the scheduling delay of the next keep alive work accordingly. Reported-by: Costa Sapuntzakis Reported-by: Randy Jennings Signed-off-by: Uday Shankar Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dbfca1b925f8..3ec38e2b9173 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1199,6 +1199,20 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, struct nvme_ctrl *ctrl = rq->end_io_data; unsigned long flags; bool startka = false; + unsigned long rtt = jiffies - (rq->deadline - rq->timeout); + unsigned long delay = nvme_keep_alive_work_period(ctrl); + + /* + * Subtract off the keepalive RTT so nvme_keep_alive_work runs + * at the desired frequency. + */ + if (rtt <= delay) { + delay -= rtt; + } else { + dev_warn(ctrl->device, "long keepalive RTT (%u ms)\n", + jiffies_to_msecs(rtt)); + delay = 0; + } blk_mq_free_request(rq); @@ -1217,7 +1231,7 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, startka = true; spin_unlock_irqrestore(&ctrl->lock, flags); if (startka) - nvme_queue_keep_alive_work(ctrl); + queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); return RQ_END_IO_NONE; } From c7cfbd115001f94de9e4053657946a383147e803 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Mon, 29 May 2023 12:52:55 -0700 Subject: [PATCH 278/934] net/sched: sch_ingress: Only create under TC_H_INGRESS ingress Qdiscs are only supposed to be created under TC_H_INGRESS. Return -EOPNOTSUPP if 'parent' is not TC_H_INGRESS, similar to mq_init(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+b53a9c0d1ea4ad62da8b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/0000000000006cf87705f79acf1a@google.com/ Tested-by: Pedro Tammela Acked-by: Jamal Hadi Salim Reviewed-by: Jamal Hadi Salim Reviewed-by: Vlad Buslov Signed-off-by: Peilin Ye Signed-off-by: Jakub Kicinski --- net/sched/sch_ingress.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 84838128b9c5..f9ef6deb2770 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -80,6 +80,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); int err; + if (sch->parent != TC_H_INGRESS) + return -EOPNOTSUPP; + net_inc_ingress_queue(); mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); @@ -101,6 +104,9 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); + if (sch->parent != TC_H_INGRESS) + return; + tcf_block_put_ext(q->block, sch, &q->block_info); net_dec_ingress_queue(); } From 5eeebfe6c493192b10d516abfd72742900f2a162 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Mon, 29 May 2023 12:53:21 -0700 Subject: [PATCH 279/934] net/sched: sch_clsact: Only create under TC_H_CLSACT clsact Qdiscs are only supposed to be created under TC_H_CLSACT (which equals TC_H_INGRESS). Return -EOPNOTSUPP if 'parent' is not TC_H_CLSACT. Fixes: 1f211a1b929c ("net, sched: add clsact qdisc") Tested-by: Pedro Tammela Acked-by: Jamal Hadi Salim Reviewed-by: Jamal Hadi Salim Reviewed-by: Vlad Buslov Signed-off-by: Peilin Ye Signed-off-by: Jakub Kicinski --- net/sched/sch_ingress.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index f9ef6deb2770..35963929e117 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -225,6 +225,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); int err; + if (sch->parent != TC_H_CLSACT) + return -EOPNOTSUPP; + net_inc_ingress_queue(); net_inc_egress_queue(); @@ -254,6 +257,9 @@ static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); + if (sch->parent != TC_H_CLSACT) + return; + tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); From f85fa45d4a9408d98c46c8fa45ba2e3b2f4bf219 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Mon, 29 May 2023 12:54:03 -0700 Subject: [PATCH 280/934] net/sched: Reserve TC_H_INGRESS (TC_H_CLSACT) for ingress (clsact) Qdiscs Currently it is possible to add e.g. an HTB Qdisc under ffff:fff1 (TC_H_INGRESS, TC_H_CLSACT): $ ip link add name ifb0 type ifb $ tc qdisc add dev ifb0 parent ffff:fff1 htb $ tc qdisc add dev ifb0 clsact Error: Exclusivity flag on, cannot modify. $ drgn ... >>> ifb0 = netdev_get_by_name(prog, "ifb0") >>> qdisc = ifb0.ingress_queue.qdisc_sleeping >>> print(qdisc.ops.id.string_().decode()) htb >>> qdisc.flags.value_() # TCQ_F_INGRESS 2 Only allow ingress and clsact Qdiscs under ffff:fff1. Return -EINVAL for everything else. Make TCQ_F_INGRESS a static flag of ingress and clsact Qdiscs. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Fixes: 1f211a1b929c ("net, sched: add clsact qdisc") Tested-by: Pedro Tammela Acked-by: Jamal Hadi Salim Reviewed-by: Jamal Hadi Salim Reviewed-by: Vlad Buslov Signed-off-by: Peilin Ye Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 7 ++++++- net/sched/sch_ingress.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fdb8f429333d..383195955b7d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1252,7 +1252,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev, sch->parent = parent; if (handle == TC_H_INGRESS) { - sch->flags |= TCQ_F_INGRESS; + if (!(sch->flags & TCQ_F_INGRESS)) { + NL_SET_ERR_MSG(extack, + "Specified parent ID is reserved for ingress and clsact Qdiscs"); + err = -EINVAL; + goto err_out3; + } handle = TC_H_MAKE(TC_H_INGRESS, 0); } else { if (handle == 0) { diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 35963929e117..e43a45499372 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -140,7 +140,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .cl_ops = &ingress_class_ops, .id = "ingress", .priv_size = sizeof(struct ingress_sched_data), - .static_flags = TCQ_F_CPUSTATS, + .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS, .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, @@ -281,7 +281,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { .cl_ops = &clsact_class_ops, .id = "clsact", .priv_size = sizeof(struct clsact_sched_data), - .static_flags = TCQ_F_CPUSTATS, + .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS, .init = clsact_init, .destroy = clsact_destroy, .dump = ingress_dump, From 9de95df5d15baa956c2b70b9e794842e790a8a13 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Mon, 29 May 2023 12:54:26 -0700 Subject: [PATCH 281/934] net/sched: Prohibit regrafting ingress or clsact Qdiscs Currently, after creating an ingress (or clsact) Qdisc and grafting it under TC_H_INGRESS (TC_H_CLSACT), it is possible to graft it again under e.g. a TBF Qdisc: $ ip link add ifb0 type ifb $ tc qdisc add dev ifb0 handle 1: root tbf rate 20kbit buffer 1600 limit 3000 $ tc qdisc add dev ifb0 clsact $ tc qdisc link dev ifb0 handle ffff: parent 1:1 $ tc qdisc show dev ifb0 qdisc tbf 1: root refcnt 2 rate 20Kbit burst 1600b lat 560.0ms qdisc clsact ffff: parent ffff:fff1 refcnt 2 ^^^^^^^^ clsact's refcount has increased: it is now grafted under both TC_H_CLSACT and 1:1. ingress and clsact Qdiscs should only be used under TC_H_INGRESS (TC_H_CLSACT). Prohibit regrafting them. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Fixes: 1f211a1b929c ("net, sched: add clsact qdisc") Tested-by: Pedro Tammela Acked-by: Jamal Hadi Salim Reviewed-by: Jamal Hadi Salim Reviewed-by: Vlad Buslov Signed-off-by: Peilin Ye Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 383195955b7d..49b9c1bbfdd9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1596,6 +1596,11 @@ replay: NL_SET_ERR_MSG(extack, "Invalid qdisc name"); return -EINVAL; } + if (q->flags & TCQ_F_INGRESS) { + NL_SET_ERR_MSG(extack, + "Cannot regraft ingress or clsact Qdiscs"); + return -EINVAL; + } if (q == p || (p && check_loop(q, p, 0))) { NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); From 36eec020fab668719b541f34d97f44e232ffa165 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 27 May 2023 17:37:47 +0800 Subject: [PATCH 282/934] net: sched: fix NULL pointer dereference in mq_attach When use the following command to test: 1)ip link add bond0 type bond 2)ip link set bond0 up 3)tc qdisc add dev bond0 root handle ffff: mq 4)tc qdisc replace dev bond0 parent ffff:fff1 handle ffff: mq The kernel reports NULL pointer dereference issue. The stack information is as follows: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Internal error: Oops: 0000000096000006 [#1] SMP Modules linked in: pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : mq_attach+0x44/0xa0 lr : qdisc_graft+0x20c/0x5cc sp : ffff80000e2236a0 x29: ffff80000e2236a0 x28: ffff0000c0e59d80 x27: ffff0000c0be19c0 x26: ffff0000cae3e800 x25: 0000000000000010 x24: 00000000fffffff1 x23: 0000000000000000 x22: ffff0000cae3e800 x21: ffff0000c9df4000 x20: ffff0000c9df4000 x19: 0000000000000000 x18: ffff80000a934000 x17: ffff8000f5b56000 x16: ffff80000bb08000 x15: 0000000000000000 x14: 0000000000000000 x13: 6b6b6b6b6b6b6b6b x12: 6b6b6b6b00000001 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff0000c0be0730 x7 : bbbbbbbbbbbbbbbb x6 : 0000000000000008 x5 : ffff0000cae3e864 x4 : 0000000000000000 x3 : 0000000000000001 x2 : 0000000000000001 x1 : ffff8000090bc23c x0 : 0000000000000000 Call trace: mq_attach+0x44/0xa0 qdisc_graft+0x20c/0x5cc tc_modify_qdisc+0x1c4/0x664 rtnetlink_rcv_msg+0x354/0x440 netlink_rcv_skb+0x64/0x144 rtnetlink_rcv+0x28/0x34 netlink_unicast+0x1e8/0x2a4 netlink_sendmsg+0x308/0x4a0 sock_sendmsg+0x64/0xac ____sys_sendmsg+0x29c/0x358 ___sys_sendmsg+0x90/0xd0 __sys_sendmsg+0x7c/0xd0 __arm64_sys_sendmsg+0x2c/0x38 invoke_syscall+0x54/0x114 el0_svc_common.constprop.1+0x90/0x174 do_el0_svc+0x3c/0xb0 el0_svc+0x24/0xec el0t_64_sync_handler+0x90/0xb4 el0t_64_sync+0x174/0x178 This is because when mq is added for the first time, qdiscs in mq is set to NULL in mq_attach(). Therefore, when replacing mq after adding mq, we need to initialize qdiscs in the mq before continuing to graft. Otherwise, it will couse NULL pointer dereference issue in mq_attach(). And the same issue will occur in the attach functions of mqprio, taprio and htb. ffff:fff1 means that the repalce qdisc is ingress. Ingress does not allow any qdisc to be attached. Therefore, ffff:fff1 is incorrectly used, and the command should be dropped. Fixes: 6ec1c69a8f64 ("net_sched: add classful multiqueue dummy scheduler") Signed-off-by: Zhengchao Shao Tested-by: Peilin Ye Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20230527093747.3583502-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 49b9c1bbfdd9..014209b1dd58 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1606,6 +1606,10 @@ replay: NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); return -ELOOP; } + if (clid == TC_H_INGRESS) { + NL_SET_ERR_MSG(extack, "Ingress cannot graft directly"); + return -EINVAL; + } qdisc_refcount_inc(q); goto graft; } else { From f6a27d6dc51b288106adaf053cff9c9b9cc12c4e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 30 May 2023 19:32:13 +0000 Subject: [PATCH 283/934] KVM: arm64: Drop last page ref in kvm_pgtable_stage2_free_removed() The reference count on page table allocations is increased for every 'counted' PTE (valid or donated) in the table in addition to the initial reference from ->zalloc_page(). kvm_pgtable_stage2_free_removed() fails to drop the last reference on the root of the table walk, meaning we leak memory. Fix it by dropping the last reference after the free walker returns, at which point all references for 'counted' PTEs have been released. Cc: stable@vger.kernel.org Fixes: 5c359cca1faf ("KVM: arm64: Tear down unlinked stage-2 subtree after break-before-make") Reported-by: Yu Zhao Signed-off-by: Oliver Upton Tested-by: Yu Zhao Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230530193213.1663411-1-oliver.upton@linux.dev --- arch/arm64/kvm/hyp/pgtable.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index e1eacffbc41f..95dae02ccc2e 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1332,4 +1332,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg }; WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1)); + + WARN_ON(mm_ops->page_count(pgtable) != 1); + mm_ops->put_page(pgtable); } From f4e4534850a9d18c250a93f8d7fbb51310828110 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Mon, 29 May 2023 12:33:35 -0300 Subject: [PATCH 284/934] net/netlink: fix NETLINK_LIST_MEMBERSHIPS length report The current code for the length calculation wrongly truncates the reported length of the groups array, causing an under report of the subscribed groups. To fix this, use 'BITS_TO_BYTES()' which rounds up the division by 8. Fixes: b42be38b2778 ("netlink: add API to retrieve all group memberships") Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230529153335.389815-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c87804112d0c..3a1e0fd5bf14 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1779,7 +1779,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, break; } } - if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) + if (put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen)) err = -EFAULT; netlink_unlock_table(); return err; From 9f9666e65359d5047089aef97ac87c50f624ecb0 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 30 May 2023 08:48:29 +0300 Subject: [PATCH 285/934] thunderbolt: Mask ring interrupt on Intel hardware as well When resuming from system sleep states the driver issues following warning on Intel hardware: thunderbolt 0000:07:00.0: interrupt for TX ring 0 is already enabled The reason for this is that the commit in question did not mask the ring interrupt on Intel hardware leaving the interrupt active. Fix this by masking it also in Intel hardware. Reported-by: beld zhang Tested-by: beld zhang Closes: https://lore.kernel.org/linux-usb/ZHKW5NeabmfhgLbY@debian.me/ Fixes: c4af8e3fecd0 ("thunderbolt: Clear registers properly when auto clear isn't in use") Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello Signed-off-by: Mika Westerberg --- drivers/thunderbolt/nhi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index c0aee5dc5237..e58beac44295 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -56,9 +56,14 @@ static int ring_interrupt_index(const struct tb_ring *ring) static void nhi_mask_interrupt(struct tb_nhi *nhi, int mask, int ring) { - if (nhi->quirks & QUIRK_AUTO_CLEAR_INT) - return; - iowrite32(mask, nhi->iobase + REG_RING_INTERRUPT_MASK_CLEAR_BASE + ring); + if (nhi->quirks & QUIRK_AUTO_CLEAR_INT) { + u32 val; + + val = ioread32(nhi->iobase + REG_RING_INTERRUPT_BASE + ring); + iowrite32(val & ~mask, nhi->iobase + REG_RING_INTERRUPT_BASE + ring); + } else { + iowrite32(mask, nhi->iobase + REG_RING_INTERRUPT_MASK_CLEAR_BASE + ring); + } } static void nhi_clear_interrupt(struct tb_nhi *nhi, int ring) From 1c913a1c35aa61cf280173b2bcc133c3953c38fc Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 25 May 2023 21:27:21 +0000 Subject: [PATCH 286/934] KVM: arm64: Iterate arm_pmus list to probe for default PMU To date KVM has relied on using a perf event to probe the core PMU at the time of vPMU initialization. Behind the scenes perf_event_init() would iteratively walk the PMUs of the system and return the PMU that could handle the event. However, an upcoming change in perf core will drop the iterative walk, thereby breaking the fragile dance we do on the KVM side. Avoid the problem altogether by iterating over the list of supported PMUs maintained in KVM, returning the core PMU that matches the CPU we were called on. Tested-by: Nathan Chancellor Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230525212723.3361524-2-oliver.upton@linux.dev --- arch/arm64/kvm/pmu-emul.c | 46 ++++++++++----------------------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 45727d50d18d..5deddc49e745 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -694,45 +694,23 @@ out_unlock: static struct arm_pmu *kvm_pmu_probe_armpmu(void) { - struct perf_event_attr attr = { }; - struct perf_event *event; - struct arm_pmu *pmu = NULL; + struct arm_pmu *tmp, *pmu = NULL; + struct arm_pmu_entry *entry; + int cpu; - /* - * Create a dummy event that only counts user cycles. As we'll never - * leave this function with the event being live, it will never - * count anything. But it allows us to probe some of the PMU - * details. Yes, this is terrible. - */ - attr.type = PERF_TYPE_RAW; - attr.size = sizeof(attr); - attr.pinned = 1; - attr.disabled = 0; - attr.exclude_user = 0; - attr.exclude_kernel = 1; - attr.exclude_hv = 1; - attr.exclude_host = 1; - attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; - attr.sample_period = GENMASK(63, 0); + mutex_lock(&arm_pmus_lock); - event = perf_event_create_kernel_counter(&attr, -1, current, - kvm_pmu_perf_overflow, &attr); + cpu = smp_processor_id(); + list_for_each_entry(entry, &arm_pmus, entry) { + tmp = entry->arm_pmu; - if (IS_ERR(event)) { - pr_err_once("kvm: pmu event creation failed %ld\n", - PTR_ERR(event)); - return NULL; + if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) { + pmu = tmp; + break; + } } - if (event->pmu) { - pmu = to_arm_pmu(event->pmu); - if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || - pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) - pmu = NULL; - } - - perf_event_disable(event); - perf_event_release_kernel(event); + mutex_unlock(&arm_pmus_lock); return pmu; } From 40e54cad454076172cc3e2bca60aa924650a3e4b Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 25 May 2023 21:27:22 +0000 Subject: [PATCH 287/934] KVM: arm64: Document default vPMU behavior on heterogeneous systems KVM maintains a mask of supported CPUs when a vPMU type is explicitly selected by userspace and is used to reject any attempt to run the vCPU on an unsupported CPU. This is great, but we're still beholden to the default behavior where vCPUs can be scheduled anywhere and guest counters may silently stop working. Avoid confusing the next poor sod to look at this code and document the intended behavior. Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230525212723.3361524-3-oliver.upton@linux.dev --- arch/arm64/kvm/pmu-emul.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 5deddc49e745..491ca7eb2a4c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -890,7 +890,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -EBUSY; if (!kvm->arch.arm_pmu) { - /* No PMU set, get the default one */ + /* + * No PMU set, get the default one. + * + * The observant among you will notice that the supported_cpus + * mask does not get updated for the default PMU even though it + * is quite possible the selected instance supports only a + * subset of cores in the system. This is intentional, and + * upholds the preexisting behavior on heterogeneous systems + * where vCPUs can be scheduled on any core but the guest + * counters could stop working. + */ kvm->arch.arm_pmu = kvm_pmu_probe_armpmu(); if (!kvm->arch.arm_pmu) return -ENODEV; From 448a5ce1120c5bdbce1f1ccdabcd31c7d029f328 Mon Sep 17 00:00:00 2001 From: Vladislav Efanov Date: Tue, 30 May 2023 14:39:41 +0300 Subject: [PATCH 288/934] udp6: Fix race condition in udp6_sendmsg & connect Syzkaller got the following report: BUG: KASAN: use-after-free in sk_setup_caps+0x621/0x690 net/core/sock.c:2018 Read of size 8 at addr ffff888027f82780 by task syz-executor276/3255 The function sk_setup_caps (called by ip6_sk_dst_store_flow-> ip6_dst_store) referenced already freed memory as this memory was freed by parallel task in udpv6_sendmsg->ip6_sk_dst_lookup_flow-> sk_dst_check. task1 (connect) task2 (udp6_sendmsg) sk_setup_caps->sk_dst_set | | sk_dst_check-> | sk_dst_set | dst_release sk_setup_caps references | to already freed dst_entry| The reason for this race condition is: sk_setup_caps() keeps using the dst after transferring the ownership to the dst cache. Found by Linux Verification Center (linuxtesting.org) with syzkaller. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Vladislav Efanov Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 5440e67bcfe3..24f2761bdb1d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2381,7 +2381,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; - sk_dst_set(sk, dst); sk->sk_route_caps = dst->dev->features; if (sk_is_tcp(sk)) sk->sk_route_caps |= NETIF_F_GSO; @@ -2400,6 +2399,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) } } sk->sk_gso_max_segs = max_segs; + sk_dst_set(sk, dst); } EXPORT_SYMBOL_GPL(sk_setup_caps); From 81d0fa4cb4fc0e1a49c2b22f92c43d9fe972ebcf Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Sat, 28 Jan 2023 16:23:41 +0000 Subject: [PATCH 289/934] tracing/probe: trace_probe_primary_from_call(): checked list_first_entry All callers of trace_probe_primary_from_call() check the return value to be non NULL. However, the function returns list_first_entry(&tpe->probes, ...) which can never be NULL. Additionally, it does not check for the list being possibly empty, possibly causing a type confusion on empty lists. Use list_first_entry_or_null() which solves both problems. Link: https://lore.kernel.org/linux-trace-kernel/20230128-list-entry-null-check-v1-1-8bde6a3da2ef@diag.uniroma1.it/ Fixes: 60d53e2c3b75 ("tracing/probe: Split trace_event related data from trace_probe") Signed-off-by: Pietro Borrello Reviewed-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Acked-by: Mukesh Ojha Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_probe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index ef8ed3b65d05..6a4ecfb1da43 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -308,7 +308,7 @@ trace_probe_primary_from_call(struct trace_event_call *call) { struct trace_probe_event *tpe = trace_probe_event_from_call(call); - return list_first_entry(&tpe->probes, struct trace_probe, list); + return list_first_entry_or_null(&tpe->probes, struct trace_probe, list); } static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp) From bed61c8fc7ba17d0f259c674137a5198fd2e3334 Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Tue, 30 May 2023 12:12:40 +0800 Subject: [PATCH 290/934] drm/ast: Fix long time waiting on s3/s4 resume In resume, DP's launch function, ast_dp_launch, could wait at most 30 seconds before timeout to check if DP is enabled. It could lead to 'DPM device timeout' and trigger unrecoverable kernel panic. To avoid this problem, we check if DP enable or not at driver probe only. Reported-and-tested-by: Wendy Wang Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217278 Acked-by: Thomas Zimmermann Signed-off-by: Jammy Huang Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230530041240.13427-1-jammy_huang@aspeedtech.com --- drivers/gpu/drm/ast/ast_dp.c | 57 +++++++++++----------------------- drivers/gpu/drm/ast/ast_drv.h | 5 +-- drivers/gpu/drm/ast/ast_main.c | 11 +++++-- drivers/gpu/drm/ast/ast_post.c | 3 +- 4 files changed, 30 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index fbb070f63e36..6dc1a09504e1 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -119,53 +119,32 @@ err_astdp_edid_not_ready: /* * Launch Aspeed DP */ -void ast_dp_launch(struct drm_device *dev, u8 bPower) +void ast_dp_launch(struct drm_device *dev) { - u32 i = 0, j = 0, WaitCount = 1; - u8 bDPTX = 0; + u32 i = 0; u8 bDPExecute = 1; - struct ast_device *ast = to_ast_device(dev); - // S3 come back, need more time to wait BMC ready. - if (bPower) - WaitCount = 300; - - - // Wait total count by different condition. - for (j = 0; j < WaitCount; j++) { - bDPTX = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xD1, TX_TYPE_MASK); - - if (bDPTX) - break; + // Wait one second then timeout. + while (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xD1, ASTDP_MCU_FW_EXECUTING) != + ASTDP_MCU_FW_EXECUTING) { + i++; + // wait 100 ms msleep(100); - } - // 0xE : ASTDP with DPMCU FW handling - if (bDPTX == ASTDP_DPMCU_TX) { - // Wait one second then timeout. - i = 0; - - while (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xD1, COPROCESSOR_LAUNCH) != - COPROCESSOR_LAUNCH) { - i++; - // wait 100 ms - msleep(100); - - if (i >= 10) { - // DP would not be ready. - bDPExecute = 0; - break; - } + if (i >= 10) { + // DP would not be ready. + bDPExecute = 0; + break; } - - if (bDPExecute) - ast->tx_chip_types |= BIT(AST_TX_ASTDP); - - ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xE5, - (u8) ~ASTDP_HOST_EDID_READ_DONE_MASK, - ASTDP_HOST_EDID_READ_DONE); } + + if (!bDPExecute) + drm_err(dev, "Wait DPMCU executing timeout\n"); + + ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xE5, + (u8) ~ASTDP_HOST_EDID_READ_DONE_MASK, + ASTDP_HOST_EDID_READ_DONE); } diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index a501169cddad..5498a6676f2e 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -350,9 +350,6 @@ int ast_mode_config_init(struct ast_device *ast); #define AST_DP501_LINKRATE 0xf014 #define AST_DP501_EDID_DATA 0xf020 -/* Define for Soc scratched reg */ -#define COPROCESSOR_LAUNCH BIT(5) - /* * Display Transmitter Type: */ @@ -480,7 +477,7 @@ struct ast_i2c_chan *ast_i2c_create(struct drm_device *dev); /* aspeed DP */ int ast_astdp_read_edid(struct drm_device *dev, u8 *ediddata); -void ast_dp_launch(struct drm_device *dev, u8 bPower); +void ast_dp_launch(struct drm_device *dev); void ast_dp_power_on_off(struct drm_device *dev, bool no); void ast_dp_set_on_off(struct drm_device *dev, bool no); void ast_dp_set_mode(struct drm_crtc *crtc, struct ast_vbios_mode_info *vbios_mode); diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index f32ce29edba7..1f35438f614a 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -254,8 +254,13 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) case 0x0c: ast->tx_chip_types = AST_TX_DP501_BIT; } - } else if (ast->chip == AST2600) - ast_dp_launch(&ast->base, 0); + } else if (ast->chip == AST2600) { + if (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xD1, TX_TYPE_MASK) == + ASTDP_DPMCU_TX) { + ast->tx_chip_types = AST_TX_ASTDP_BIT; + ast_dp_launch(&ast->base); + } + } /* Print stuff for diagnostic purposes */ if (ast->tx_chip_types & AST_TX_NONE_BIT) @@ -264,6 +269,8 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) drm_info(dev, "Using Sil164 TMDS transmitter\n"); if (ast->tx_chip_types & AST_TX_DP501_BIT) drm_info(dev, "Using DP501 DisplayPort transmitter\n"); + if (ast->tx_chip_types & AST_TX_ASTDP_BIT) + drm_info(dev, "Using ASPEED DisplayPort transmitter\n"); return 0; } diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 71bb36b865fd..a005aec18a02 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -380,7 +380,8 @@ void ast_post_gpu(struct drm_device *dev) ast_set_def_ext_reg(dev); if (ast->chip == AST2600) { - ast_dp_launch(dev, 1); + if (ast->tx_chip_types & AST_TX_ASTDP_BIT) + ast_dp_launch(dev); } else if (ast->config_mode == ast_use_p2a) { if (ast->chip == AST2500) ast_post_chip_2500(dev); From 57d1e8900495cf1751cec74db16fe1a0fe47efbb Mon Sep 17 00:00:00 2001 From: Sicong Jiang Date: Wed, 31 May 2023 21:06:35 +1200 Subject: [PATCH 291/934] ASoC: amd: yc: Add Thinkpad Neo14 to quirks list for acp6x Thinkpad Neo14 Ryzen Edition uses Ryzen 6800H processor, and adding to quirks list for acp6x will enable internal mic. Signed-off-by: Sicong Jiang Link: https://lore.kernel.org/r/20230531090635.89565-1-kevin.jiangsc@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 4406a5def076..246299a178f9 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -171,6 +171,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21CL"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21EF"), + } + }, { .driver_data = &acp6x_card, .matches = { From c034203b6a9dae6751ef4371c18cb77983e30c28 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 29 May 2023 14:35:55 +0300 Subject: [PATCH 292/934] nfsd: fix double fget() bug in __write_ports_addfd() The bug here is that you cannot rely on getting the same socket from multiple calls to fget() because userspace can influence that. This is a kind of double fetch bug. The fix is to delete the svc_alien_sock() function and instead do the checking inside the svc_addsock() function. Fixes: 3064639423c4 ("nfsd: check passed socket's net matches NFSd superblock's one") Signed-off-by: Dan Carpenter Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 7 +------ include/linux/sunrpc/svcsock.h | 7 +++---- net/sunrpc/svcsock.c | 24 ++++++------------------ 3 files changed, 10 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c159817d1282..b4fd7a7062d5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -690,16 +690,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred if (err != 0 || fd < 0) return -EINVAL; - if (svc_alien_sock(net, fd)) { - printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__); - return -EINVAL; - } - err = nfsd_create_serv(net); if (err != 0) return err; - err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); + err = svc_addsock(nn->nfsd_serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); if (err >= 0 && !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index d16ae621782c..a7116048a4d4 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -61,10 +61,9 @@ int svc_recv(struct svc_rqst *, long); void svc_send(struct svc_rqst *rqstp); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); -bool svc_alien_sock(struct net *net, int fd); -int svc_addsock(struct svc_serv *serv, const int fd, - char *name_return, const size_t len, - const struct cred *cred); +int svc_addsock(struct svc_serv *serv, struct net *net, + const int fd, char *name_return, const size_t len, + const struct cred *cred); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 63fe7a338992..f77cebe2c071 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1480,25 +1480,10 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } -bool svc_alien_sock(struct net *net, int fd) -{ - int err; - struct socket *sock = sockfd_lookup(fd, &err); - bool ret = false; - - if (!sock) - goto out; - if (sock_net(sock->sk) != net) - ret = true; - sockfd_put(sock); -out: - return ret; -} -EXPORT_SYMBOL_GPL(svc_alien_sock); - /** * svc_addsock - add a listener socket to an RPC service * @serv: pointer to RPC service to which to add a new listener + * @net: caller's network namespace * @fd: file descriptor of the new listener * @name_return: pointer to buffer to fill in with name of listener * @len: size of the buffer @@ -1508,8 +1493,8 @@ EXPORT_SYMBOL_GPL(svc_alien_sock); * Name is terminated with '\n'. On error, returns a negative errno * value. */ -int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, - const size_t len, const struct cred *cred) +int svc_addsock(struct svc_serv *serv, struct net *net, const int fd, + char *name_return, const size_t len, const struct cred *cred) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@ -1520,6 +1505,9 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, if (!so) return err; + err = -EINVAL; + if (sock_net(so->sk) != net) + goto out; err = -EAFNOSUPPORT; if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6)) goto out; From 8dc2a7e8027fbeca0c7df81d4c82e735a59b5741 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 26 May 2023 17:46:30 +0200 Subject: [PATCH 293/934] riscv: Fix relocatable kernels with early alternatives using -fno-pie Early alternatives are called with the mmu disabled, and then should not access any global symbols through the GOT since it requires relocations, relocations that we do before but *virtually*. So only use medany code model for this early code. Signed-off-by: Alexandre Ghiti Tested-by: Conor Dooley # booted on nezha & unmatched Fixes: 39b33072941f ("riscv: Introduce CONFIG_RELOCATABLE") Link: https://lore.kernel.org/r/20230526154630.289374-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/Makefile | 4 ++++ arch/riscv/kernel/Makefile | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile index a1055965fbee..7b2637c8c332 100644 --- a/arch/riscv/errata/Makefile +++ b/arch/riscv/errata/Makefile @@ -1,2 +1,6 @@ +ifdef CONFIG_RELOCATABLE +KBUILD_CFLAGS += -fno-pie +endif + obj-$(CONFIG_ERRATA_SIFIVE) += sifive/ obj-$(CONFIG_ERRATA_THEAD) += thead/ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index fbdccc21418a..153864e4f399 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -23,6 +23,10 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE) endif +ifdef CONFIG_RELOCATABLE +CFLAGS_alternative.o += -fno-pie +CFLAGS_cpufeature.o += -fno-pie +endif ifdef CONFIG_KASAN KASAN_SANITIZE_alternative.o := n KASAN_SANITIZE_cpufeature.o := n From 6199d23c91ce53bfed455f09a8c5ed170d516824 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Wed, 31 May 2023 10:24:28 +0200 Subject: [PATCH 294/934] HID: logitech-hidpp: Handle timeout differently from busy If an attempt at contacting a receiver or a device fails because the receiver or device never responds, don't restart the communication, only restart it if the receiver or device answers that it's busy, as originally intended. This was the behaviour on communication timeout before commit 586e8fede795 ("HID: logitech-hidpp: Retry commands when device is busy"). This fixes some overly long waits in a critical path on boot, when checking whether the device is connected by getting its HID++ version. Signed-off-by: Bastien Nocera Suggested-by: Mark Lord Fixes: 586e8fede795 ("HID: logitech-hidpp: Retry commands when device is busy") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217412 Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 0fcfd85fea0f..2246044b1639 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -314,6 +314,7 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp, dbg_hid("%s:timeout waiting for response\n", __func__); memset(response, 0, sizeof(struct hidpp_report)); ret = -ETIMEDOUT; + goto exit; } if (response->report_id == REPORT_ID_HIDPP_SHORT && From 6d074ce231772c66e648a61f6bd2245e7129d1f5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 29 May 2023 12:50:34 -0700 Subject: [PATCH 295/934] scsi: stex: Fix gcc 13 warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc 13 may assign another type to enumeration constants than gcc 12. Split the large enum at the top of source file stex.c such that the type of the constants used in time expressions is changed back to the same type chosen by gcc 12. This patch suppresses compiler warnings like this one: In file included from ./include/linux/bitops.h:7, from ./include/linux/kernel.h:22, from drivers/scsi/stex.c:13: drivers/scsi/stex.c: In function ‘stex_common_handshake’: ./include/linux/typecheck.h:12:25: error: comparison of distinct pointer types lacks a cast [-Werror] 12 | (void)(&__dummy == &__dummy2); \ | ^~ ./include/linux/jiffies.h:106:10: note: in expansion of macro ‘typecheck’ 106 | typecheck(unsigned long, b) && \ | ^~~~~~~~~ drivers/scsi/stex.c:1035:29: note: in expansion of macro ‘time_after’ 1035 | if (time_after(jiffies, before + MU_MAX_DELAY * HZ)) { | ^~~~~~~~~~ See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107405. Cc: stable@vger.kernel.org Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20230529195034.3077-1-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/stex.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index 5b230e149c3d..8ffb75be99bc 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -109,7 +109,9 @@ enum { TASK_ATTRIBUTE_HEADOFQUEUE = 0x1, TASK_ATTRIBUTE_ORDERED = 0x2, TASK_ATTRIBUTE_ACA = 0x4, +}; +enum { SS_STS_NORMAL = 0x80000000, SS_STS_DONE = 0x40000000, SS_STS_HANDSHAKE = 0x20000000, @@ -121,7 +123,9 @@ enum { SS_I2H_REQUEST_RESET = 0x2000, SS_MU_OPERATIONAL = 0x80000000, +}; +enum { STEX_CDB_LENGTH = 16, STATUS_VAR_LEN = 128, From 856303797724d28f1d65b702f0eadcee1ea7abf5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 14:54:54 +0200 Subject: [PATCH 296/934] nvme: fix the name of Zone Append for verbose logging No Management involved in Zone Appened. Fixes: bd83fe6f2cd2 ("nvme: add verbose error logging") Signed-off-by: Christoph Hellwig Reviewed-by: Alan Adamson Signed-off-by: Keith Busch --- drivers/nvme/host/constants.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c index bc523ca02254..5e4f8848dce0 100644 --- a/drivers/nvme/host/constants.c +++ b/drivers/nvme/host/constants.c @@ -21,7 +21,7 @@ static const char * const nvme_ops[] = { [nvme_cmd_resv_release] = "Reservation Release", [nvme_cmd_zone_mgmt_send] = "Zone Management Send", [nvme_cmd_zone_mgmt_recv] = "Zone Management Receive", - [nvme_cmd_zone_append] = "Zone Management Append", + [nvme_cmd_zone_append] = "Zone Append", }; static const char * const nvme_admin_ops[] = { From 40994ce0ea010b1843e620bacc26d29ddebfc08d Mon Sep 17 00:00:00 2001 From: Zhu YiXin Date: Fri, 19 May 2023 12:45:55 +0800 Subject: [PATCH 297/934] MAINTAINERS: Add Chuanhua Lei as Intel LGM GW PCIe maintainer Rahul Tanwar is no longer at Maxlinear, so update the MAINTAINERS entry for the PCIe driver for Intel LGM GW SoC. Link: https://lore.kernel.org/r/20230519044555.3750-2-yzhu@maxlinear.com Signed-off-by: Zhu YiXin Signed-off-by: Bjorn Helgaas Acked-by: Rahul Tanwar Acked-by: Lei Chuanhua --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7e0b87d5aa2e..31c4e5e9c411 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16349,7 +16349,7 @@ F: Documentation/devicetree/bindings/pci/intel,keembay-pcie* F: drivers/pci/controller/dwc/pcie-keembay.c PCIE DRIVER FOR INTEL LGM GW SOC -M: Rahul Tanwar +M: Chuanhua Lei L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml From 8fe72b76db79d694858e872370df49676bc3be8c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 May 2023 12:22:09 +0300 Subject: [PATCH 298/934] mailbox: mailbox-test: fix a locking issue in mbox_test_message_write() There was a bug where this code forgot to unlock the tdev->mutex if the kzalloc() failed. Fix this issue, by moving the allocation outside the lock. Fixes: 2d1e952a2b8e ("mailbox: mailbox-test: Fix potential double-free in mbox_test_message_write()") Signed-off-by: Dan Carpenter Reviewed-by: Lee Jones Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox-test.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index c4a705c30331..fc6a12a51b40 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -98,6 +98,7 @@ static ssize_t mbox_test_message_write(struct file *filp, size_t count, loff_t *ppos) { struct mbox_test_device *tdev = filp->private_data; + char *message; void *data; int ret; @@ -113,12 +114,13 @@ static ssize_t mbox_test_message_write(struct file *filp, return -EINVAL; } - mutex_lock(&tdev->mutex); - - tdev->message = kzalloc(MBOX_MAX_MSG_LEN, GFP_KERNEL); - if (!tdev->message) + message = kzalloc(MBOX_MAX_MSG_LEN, GFP_KERNEL); + if (!message) return -ENOMEM; + mutex_lock(&tdev->mutex); + + tdev->message = message; ret = copy_from_user(tdev->message, userbuf, count); if (ret) { ret = -EFAULT; From ffa28312e2837ae788855840e126fa6f8a1ac35d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 16 May 2023 11:12:51 +0300 Subject: [PATCH 299/934] firmware_loader: Fix a NULL vs IS_ERR() check The crypto_alloc_shash() function doesn't return NULL, it returns error pointers. Update the check accordingly. Fixes: 02fe26f25325 ("firmware_loader: Add debug message with checksum for FW file") Signed-off-by: Dan Carpenter Reviewed-by: Cezary Rojewski Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/36ef6042-ce74-4e8e-9e2c-5b5c28940610@kili.mountain Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_loader/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 9d79d5ad9102..b58c42f1b1ce 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -812,7 +812,7 @@ static void fw_log_firmware_info(const struct firmware *fw, const char *name, st char *outbuf; alg = crypto_alloc_shash("sha256", 0, 0); - if (!alg) + if (IS_ERR(alg)) return; sha256buf = kmalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); From 4acfe3dfde685a5a9eaec5555351918e2d7266a1 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Tue, 9 May 2023 10:47:45 +0200 Subject: [PATCH 300/934] test_firmware: prevent race conditions by a correct implementation of locking Dan Carpenter spotted a race condition in a couple of situations like these in the test_firmware driver: static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg) { u8 val; int ret; ret = kstrtou8(buf, 10, &val); if (ret) return ret; mutex_lock(&test_fw_mutex); *(u8 *)cfg = val; mutex_unlock(&test_fw_mutex); /* Always return full write size even if we didn't consume all */ return size; } static ssize_t config_num_requests_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; mutex_lock(&test_fw_mutex); if (test_fw_config->reqs) { pr_err("Must call release_all_firmware prior to changing config\n"); rc = -EINVAL; mutex_unlock(&test_fw_mutex); goto out; } mutex_unlock(&test_fw_mutex); rc = test_dev_config_update_u8(buf, count, &test_fw_config->num_requests); out: return rc; } static ssize_t config_read_fw_idx_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return test_dev_config_update_u8(buf, count, &test_fw_config->read_fw_idx); } The function test_dev_config_update_u8() is called from both the locked and the unlocked context, function config_num_requests_store() and config_read_fw_idx_store() which can both be called asynchronously as they are driver's methods, while test_dev_config_update_u8() and siblings change their argument pointed to by u8 *cfg or similar pointer. To avoid deadlock on test_fw_mutex, the lock is dropped before calling test_dev_config_update_u8() and re-acquired within test_dev_config_update_u8() itself, but alas this creates a race condition. Having two locks wouldn't assure a race-proof mutual exclusion. This situation is best avoided by the introduction of a new, unlocked function __test_dev_config_update_u8() which can be called from the locked context and reducing test_dev_config_update_u8() to: static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg) { int ret; mutex_lock(&test_fw_mutex); ret = __test_dev_config_update_u8(buf, size, cfg); mutex_unlock(&test_fw_mutex); return ret; } doing the locking and calling the unlocked primitive, which enables both locked and unlocked versions without duplication of code. The similar approach was applied to all functions called from the locked and the unlocked context, which safely mitigates both deadlocks and race conditions in the driver. __test_dev_config_update_bool(), __test_dev_config_update_u8() and __test_dev_config_update_size_t() unlocked versions of the functions were introduced to be called from the locked contexts as a workaround without releasing the main driver's lock and thereof causing a race condition. The test_dev_config_update_bool(), test_dev_config_update_u8() and test_dev_config_update_size_t() locked versions of the functions are being called from driver methods without the unnecessary multiplying of the locking and unlocking code for each method, and complicating the code with saving of the return value across lock. Fixes: 7feebfa487b92 ("test_firmware: add support for request_firmware_into_buf") Cc: Luis Chamberlain Cc: Greg Kroah-Hartman Cc: Russ Weight Cc: Takashi Iwai Cc: Tianfei Zhang Cc: Shuah Khan Cc: Colin Ian King Cc: Randy Dunlap Cc: linux-kselftest@vger.kernel.org Cc: stable@vger.kernel.org # v5.4 Suggested-by: Dan Carpenter Signed-off-by: Mirsad Goran Todorovac Link: https://lore.kernel.org/r/20230509084746.48259-1-mirsad.todorovac@alu.unizg.hr Signed-off-by: Greg Kroah-Hartman --- lib/test_firmware.c | 56 ++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 05ed84c2fc4c..35417e0af3f4 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -353,16 +353,26 @@ static ssize_t config_test_show_str(char *dst, return len; } +static inline int __test_dev_config_update_bool(const char *buf, size_t size, + bool *cfg) +{ + int ret; + + if (kstrtobool(buf, cfg) < 0) + ret = -EINVAL; + else + ret = size; + + return ret; +} + static int test_dev_config_update_bool(const char *buf, size_t size, bool *cfg) { int ret; mutex_lock(&test_fw_mutex); - if (kstrtobool(buf, cfg) < 0) - ret = -EINVAL; - else - ret = size; + ret = __test_dev_config_update_bool(buf, size, cfg); mutex_unlock(&test_fw_mutex); return ret; @@ -373,7 +383,8 @@ static ssize_t test_dev_config_show_bool(char *buf, bool val) return snprintf(buf, PAGE_SIZE, "%d\n", val); } -static int test_dev_config_update_size_t(const char *buf, +static int __test_dev_config_update_size_t( + const char *buf, size_t size, size_t *cfg) { @@ -384,9 +395,7 @@ static int test_dev_config_update_size_t(const char *buf, if (ret) return ret; - mutex_lock(&test_fw_mutex); *(size_t *)cfg = new; - mutex_unlock(&test_fw_mutex); /* Always return full write size even if we didn't consume all */ return size; @@ -402,7 +411,7 @@ static ssize_t test_dev_config_show_int(char *buf, int val) return snprintf(buf, PAGE_SIZE, "%d\n", val); } -static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg) +static int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg) { u8 val; int ret; @@ -411,14 +420,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg) if (ret) return ret; - mutex_lock(&test_fw_mutex); *(u8 *)cfg = val; - mutex_unlock(&test_fw_mutex); /* Always return full write size even if we didn't consume all */ return size; } +static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg) +{ + int ret; + + mutex_lock(&test_fw_mutex); + ret = __test_dev_config_update_u8(buf, size, cfg); + mutex_unlock(&test_fw_mutex); + + return ret; +} + static ssize_t test_dev_config_show_u8(char *buf, u8 val) { return snprintf(buf, PAGE_SIZE, "%u\n", val); @@ -471,10 +489,10 @@ static ssize_t config_num_requests_store(struct device *dev, mutex_unlock(&test_fw_mutex); goto out; } - mutex_unlock(&test_fw_mutex); - rc = test_dev_config_update_u8(buf, count, - &test_fw_config->num_requests); + rc = __test_dev_config_update_u8(buf, count, + &test_fw_config->num_requests); + mutex_unlock(&test_fw_mutex); out: return rc; @@ -518,10 +536,10 @@ static ssize_t config_buf_size_store(struct device *dev, mutex_unlock(&test_fw_mutex); goto out; } - mutex_unlock(&test_fw_mutex); - rc = test_dev_config_update_size_t(buf, count, - &test_fw_config->buf_size); + rc = __test_dev_config_update_size_t(buf, count, + &test_fw_config->buf_size); + mutex_unlock(&test_fw_mutex); out: return rc; @@ -548,10 +566,10 @@ static ssize_t config_file_offset_store(struct device *dev, mutex_unlock(&test_fw_mutex); goto out; } - mutex_unlock(&test_fw_mutex); - rc = test_dev_config_update_size_t(buf, count, - &test_fw_config->file_offset); + rc = __test_dev_config_update_size_t(buf, count, + &test_fw_config->file_offset); + mutex_unlock(&test_fw_mutex); out: return rc; From be37bed754ed90b2655382f93f9724b3c1aae847 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Tue, 9 May 2023 10:47:47 +0200 Subject: [PATCH 301/934] test_firmware: fix a memory leak with reqs buffer Dan Carpenter spotted that test_fw_config->reqs will be leaked if trigger_batched_requests_store() is called two or more times. The same appears with trigger_batched_requests_async_store(). This bug wasn't trigger by the tests, but observed by Dan's visual inspection of the code. The recommended workaround was to return -EBUSY if test_fw_config->reqs is already allocated. Fixes: 7feebfa487b92 ("test_firmware: add support for request_firmware_into_buf") Cc: Luis Chamberlain Cc: Greg Kroah-Hartman Cc: Russ Weight Cc: Tianfei Zhang Cc: Shuah Khan Cc: Colin Ian King Cc: Randy Dunlap Cc: linux-kselftest@vger.kernel.org Cc: stable@vger.kernel.org # v5.4 Suggested-by: Dan Carpenter Suggested-by: Takashi Iwai Signed-off-by: Mirsad Goran Todorovac Reviewed-by: Dan Carpenter Acked-by: Luis Chamberlain Link: https://lore.kernel.org/r/20230509084746.48259-2-mirsad.todorovac@alu.unizg.hr Signed-off-by: Greg Kroah-Hartman --- lib/test_firmware.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 35417e0af3f4..91b232ed3161 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -913,6 +913,11 @@ static ssize_t trigger_batched_requests_store(struct device *dev, mutex_lock(&test_fw_mutex); + if (test_fw_config->reqs) { + rc = -EBUSY; + goto out_bail; + } + test_fw_config->reqs = vzalloc(array3_size(sizeof(struct test_batched_req), test_fw_config->num_requests, 2)); @@ -1011,6 +1016,11 @@ ssize_t trigger_batched_requests_async_store(struct device *dev, mutex_lock(&test_fw_mutex); + if (test_fw_config->reqs) { + rc = -EBUSY; + goto out_bail; + } + test_fw_config->reqs = vzalloc(array3_size(sizeof(struct test_batched_req), test_fw_config->num_requests, 2)); From 48e156023059e57a8fc68b498439832f7600ffff Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Tue, 9 May 2023 10:47:49 +0200 Subject: [PATCH 302/934] test_firmware: fix the memory leak of the allocated firmware buffer The following kernel memory leak was noticed after running tools/testing/selftests/firmware/fw_run_tests.sh: [root@pc-mtodorov firmware]# cat /sys/kernel/debug/kmemleak . . . unreferenced object 0xffff955389bc3400 (size 1024): comm "test_firmware-0", pid 5451, jiffies 4294944822 (age 65.652s) hex dump (first 32 bytes): 47 48 34 35 36 37 0a 00 00 00 00 00 00 00 00 00 GH4567.......... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] slab_post_alloc_hook+0x8c/0x3c0 [] __kmem_cache_alloc_node+0x184/0x240 [] kmalloc_trace+0x2e/0xc0 [] test_fw_run_batch_request+0x9d/0x180 [] kthread+0x10b/0x140 [] ret_from_fork+0x29/0x50 unreferenced object 0xffff9553c334b400 (size 1024): comm "test_firmware-1", pid 5452, jiffies 4294944822 (age 65.652s) hex dump (first 32 bytes): 47 48 34 35 36 37 0a 00 00 00 00 00 00 00 00 00 GH4567.......... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] slab_post_alloc_hook+0x8c/0x3c0 [] __kmem_cache_alloc_node+0x184/0x240 [] kmalloc_trace+0x2e/0xc0 [] test_fw_run_batch_request+0x9d/0x180 [] kthread+0x10b/0x140 [] ret_from_fork+0x29/0x50 unreferenced object 0xffff9553c334f000 (size 1024): comm "test_firmware-2", pid 5453, jiffies 4294944822 (age 65.652s) hex dump (first 32 bytes): 47 48 34 35 36 37 0a 00 00 00 00 00 00 00 00 00 GH4567.......... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] slab_post_alloc_hook+0x8c/0x3c0 [] __kmem_cache_alloc_node+0x184/0x240 [] kmalloc_trace+0x2e/0xc0 [] test_fw_run_batch_request+0x9d/0x180 [] kthread+0x10b/0x140 [] ret_from_fork+0x29/0x50 unreferenced object 0xffff9553c3348400 (size 1024): comm "test_firmware-3", pid 5454, jiffies 4294944822 (age 65.652s) hex dump (first 32 bytes): 47 48 34 35 36 37 0a 00 00 00 00 00 00 00 00 00 GH4567.......... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] slab_post_alloc_hook+0x8c/0x3c0 [] __kmem_cache_alloc_node+0x184/0x240 [] kmalloc_trace+0x2e/0xc0 [] test_fw_run_batch_request+0x9d/0x180 [] kthread+0x10b/0x140 [] ret_from_fork+0x29/0x50 [root@pc-mtodorov firmware]# Note that the size 1024 corresponds to the size of the test firmware buffer. The actual number of the buffers leaked is around 70-110, depending on the test run. The cause of the leak is the following: request_partial_firmware_into_buf() and request_firmware_into_buf() provided firmware buffer isn't released on release_firmware(), we have allocated it and we are responsible for deallocating it manually. This is introduced in a number of context where previously only release_firmware() was called, which was insufficient. Reported-by: Mirsad Goran Todorovac Fixes: 7feebfa487b92 ("test_firmware: add support for request_firmware_into_buf") Cc: Greg Kroah-Hartman Cc: Dan Carpenter Cc: Takashi Iwai Cc: Luis Chamberlain Cc: Russ Weight Cc: Tianfei zhang Cc: Christophe JAILLET Cc: Zhengchao Shao Cc: Colin Ian King Cc: linux-kernel@vger.kernel.org Cc: Kees Cook Cc: Scott Branden Cc: Luis R. Rodriguez Cc: linux-kselftest@vger.kernel.org Cc: stable@vger.kernel.org # v5.4 Signed-off-by: Mirsad Goran Todorovac Link: https://lore.kernel.org/r/20230509084746.48259-3-mirsad.todorovac@alu.unizg.hr Signed-off-by: Greg Kroah-Hartman --- lib/test_firmware.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 91b232ed3161..1d7d480b8eeb 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -45,6 +45,7 @@ struct test_batched_req { bool sent; const struct firmware *fw; const char *name; + const char *fw_buf; struct completion completion; struct task_struct *task; struct device *dev; @@ -175,8 +176,14 @@ static void __test_release_all_firmware(void) for (i = 0; i < test_fw_config->num_requests; i++) { req = &test_fw_config->reqs[i]; - if (req->fw) + if (req->fw) { + if (req->fw_buf) { + kfree_const(req->fw_buf); + req->fw_buf = NULL; + } release_firmware(req->fw); + req->fw = NULL; + } } vfree(test_fw_config->reqs); @@ -670,6 +677,8 @@ static ssize_t trigger_request_store(struct device *dev, mutex_lock(&test_fw_mutex); release_firmware(test_firmware); + if (test_fw_config->reqs) + __test_release_all_firmware(); test_firmware = NULL; rc = request_firmware(&test_firmware, name, dev); if (rc) { @@ -770,6 +779,8 @@ static ssize_t trigger_async_request_store(struct device *dev, mutex_lock(&test_fw_mutex); release_firmware(test_firmware); test_firmware = NULL; + if (test_fw_config->reqs) + __test_release_all_firmware(); rc = request_firmware_nowait(THIS_MODULE, 1, name, dev, GFP_KERNEL, NULL, trigger_async_request_cb); if (rc) { @@ -812,6 +823,8 @@ static ssize_t trigger_custom_fallback_store(struct device *dev, mutex_lock(&test_fw_mutex); release_firmware(test_firmware); + if (test_fw_config->reqs) + __test_release_all_firmware(); test_firmware = NULL; rc = request_firmware_nowait(THIS_MODULE, FW_ACTION_NOUEVENT, name, dev, GFP_KERNEL, NULL, @@ -874,6 +887,8 @@ static int test_fw_run_batch_request(void *data) test_fw_config->buf_size); if (!req->fw) kfree(test_buf); + else + req->fw_buf = test_buf; } else { req->rc = test_fw_config->req_firmware(&req->fw, req->name, @@ -934,6 +949,7 @@ static ssize_t trigger_batched_requests_store(struct device *dev, req->fw = NULL; req->idx = i; req->name = test_fw_config->name; + req->fw_buf = NULL; req->dev = dev; init_completion(&req->completion); req->task = kthread_run(test_fw_run_batch_request, req, @@ -1038,6 +1054,7 @@ ssize_t trigger_batched_requests_async_store(struct device *dev, for (i = 0; i < test_fw_config->num_requests; i++) { req = &test_fw_config->reqs[i]; req->name = test_fw_config->name; + req->fw_buf = NULL; req->fw = NULL; req->idx = i; init_completion(&req->completion); From 126310c9f669c9a8c875a3e5c2292299ca90225d Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Mon, 8 May 2023 14:11:14 +0530 Subject: [PATCH 303/934] drivers: base: cacheinfo: Fix shared_cpu_map changes in event of CPU hotplug While building the shared_cpu_map, check if the cache level and cache type matches. On certain systems that build the cache topology based on the instance ID, there are cases where the same ID may repeat across multiple cache levels, leading inaccurate topology. In event of CPU offlining, the cache_shared_cpu_map_remove() does not consider if IDs at same level are being compared. As a result, when same IDs repeat across different cache levels, the CPU going offline is not removed from all the shared_cpu_map. Below is the output of cache topology of CPU8 and it's SMT sibling after CPU8 is offlined on a dual socket 3rd Generation AMD EPYC processor (2 x 64C/128T) running kernel release v6.3: # for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143 # echo 0 > /sys/devices/system/cpu/cpu8/online # for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136 /sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143 CPU8 is removed from index0 (L1i) but remains in the shared_cpu_list of index1 (L1d) and index2 (L2). Since L1i, L1d, and L2 are shared by the SMT siblings, and they have the same cache instance ID, CPU 2 is only removed from the first index with matching ID which is index1 (L1i) in this case. With this fix, the results are as expected when performing the same experiment on the same system: # for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143 # echo 0 > /sys/devices/system/cpu/cpu8/online # for i in /sys/devices/system/cpu/cpu136/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list: 136 /sys/devices/system/cpu/cpu136/cache/index1/shared_cpu_list: 136 /sys/devices/system/cpu/cpu136/cache/index2/shared_cpu_list: 136 /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list: 9-15,136-143 When rebuilding topology, the same problem appears as cache_shared_cpu_map_setup() implements a similar logic. Consider the same 3rd Generation EPYC processor: CPUs in Core 1, that share the L1 and L2 caches, have L1 and L2 instance ID as 1. For all the CPUs on the second chiplet, the L3 ID is also 1 leading to grouping on CPUs from Core 1 (1, 17) and the entire second chiplet (8-15, 24-31) as CPUs sharing one cache domain. This went undetected since x86 processors depended on arch specific populate_cache_leaves() method to repopulate the shared_cpus_map when CPU came back online until kernel release v6.3-rc5. Fixes: 198102c9103f ("cacheinfo: Fix shared_cpu_map to handle shared caches at different levels") Signed-off-by: K Prateek Nayak Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20230508084115.1157-2-kprateek.nayak@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index bba3482ddeb8..d1ae443fd7a0 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -388,6 +388,16 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) continue;/* skip if itself or no cacheinfo */ for (sib_index = 0; sib_index < cache_leaves(i); sib_index++) { sib_leaf = per_cpu_cacheinfo_idx(i, sib_index); + + /* + * Comparing cache IDs only makes sense if the leaves + * belong to the same cache level of same type. Skip + * the check if level and type do not match. + */ + if (sib_leaf->level != this_leaf->level || + sib_leaf->type != this_leaf->type) + continue; + if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_set_cpu(i, &this_leaf->shared_cpu_map); @@ -419,6 +429,16 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) for (sib_index = 0; sib_index < cache_leaves(sibling); sib_index++) { sib_leaf = per_cpu_cacheinfo_idx(sibling, sib_index); + + /* + * Comparing cache IDs only makes sense if the leaves + * belong to the same cache level of same type. Skip + * the check if level and type do not match. + */ + if (sib_leaf->level != this_leaf->level || + sib_leaf->type != this_leaf->type) + continue; + if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); From c26fabe73330d983c7ce822c6b6ec0879b4da61f Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Mon, 8 May 2023 14:11:15 +0530 Subject: [PATCH 304/934] drivers: base: cacheinfo: Update cpu_map_populated during CPU Hotplug Until commit 5c2712387d48 ("cacheinfo: Fix LLC is not exported through sysfs"), cacheinfo called populate_cache_leaves() for CPU coming online which let the arch specific functions handle (at least on x86) populating the shared_cpu_map. However, with the changes in the aforementioned commit, populate_cache_leaves() is not called when a CPU comes online as a result of hotplug since last_level_cache_is_valid() returns true as the cacheinfo data is not discarded. The CPU coming online is not present in shared_cpu_map, however, it will not be added since the cpu_cacheinfo->cpu_map_populated flag is set (it is set in populate_cache_leaves() when cacheinfo is first populated for x86) This can lead to inconsistencies in the shared_cpu_map when an offlined CPU comes online again. Example below depicts the inconsistency in the shared_cpu_list in cacheinfo when CPU8 is offlined and onlined again on a 3rd Generation EPYC processor: # for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143 # echo 0 > /sys/devices/system/cpu/cpu8/online # echo 1 > /sys/devices/system/cpu/cpu8/online # for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8 # cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list 136 # cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list 9-15,136-143 Clear the flag when the CPU is removed from shared_cpu_map when cache_shared_cpu_map_remove() is called during CPU hotplug. This will allow cache_shared_cpu_map_setup() to add the CPU coming back online in the shared_cpu_map. Set the flag again when the shared_cpu_map is setup. Following are results of performing the same test as described above with the changes: # for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143 # echo 0 > /sys/devices/system/cpu/cpu8/online # echo 1 > /sys/devices/system/cpu/cpu8/online # for i in /sys/devices/system/cpu/cpu8/cache/index*/shared_cpu_list; do echo -n "$i: "; cat $i; done /sys/devices/system/cpu/cpu8/cache/index0/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index1/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_list: 8,136 /sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_list: 8-15,136-143 # cat /sys/devices/system/cpu/cpu136/cache/index0/shared_cpu_list 8,136 # cat /sys/devices/system/cpu/cpu136/cache/index3/shared_cpu_list 8-15,136-143 Fixes: 5c2712387d48 ("cacheinfo: Fix LLC is not exported through sysfs") Signed-off-by: K Prateek Nayak Reviewed-by: Yicong Yang Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20230508084115.1157-3-kprateek.nayak@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/cacheinfo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index d1ae443fd7a0..cbae8be1fe52 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -410,11 +410,14 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) coherency_max_size = this_leaf->coherency_line_size; } + /* shared_cpu_map is now populated for the cpu */ + this_cpu_ci->cpu_map_populated = true; return 0; } static void cache_shared_cpu_map_remove(unsigned int cpu) { + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf, *sib_leaf; unsigned int sibling, index, sib_index; @@ -447,6 +450,9 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) } } } + + /* cpu is no longer populated in the shared map */ + this_cpu_ci->cpu_map_populated = false; } static void free_cache_attributes(unsigned int cpu) From 6a07826f2057b5fa1c479ba56460195882464270 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Sun, 21 May 2023 09:24:00 +0800 Subject: [PATCH 305/934] drm/amd/pm: reverse mclk and fclk clocks levels for SMU v13.0.4 This patch reverses the DPM clocks levels output of pp_dpm_mclk and pp_dpm_fclk. On dGPUs and older APUs we expose the levels from lowest clocks to highest clocks. But for some APUs, the clocks levels that from the DFPstateTable are given the reversed orders by PMFW. Like the memory DPM clocks that are exposed by pp_dpm_mclk. It's not intuitive that they are reversed on these APUs. All tools and software that talks to the driver then has to know different ways to interpret the data depending on the asic. So we need to reverse them to expose the clocks levels from the driver consistently. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 8fa9a36c38b6..6d9760eac16d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -478,7 +478,7 @@ static int smu_v13_0_4_get_dpm_level_count(struct smu_context *smu, static int smu_v13_0_4_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int i, size = 0, ret = 0; + int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t min, max; @@ -512,7 +512,8 @@ static int smu_v13_0_4_print_clk_levels(struct smu_context *smu, break; for (i = 0; i < count; i++) { - ret = smu_v13_0_4_get_dpm_freq_by_index(smu, clk_type, i, &value); + idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i; + ret = smu_v13_0_4_get_dpm_freq_by_index(smu, clk_type, idx, &value); if (ret) break; From c1d35412b3e826ae8119e3fb5f51dd0fa5b6b567 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Sun, 21 May 2023 10:28:05 +0800 Subject: [PATCH 306/934] drm/amd/pm: reverse mclk clocks levels for SMU v13.0.5 This patch reverses the DPM clocks levels output of pp_dpm_mclk. On dGPUs and older APUs we expose the levels from lowest clocks to highest clocks. But for some APUs, the clocks levels that from the DFPstateTable are given the reversed orders by PMFW. Like the memory DPM clocks that are exposed by pp_dpm_mclk. It's not intuitive that they are reversed on these APUs. All tools and software that talks to the driver then has to know different ways to interpret the data depending on the asic. So we need to reverse them to expose the clocks levels from the driver consistently. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c index 66445964efbd..0081fa607e02 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c @@ -866,7 +866,7 @@ out: static int smu_v13_0_5_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int i, size = 0, ret = 0; + int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t min = 0, max = 0; @@ -898,7 +898,8 @@ static int smu_v13_0_5_print_clk_levels(struct smu_context *smu, goto print_clk_out; for (i = 0; i < count; i++) { - ret = smu_v13_0_5_get_dpm_freq_by_index(smu, clk_type, i, &value); + idx = (clk_type == SMU_MCLK) ? (count - i - 1) : i; + ret = smu_v13_0_5_get_dpm_freq_by_index(smu, clk_type, idx, &value); if (ret) goto print_clk_out; From f1373a97a41f429e0095d4be388092ffa3c1a157 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Sun, 21 May 2023 10:35:59 +0800 Subject: [PATCH 307/934] drm/amd/pm: reverse mclk and fclk clocks levels for yellow carp This patch reverses the DPM clocks levels output of pp_dpm_mclk and pp_dpm_fclk. On dGPUs and older APUs we expose the levels from lowest clocks to highest clocks. But for some APUs, the clocks levels that from the DFPstateTable are given the reversed orders by PMFW. Like the memory DPM clocks that are exposed by pp_dpm_mclk. It's not intuitive that they are reversed on these APUs. All tools and software that talks to the driver then has to know different ways to interpret the data depending on the asic. So we need to reverse them to expose the clocks levels from the driver consistently. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 04e56b0b3033..798f36cfcebd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -1000,7 +1000,7 @@ out: static int yellow_carp_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int i, size = 0, ret = 0; + int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t min, max; @@ -1033,7 +1033,8 @@ static int yellow_carp_print_clk_levels(struct smu_context *smu, goto print_clk_out; for (i = 0; i < count; i++) { - ret = yellow_carp_get_dpm_freq_by_index(smu, clk_type, i, &value); + idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i; + ret = yellow_carp_get_dpm_freq_by_index(smu, clk_type, idx, &value); if (ret) goto print_clk_out; From bfc03568d9d81332382c73a1985a90c4506bd36c Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Sun, 21 May 2023 11:10:19 +0800 Subject: [PATCH 308/934] drm/amd/pm: reverse mclk and fclk clocks levels for vangogh This patch reverses the DPM clocks levels output of pp_dpm_mclk and pp_dpm_fclk. On dGPUs and older APUs we expose the levels from lowest clocks to highest clocks. But for some APUs, the clocks levels that from the DFPstateTable are given the reversed orders by PMFW. Like the memory DPM clocks that are exposed by pp_dpm_mclk. It's not intuitive that they are reversed on these APUs. All tools and software that talks to the driver then has to know different ways to interpret the data depending on the asic. So we need to reverse them to expose the clocks levels from the driver consistently. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 7433dcaa16e0..067b4e0b026c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -582,7 +582,7 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, DpmClocks_t *clk_table = smu->smu_table.clocks_table; SmuMetrics_legacy_t metrics; struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); - int i, size = 0, ret = 0; + int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; bool cur_value_match_level = false; @@ -656,7 +656,8 @@ static int vangogh_print_legacy_clk_levels(struct smu_context *smu, case SMU_MCLK: case SMU_FCLK: for (i = 0; i < count; i++) { - ret = vangogh_get_dpm_clk_limited(smu, clk_type, i, &value); + idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i; + ret = vangogh_get_dpm_clk_limited(smu, clk_type, idx, &value); if (ret) return ret; if (!value) @@ -683,7 +684,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu, DpmClocks_t *clk_table = smu->smu_table.clocks_table; SmuMetrics_t metrics; struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); - int i, size = 0, ret = 0; + int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; bool cur_value_match_level = false; uint32_t min, max; @@ -765,7 +766,8 @@ static int vangogh_print_clk_levels(struct smu_context *smu, case SMU_MCLK: case SMU_FCLK: for (i = 0; i < count; i++) { - ret = vangogh_get_dpm_clk_limited(smu, clk_type, i, &value); + idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i; + ret = vangogh_get_dpm_clk_limited(smu, clk_type, idx, &value); if (ret) return ret; if (!value) From 55e02c14f9b5fd973ba32a16a715baa42617f9c6 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 22 May 2023 23:17:28 +0800 Subject: [PATCH 309/934] drm/amd/pm: reverse mclk and fclk clocks levels for renoir This patch reverses the DPM clocks levels output of pp_dpm_mclk and pp_dpm_fclk for renoir. On dGPUs and older APUs we expose the levels from lowest clocks to highest clocks. But for some APUs, the clocks levels are given the reversed orders by PMFW. Like the memory DPM clocks that are exposed by pp_dpm_mclk. It's not intuitive that they are reversed on these APUs. All tools and software that talks to the driver then has to know different ways to interpret the data depending on the asic. So we need to reverse them to expose the clocks levels from the driver consistently. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 5cdc07165480..8a8ba25c9ad7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -494,7 +494,7 @@ static int renoir_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) static int renoir_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int i, size = 0, ret = 0; + int i, idx, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; SmuMetrics_t metrics; struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm); @@ -594,7 +594,8 @@ static int renoir_print_clk_levels(struct smu_context *smu, case SMU_VCLK: case SMU_DCLK: for (i = 0; i < count; i++) { - ret = renoir_get_dpm_clk_limited(smu, clk_type, i, &value); + idx = (clk_type == SMU_FCLK || clk_type == SMU_MCLK) ? (count - i - 1) : i; + ret = renoir_get_dpm_clk_limited(smu, clk_type, idx, &value); if (ret) return ret; if (!value) From c14fb01c4629b96b64ab54caea7e543a0239f14e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 22 May 2023 15:08:22 +0200 Subject: [PATCH 310/934] Revert "drm/amd/display: Block optimize on consecutive FAMS enables" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit ce560ac40272a5c8b5b68a9d63a75edd9e66aed2. It depends on its parent commit, which we want to revert. Acked-by: Alex Deucher Signed-off-by: Michel Dänzer [Hamza: fix a whitespace issue in dcn30_prepare_bandwidth()] Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 3 --- .../drm/amd/display/dc/dcn30/dcn30_hwseq.c | 22 +++---------------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 422fbf79da64..6ce10fd4bb1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2117,9 +2117,6 @@ void dcn20_optimize_bandwidth( dc_dmub_srv_p_state_delegate(dc, true, context); context->bw_ctx.bw.dcn.clk.p_state_change_support = true; - dc->clk_mgr->clks.fw_based_mclk_switching = true; - } else { - dc->clk_mgr->clks.fw_based_mclk_switching = false; } dc->clk_mgr->funcs->update_clocks( diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 8263a07f265f..f923224e85fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -983,13 +983,9 @@ void dcn30_set_disp_pattern_generator(const struct dc *dc, } void dcn30_prepare_bandwidth(struct dc *dc, - struct dc_state *context) + struct dc_state *context) { - bool p_state_change_support = context->bw_ctx.bw.dcn.clk.p_state_change_support; - /* Any transition into an FPO config should disable MCLK switching first to avoid - * driver and FW P-State synchronization issues. - */ - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { dc->optimized_required = true; context->bw_ctx.bw.dcn.clk.p_state_change_support = false; } @@ -1000,19 +996,7 @@ void dcn30_prepare_bandwidth(struct dc *dc, dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz); dcn20_prepare_bandwidth(dc, context); - /* - * enabled -> enabled: do not disable - * enabled -> disabled: disable - * disabled -> enabled: don't care - * disabled -> disabled: don't care - */ - if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) - dc_dmub_srv_p_state_delegate(dc, false, context); - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching || dc->clk_mgr->clks.fw_based_mclk_switching) { - /* After disabling P-State, restore the original value to ensure we get the correct P-State - * on the next optimize. */ - context->bw_ctx.bw.dcn.clk.p_state_change_support = p_state_change_support; - } + dc_dmub_srv_p_state_delegate(dc, false, context); } From 8e1b45c578b799510f9a01a9745a737e74f43cb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 22 May 2023 15:08:23 +0200 Subject: [PATCH 311/934] Revert "drm/amd/display: Do not set drr on pipe commit" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 474f01015ffdb74e01c2eb3584a2822c64e7b2be. Caused a regression: Samsung Odyssey Neo G9, running at 5120x1440@240/VRR, connected to Navi 21 via DisplayPort, blanks and the GPU hangs while starting the Steam game Assetto Corsa Competizione (via Proton 7.0). Example dmesg excerpt: amdgpu 0000:0c:00.0: [drm] ERROR [CRTC:82:crtc-0] flip_done timed out NMI watchdog: Watchdog detected hard LOCKUP on cpu 6 [...] RIP: 0010:amdgpu_device_rreg.part.0+0x2f/0xf0 [amdgpu] Code: 41 54 44 8d 24 b5 00 00 00 00 55 89 f5 53 48 89 fb 4c 3b a7 60 0b 00 00 73 6a 83 e2 02 74 29 4c 03 a3 68 0b 00 00 45 8b 24 24 <48> 8b 43 08 0f b7 70 3e 66 90 44 89 e0 5b 5d 41 5c 31 d2 31 c9 31 RSP: 0000:ffffb39a119dfb88 EFLAGS: 00000086 RAX: ffffffffc0eb96a0 RBX: ffff9e7963dc0000 RCX: 0000000000007fff RDX: 0000000000000000 RSI: 0000000000004ff6 RDI: ffff9e7963dc0000 RBP: 0000000000004ff6 R08: ffffb39a119dfc40 R09: 0000000000000010 R10: ffffb39a119dfc40 R11: ffffb39a119dfc44 R12: 00000000000e05ae R13: 0000000000000000 R14: ffff9e7963dc0010 R15: 0000000000000000 FS: 000000001012f6c0(0000) GS:ffff9e805eb80000(0000) knlGS:000000007fd40000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000461ca000 CR3: 00000002a8a20000 CR4: 0000000000350ee0 Call Trace: dm_read_reg_func+0x37/0xc0 [amdgpu] generic_reg_get2+0x22/0x60 [amdgpu] optc1_get_crtc_scanoutpos+0x6a/0xc0 [amdgpu] dc_stream_get_scanoutpos+0x74/0x90 [amdgpu] dm_crtc_get_scanoutpos+0x82/0xf0 [amdgpu] amdgpu_display_get_crtc_scanoutpos+0x91/0x190 [amdgpu] ? dm_read_reg_func+0x37/0xc0 [amdgpu] amdgpu_get_vblank_counter_kms+0xb4/0x1a0 [amdgpu] dm_pflip_high_irq+0x213/0x2f0 [amdgpu] amdgpu_dm_irq_handler+0x8a/0x200 [amdgpu] amdgpu_irq_dispatch+0xd4/0x220 [amdgpu] amdgpu_ih_process+0x7f/0x110 [amdgpu] amdgpu_irq_handler+0x1f/0x70 [amdgpu] __handle_irq_event_percpu+0x46/0x1b0 handle_irq_event+0x34/0x80 handle_edge_irq+0x9f/0x240 __common_interrupt+0x66/0x110 common_interrupt+0x5c/0xd0 asm_common_interrupt+0x22/0x40 Reviewed-by: Aurabindo Pillai Acked-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 6 ------ drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 7 ------- 2 files changed, 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 6ce10fd4bb1a..5403e9399a46 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2113,12 +2113,6 @@ void dcn20_optimize_bandwidth( if (hubbub->funcs->program_compbuf_size) hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { - dc_dmub_srv_p_state_delegate(dc, - true, context); - context->bw_ctx.bw.dcn.clk.p_state_change_support = true; - } - dc->clk_mgr->funcs->update_clocks( dc->clk_mgr, context, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index f923224e85fc..32121db2851e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -985,18 +985,11 @@ void dcn30_set_disp_pattern_generator(const struct dc *dc, void dcn30_prepare_bandwidth(struct dc *dc, struct dc_state *context) { - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { - dc->optimized_required = true; - context->bw_ctx.bw.dcn.clk.p_state_change_support = false; - } - if (dc->clk_mgr->dc_mode_softmax_enabled) if (dc->clk_mgr->clks.dramclk_khz <= dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000 && context->bw_ctx.bw.dcn.clk.dramclk_khz > dc->clk_mgr->bw_params->dc_mode_softmax_memclk * 1000) dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz); dcn20_prepare_bandwidth(dc, context); - - dc_dmub_srv_p_state_delegate(dc, false, context); } From ac1d8e2f074d9bffc2d368ad0720cdbb4c938fa5 Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Mon, 15 May 2023 21:45:51 -0400 Subject: [PATCH 312/934] drm/amdgpu: separate ras irq from vcn instance irq for UVD_POISON Separate vcn RAS poison consumption handling from the instance irq, and register dedicated ras_poison_irq src and funcs for UVD_POISON. v2: - Separate ras irq from vcn instance irq - Improve the subject and code comments v3: - Split the patch into three parts - Improve the code comments Suggested-by: Hawking Zhang Signed-off-by: Horatio Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 27 ++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 3 +++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index e63fcc58e8e0..2d94f1b63bd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1181,6 +1181,31 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r, i; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + if (amdgpu_ras_is_supported(adev, ras_block->block)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + r = amdgpu_irq_get(adev, &adev->vcn.inst[i].ras_poison_irq, 0); + if (r) + goto late_fini; + } + } + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + return r; +} + int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev) { int err; @@ -1202,7 +1227,7 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev) adev->vcn.ras_if = &ras->ras_block.ras_comm; if (!ras->ras_block.ras_late_init) - ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + ras->ras_block.ras_late_init = amdgpu_vcn_ras_late_init; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index c730949ece7d..f1397ef66fd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -234,6 +234,7 @@ struct amdgpu_vcn_inst { struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; atomic_t sched_score; struct amdgpu_irq_src irq; + struct amdgpu_irq_src ras_poison_irq; struct amdgpu_vcn_reg external; struct amdgpu_bo *dpg_sram_bo; struct dpg_pause_state pause_state; @@ -400,6 +401,8 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block); int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev); #endif From 6889f28c736c357700f5755fed852a2badc15a7b Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Mon, 15 May 2023 21:52:00 -0400 Subject: [PATCH 313/934] drm/amdgpu: add RAS POISON interrupt funcs for vcn_v2_6 Add ras_poison_irq and functions. Suggested-by: Hawking Zhang Signed-off-by: Horatio Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index ab0b45d0ead1..515681c88dcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -143,7 +143,7 @@ static int vcn_v2_5_sw_init(void *handle) /* VCN POISON TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], - VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].irq); + VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].ras_poison_irq); if (r) return r; } @@ -354,6 +354,9 @@ static int vcn_v2_5_hw_fini(void *handle) (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, mmUVD_STATUS))) vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0); } return 0; @@ -1807,6 +1810,14 @@ static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev, return 0; } +static int vcn_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -1837,9 +1848,6 @@ static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev, case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); break; - case VCN_2_6__SRCID_UVD_POISON: - amdgpu_vcn_process_poison_irq(adev, source, entry); - break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -1854,6 +1862,11 @@ static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = { .process = vcn_v2_5_process_interrupt, }; +static const struct amdgpu_irq_src_funcs vcn_v2_6_ras_irq_funcs = { + .set = vcn_v2_6_set_ras_interrupt_state, + .process = amdgpu_vcn_process_poison_irq, +}; + static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) { int i; @@ -1863,6 +1876,9 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) continue; adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs; + + adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v2_6_ras_irq_funcs; } } @@ -1965,6 +1981,7 @@ const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = { static struct amdgpu_vcn_ras vcn_v2_6_ras = { .ras_block = { .hw_ops = &vcn_v2_6_ras_hw_ops, + .ras_late_init = amdgpu_vcn_ras_late_init, }, }; From 020c76d983151f6f6c9493a3bbe83c1ec927617a Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Mon, 15 May 2023 22:09:43 -0400 Subject: [PATCH 314/934] drm/amdgpu: add RAS POISON interrupt funcs for vcn_v4_0 Add ras_poison_irq and functions. And fix the amdgpu_irq_put call trace in vcn_v4_0_hw_fini. [ 44.563572] RIP: 0010:amdgpu_irq_put+0xa4/0xc0 [amdgpu] [ 44.563629] RSP: 0018:ffffb36740edfc90 EFLAGS: 00010246 [ 44.563630] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 [ 44.563630] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 44.563631] RBP: ffffb36740edfcb0 R08: 0000000000000000 R09: 0000000000000000 [ 44.563631] R10: 0000000000000000 R11: 0000000000000000 R12: ffff954c568e2ea8 [ 44.563631] R13: 0000000000000000 R14: ffff954c568c0000 R15: ffff954c568e2ea8 [ 44.563632] FS: 0000000000000000(0000) GS:ffff954f584c0000(0000) knlGS:0000000000000000 [ 44.563632] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 44.563633] CR2: 00007f028741ba70 CR3: 000000026ca10000 CR4: 0000000000750ee0 [ 44.563633] PKRU: 55555554 [ 44.563633] Call Trace: [ 44.563634] [ 44.563634] vcn_v4_0_hw_fini+0x62/0x160 [amdgpu] [ 44.563700] vcn_v4_0_suspend+0x13/0x30 [amdgpu] [ 44.563755] amdgpu_device_ip_suspend_phase2+0x240/0x470 [amdgpu] [ 44.563806] amdgpu_device_ip_suspend+0x41/0x80 [amdgpu] [ 44.563858] amdgpu_device_pre_asic_reset+0xd9/0x4a0 [amdgpu] [ 44.563909] amdgpu_device_gpu_recover.cold+0x548/0xcf1 [amdgpu] [ 44.564006] amdgpu_debugfs_reset_work+0x4c/0x80 [amdgpu] [ 44.564061] process_one_work+0x21f/0x400 [ 44.564062] worker_thread+0x200/0x3f0 [ 44.564063] ? process_one_work+0x400/0x400 [ 44.564064] kthread+0xee/0x120 [ 44.564065] ? kthread_complete_and_exit+0x20/0x20 [ 44.564066] ret_from_fork+0x22/0x30 Suggested-by: Hawking Zhang Signed-off-by: Horatio Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 36 ++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index bf0674039598..e5fd1e00914d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -139,7 +139,7 @@ static int vcn_v4_0_sw_init(void *handle) /* VCN POISON TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); + VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].ras_poison_irq); if (r) return r; @@ -305,8 +305,8 @@ static int vcn_v4_0_hw_fini(void *handle) vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); } } - - amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) + amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0); } return 0; @@ -1975,6 +1975,24 @@ static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgp return 0; } +/** + * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @type: interrupt types + * @state: interrupt states + * + * Set VCN block RAS interrupt state + */ +static int vcn_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + /** * vcn_v4_0_process_interrupt - process VCN block interrupt * @@ -2007,9 +2025,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; - case VCN_4_0__SRCID_UVD_POISON: - amdgpu_vcn_process_poison_irq(adev, source, entry); - break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -2024,6 +2039,11 @@ static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = { .process = vcn_v4_0_process_interrupt, }; +static const struct amdgpu_irq_src_funcs vcn_v4_0_ras_irq_funcs = { + .set = vcn_v4_0_set_ras_interrupt_state, + .process = amdgpu_vcn_process_poison_irq, +}; + /** * vcn_v4_0_set_irq_funcs - set VCN block interrupt irq functions * @@ -2041,6 +2061,9 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; adev->vcn.inst[i].irq.funcs = &vcn_v4_0_irq_funcs; + + adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v4_0_ras_irq_funcs; } } @@ -2114,6 +2137,7 @@ const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = { static struct amdgpu_vcn_ras vcn_v4_0_ras = { .ras_block = { .hw_ops = &vcn_v4_0_ras_hw_ops, + .ras_late_init = amdgpu_vcn_ras_late_init, }, }; From ce784421a3e15fd89d5fc1b9da7d846dd8309661 Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Mon, 15 May 2023 22:57:19 -0400 Subject: [PATCH 315/934] drm/amdgpu: separate ras irq from jpeg instance irq for UVD_POISON Separate jpegbRAS poison consumption handling from the instance irq, and register dedicated ras_poison_irq src and funcs for UVD_POISON. v2: - Separate ras irq from jpeg instance irq - Improve the subject and code comments v3: - Split the patch into three parts - Improve the code comments Suggested-by: Hawking Zhang Signed-off-by: Horatio Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 27 +++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h | 3 +++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index b07c000fc8ba..4fa019c8aefc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -241,6 +241,31 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev, return 0; } +int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r, i; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + if (amdgpu_ras_is_supported(adev, ras_block->block)) { + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + r = amdgpu_irq_get(adev, &adev->jpeg.inst[i].ras_poison_irq, 0); + if (r) + goto late_fini; + } + } + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + return r; +} + int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev) { int err; @@ -262,7 +287,7 @@ int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev) adev->jpeg.ras_if = &ras->ras_block.ras_comm; if (!ras->ras_block.ras_late_init) - ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + ras->ras_block.ras_late_init = amdgpu_jpeg_ras_late_init; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 0ca76f0f23e9..1471a1ebb034 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -38,6 +38,7 @@ struct amdgpu_jpeg_reg{ struct amdgpu_jpeg_inst { struct amdgpu_ring ring_dec; struct amdgpu_irq_src irq; + struct amdgpu_irq_src ras_poison_irq; struct amdgpu_jpeg_reg external; }; @@ -72,6 +73,8 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block); int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev); #endif /*__AMDGPU_JPEG_H__*/ From 30b2d778f629d51e2ff30beb6d060a0bd7f70104 Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Mon, 15 May 2023 22:59:54 -0400 Subject: [PATCH 316/934] drm/amdgpu: add RAS POISON interrupt funcs for jpeg_v2_6 Add ras_poison_irq and functions. Suggested-by: Hawking Zhang Signed-off-by: Horatio Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 28 ++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index b040f51d9aa9..73e0dc5a10cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -102,13 +102,13 @@ static int jpeg_v2_5_sw_init(void *handle) /* JPEG DJPEG POISON EVENT */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], - VCN_2_6__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].irq); + VCN_2_6__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq); if (r) return r; /* JPEG EJPEG POISON EVENT */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], - VCN_2_6__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].irq); + VCN_2_6__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].ras_poison_irq); if (r) return r; } @@ -221,6 +221,9 @@ static int jpeg_v2_5_hw_fini(void *handle) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS)) jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) + amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0); } return 0; @@ -569,6 +572,14 @@ static int jpeg_v2_5_set_interrupt_state(struct amdgpu_device *adev, return 0; } +static int jpeg_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -593,10 +604,6 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, case VCN_2_0__SRCID__JPEG_DECODE: amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec); break; - case VCN_2_6__SRCID_DJPEG0_POISON: - case VCN_2_6__SRCID_EJPEG0_POISON: - amdgpu_jpeg_process_poison_irq(adev, source, entry); - break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -725,6 +732,11 @@ static const struct amdgpu_irq_src_funcs jpeg_v2_5_irq_funcs = { .process = jpeg_v2_5_process_interrupt, }; +static const struct amdgpu_irq_src_funcs jpeg_v2_6_ras_irq_funcs = { + .set = jpeg_v2_6_set_ras_interrupt_state, + .process = amdgpu_jpeg_process_poison_irq, +}; + static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev) { int i; @@ -735,6 +747,9 @@ static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev) adev->jpeg.inst[i].irq.num_types = 1; adev->jpeg.inst[i].irq.funcs = &jpeg_v2_5_irq_funcs; + + adev->jpeg.inst[i].ras_poison_irq.num_types = 1; + adev->jpeg.inst[i].ras_poison_irq.funcs = &jpeg_v2_6_ras_irq_funcs; } } @@ -800,6 +815,7 @@ const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = { static struct amdgpu_jpeg_ras jpeg_v2_6_ras = { .ras_block = { .hw_ops = &jpeg_v2_6_ras_hw_ops, + .ras_late_init = amdgpu_jpeg_ras_late_init, }, }; From bc3e1d60f933f823599376f830eb99451afb995a Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Mon, 15 May 2023 23:02:10 -0400 Subject: [PATCH 317/934] drm/amdgpu: add RAS POISON interrupt funcs for jpeg_v4_0 Add ras_poison_irq and functions. And fix the amdgpu_irq_put call trace in jpeg_v4_0_hw_fini. [ 50.497562] RIP: 0010:amdgpu_irq_put+0xa4/0xc0 [amdgpu] [ 50.497619] RSP: 0018:ffffaa2400fcfcb0 EFLAGS: 00010246 [ 50.497620] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 [ 50.497621] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 50.497621] RBP: ffffaa2400fcfcd0 R08: 0000000000000000 R09: 0000000000000000 [ 50.497622] R10: 0000000000000000 R11: 0000000000000000 R12: ffff99b2105242d8 [ 50.497622] R13: 0000000000000000 R14: ffff99b210500000 R15: ffff99b210500000 [ 50.497623] FS: 0000000000000000(0000) GS:ffff99b518480000(0000) knlGS:0000000000000000 [ 50.497623] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 50.497624] CR2: 00007f9d32aa91e8 CR3: 00000001ba210000 CR4: 0000000000750ee0 [ 50.497624] PKRU: 55555554 [ 50.497625] Call Trace: [ 50.497625] [ 50.497627] jpeg_v4_0_hw_fini+0x43/0xc0 [amdgpu] [ 50.497693] jpeg_v4_0_suspend+0x13/0x30 [amdgpu] [ 50.497751] amdgpu_device_ip_suspend_phase2+0x240/0x470 [amdgpu] [ 50.497802] amdgpu_device_ip_suspend+0x41/0x80 [amdgpu] [ 50.497854] amdgpu_device_pre_asic_reset+0xd9/0x4a0 [amdgpu] [ 50.497905] amdgpu_device_gpu_recover.cold+0x548/0xcf1 [amdgpu] [ 50.498005] amdgpu_debugfs_reset_work+0x4c/0x80 [amdgpu] [ 50.498060] process_one_work+0x21f/0x400 [ 50.498063] worker_thread+0x200/0x3f0 [ 50.498064] ? process_one_work+0x400/0x400 [ 50.498065] kthread+0xee/0x120 [ 50.498067] ? kthread_complete_and_exit+0x20/0x20 [ 50.498068] ret_from_fork+0x22/0x30 Suggested-by: Hawking Zhang Signed-off-by: Horatio Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 28 +++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 77e1e64aa1d1..a3d83c9f2c9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -87,13 +87,13 @@ static int jpeg_v4_0_sw_init(void *handle) /* JPEG DJPEG POISON EVENT */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->irq); + VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq); if (r) return r; /* JPEG EJPEG POISON EVENT */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->irq); + VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->ras_poison_irq); if (r) return r; @@ -202,7 +202,8 @@ static int jpeg_v4_0_hw_fini(void *handle) RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); } - amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) + amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); return 0; } @@ -670,6 +671,14 @@ static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev, return 0; } +static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -680,10 +689,6 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev, case VCN_4_0__SRCID__JPEG_DECODE: amdgpu_fence_process(&adev->jpeg.inst->ring_dec); break; - case VCN_4_0__SRCID_DJPEG0_POISON: - case VCN_4_0__SRCID_EJPEG0_POISON: - amdgpu_jpeg_process_poison_irq(adev, source, entry); - break; default: DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -753,10 +758,18 @@ static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = { .process = jpeg_v4_0_process_interrupt, }; +static const struct amdgpu_irq_src_funcs jpeg_v4_0_ras_irq_funcs = { + .set = jpeg_v4_0_set_ras_interrupt_state, + .process = amdgpu_jpeg_process_poison_irq, +}; + static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev) { adev->jpeg.inst->irq.num_types = 1; adev->jpeg.inst->irq.funcs = &jpeg_v4_0_irq_funcs; + + adev->jpeg.inst->ras_poison_irq.num_types = 1; + adev->jpeg.inst->ras_poison_irq.funcs = &jpeg_v4_0_ras_irq_funcs; } const struct amdgpu_ip_block_version jpeg_v4_0_ip_block = { @@ -811,6 +824,7 @@ const struct amdgpu_ras_block_hw_ops jpeg_v4_0_ras_hw_ops = { static struct amdgpu_jpeg_ras jpeg_v4_0_ras = { .ras_block = { .hw_ops = &jpeg_v4_0_ras_hw_ops, + .ras_late_init = amdgpu_jpeg_ras_late_init, }, }; From e490d60a2f76bff636c68ce4fe34c1b6c34bbd86 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Wed, 24 May 2023 15:03:02 +0800 Subject: [PATCH 318/934] drm/amd/pm: resolve reboot exception for si oland During reboot test on arm64 platform, it may failure on boot. The error message are as follows: [ 1.706570][ 3] [ T273] [drm:si_thermal_enable_alert [amdgpu]] *ERROR* Could not enable thermal interrupts. [ 1.716547][ 3] [ T273] [drm:amdgpu_device_ip_late_init [amdgpu]] *ERROR* late_init of IP block failed -22 [ 1.727064][ 3] [ T273] amdgpu 0000:02:00.0: amdgpu_device_ip_late_init failed [ 1.734367][ 3] [ T273] amdgpu 0000:02:00.0: Fatal error during GPU init v2: squash in built warning fix (Alex) Signed-off-by: Zhenneng Li Reviewed-by: Guchun Chen Signed-off-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c | 29 ---------------------- 1 file changed, 29 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index d6d9e3b1b2c0..02e69ccff3ba 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -6925,23 +6925,6 @@ static int si_dpm_enable(struct amdgpu_device *adev) return 0; } -static int si_set_temperature_range(struct amdgpu_device *adev) -{ - int ret; - - ret = si_thermal_enable_alert(adev, false); - if (ret) - return ret; - ret = si_thermal_set_temperature_range(adev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX); - if (ret) - return ret; - ret = si_thermal_enable_alert(adev, true); - if (ret) - return ret; - - return ret; -} - static void si_dpm_disable(struct amdgpu_device *adev) { struct rv7xx_power_info *pi = rv770_get_pi(adev); @@ -7626,18 +7609,6 @@ static int si_dpm_process_interrupt(struct amdgpu_device *adev, static int si_dpm_late_init(void *handle) { - int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (!adev->pm.dpm_enabled) - return 0; - - ret = si_set_temperature_range(adev); - if (ret) - return ret; -#if 0 //TODO ? - si_dpm_powergate_uvd(adev, true); -#endif return 0; } From 663b930e24842f3d3bb79418bb5cd8d01b40c559 Mon Sep 17 00:00:00 2001 From: Ikshwaku Chauhan Date: Thu, 25 May 2023 10:57:26 +0530 Subject: [PATCH 319/934] drm/amdgpu: enable tmz by default for GC 11.0.1 Add IP GC 11.0.1 in the list of target to have tmz enabled by default. Signed-off-by: Ikshwaku Chauhan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 4e2531758866..95b0f984acbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -593,6 +593,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(9, 3, 0): /* GC 10.3.7 */ case IP_VERSION(10, 3, 7): + /* GC 11.0.1 */ + case IP_VERSION(11, 0, 1): if (amdgpu_tmz == 0) { adev->gmc.tmz_enabled = false; dev_info(adev->dev, @@ -616,7 +618,6 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): - case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): /* Don't enable it by default yet. */ From 1c4c769cdf682c63d3b10cb241f4a96ebad2f215 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 30 May 2023 11:59:34 +0300 Subject: [PATCH 320/934] net/mlx5: Remove rmap also in case dynamic MSIX not supported mlx5 add IRQs to rmap upon MSIX request, and mlx5 remove rmap from MSIX only if msi_map.index is populated. However, msi_map.index is populated only when dynamic MSIX is supported. This results in freeing IRQs without removing them from rmap, which triggers the bellow WARN_ON[1]. rmap is a feature which have no relation to dynamic MSIX. Hence, remove the check of msi_map.index when removing IRQ from rmap. [1] [ 200.307160 ] WARNING: CPU: 20 PID: 1702 at kernel/irq/manage.c:2034 free_irq+0x2ac/0x358 [ 200.316990 ] CPU: 20 PID: 1702 Comm: modprobe Not tainted 6.4.0-rc3_for_upstream_min_debug_2023_05_24_14_02 #1 [ 200.318939 ] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 200.321659 ] pc : free_irq+0x2ac/0x358 [ 200.322400 ] lr : free_irq+0x20/0x358 [ 200.337865 ] Call trace: [ 200.338360 ] free_irq+0x2ac/0x358 [ 200.339029 ] irq_release+0x58/0xd0 [mlx5_core] [ 200.340093 ] mlx5_irqs_release_vectors+0x80/0xb0 [mlx5_core] [ 200.341344 ] destroy_comp_eqs+0x120/0x170 [mlx5_core] [ 200.342469 ] mlx5_eq_table_destroy+0x1c/0x38 [mlx5_core] [ 200.343645 ] mlx5_unload+0x8c/0xc8 [mlx5_core] [ 200.344652 ] mlx5_uninit_one+0x78/0x118 [mlx5_core] [ 200.345745 ] remove_one+0x80/0x108 [mlx5_core] [ 200.346752 ] pci_device_remove+0x40/0xd8 [ 200.347554 ] device_remove+0x50/0x88 [ 200.348272 ] device_release_driver_internal+0x1c4/0x228 [ 200.349312 ] driver_detach+0x54/0xa0 [ 200.350030 ] bus_remove_driver+0x74/0x100 [ 200.350833 ] driver_unregister+0x34/0x68 [ 200.351619 ] pci_unregister_driver+0x28/0xa0 [ 200.352476 ] mlx5_cleanup+0x14/0x2210 [mlx5_core] [ 200.353536 ] __arm64_sys_delete_module+0x190/0x2e8 [ 200.354495 ] el0_svc_common.constprop.0+0x6c/0x1d0 [ 200.355455 ] do_el0_svc+0x38/0x98 [ 200.356122 ] el0_svc+0x1c/0x80 [ 200.356739 ] el0t_64_sync_handler+0xb4/0x130 [ 200.357604 ] el0t_64_sync+0x174/0x178 [ 200.358345 ] ---[ end trace 0000000000000000 ]--- Fixes: 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index db5687d9fec9..86ac4a85fd87 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -141,7 +141,7 @@ static void irq_release(struct mlx5_irq *irq) irq_update_affinity_hint(irq->map.virq, NULL); #ifdef CONFIG_RFS_ACCEL rmap = mlx5_eq_table_get_rmap(pool->dev); - if (rmap && irq->map.index) + if (rmap) irq_cpu_rmap_remove(rmap, irq->map.virq); #endif From 8764bd0fa5d402c51b136f6aeaba20fc16961ba1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 31 May 2023 10:48:56 +0200 Subject: [PATCH 321/934] net/mlx5: Fix setting of irq->map.index for static IRQ case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dynamic IRQ allocation is not supported all IRQs are allocated up front in mlx5_irq_table_create() instead of dynamically as part of mlx5_irq_alloc(). In the latter dynamic case irq->map.index is set via the mapping returned by pci_msix_alloc_irq_at(). In the static case and prior to commit 1da438c0ae02 ("net/mlx5: Fix indexing of mlx5_irq") irq->map.index was set in mlx5_irq_alloc() twice once initially to 0 and then to the requested index before storing in the xarray. After this commit it is only set to 0 which breaks all other IRQ mappings. Fix this by setting irq->map.index to the requested index together with irq->map.virq and improve the related comment to make it clearer which cases it deals with. Cc: Chuck Lever III Tested-by: Mark Brown Reviewed-by: Mark Brown Reviewed-by: Simon Horman Reviewed-by: Eli Cohen Fixes: 1da438c0ae02 ("net/mlx5: Fix indexing of mlx5_irq") Signed-off-by: Niklas Schnelle Tested-by: Cédric Le Goater Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 86ac4a85fd87..38edd485ba6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -232,12 +232,13 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, if (!irq) return ERR_PTR(-ENOMEM); if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) { - /* The vector at index 0 was already allocated. - * Just get the irq number. If dynamic irq is not supported - * vectors have also been allocated. + /* The vector at index 0 is always statically allocated. If + * dynamic irq is not supported all vectors are statically + * allocated. In both cases just get the irq number and set + * the index. */ irq->map.virq = pci_irq_vector(dev->pdev, i); - irq->map.index = 0; + irq->map.index = i; } else { irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc); if (!irq->map.virq) { From 368591995d010e639ad8f28b27f1b721f0872342 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 31 May 2023 15:48:25 -0400 Subject: [PATCH 322/934] net/mlx5: Ensure af_desc.mask is properly initialized [ 9.837087] mlx5_core 0000:02:00.0: firmware version: 16.35.2000 [ 9.843126] mlx5_core 0000:02:00.0: 126.016 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x16 link) [ 10.311515] mlx5_core 0000:02:00.0: Rate limit: 127 rates are supported, range: 0Mbps to 97656Mbps [ 10.321948] mlx5_core 0000:02:00.0: E-Switch: Total vports 2, per vport: max uc(128) max mc(2048) [ 10.344324] mlx5_core 0000:02:00.0: mlx5_pcie_event:301:(pid 88): PCIe slot advertised sufficient power (27W). [ 10.354339] BUG: unable to handle page fault for address: ffffffff8ff0ade0 [ 10.361206] #PF: supervisor read access in kernel mode [ 10.366335] #PF: error_code(0x0000) - not-present page [ 10.371467] PGD 81ec39067 P4D 81ec39067 PUD 81ec3a063 PMD 114b07063 PTE 800ffff7e10f5062 [ 10.379544] Oops: 0000 [#1] PREEMPT SMP PTI [ 10.383721] CPU: 0 PID: 117 Comm: kworker/0:6 Not tainted 6.3.0-13028-g7222f123c983 #1 [ 10.391625] Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.0b 06/12/2017 [ 10.398750] Workqueue: events work_for_cpu_fn [ 10.403108] RIP: 0010:__bitmap_or+0x10/0x26 [ 10.407286] Code: 85 c0 0f 95 c0 c3 cc cc cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 89 c9 31 c0 48 83 c1 3f 48 c1 e9 06 39 c> [ 10.426024] RSP: 0000:ffffb45a0078f7b0 EFLAGS: 00010097 [ 10.431240] RAX: 0000000000000000 RBX: ffffffff8ff0adc0 RCX: 0000000000000004 [ 10.438365] RDX: ffff9156801967d0 RSI: ffffffff8ff0ade0 RDI: ffff9156801967b0 [ 10.445489] RBP: ffffb45a0078f7e8 R08: 0000000000000030 R09: 0000000000000000 [ 10.452613] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000000ec [ 10.459737] R13: ffffffff8ff0ade0 R14: 0000000000000001 R15: 0000000000000020 [ 10.466862] FS: 0000000000000000(0000) GS:ffff9165bfc00000(0000) knlGS:0000000000000000 [ 10.474936] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 10.480674] CR2: ffffffff8ff0ade0 CR3: 00000001011ae003 CR4: 00000000003706f0 [ 10.487800] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 10.494922] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 10.502046] Call Trace: [ 10.504493] [ 10.506589] ? matrix_alloc_area.constprop.0+0x43/0x9a [ 10.511729] ? prepare_namespace+0x84/0x174 [ 10.515914] irq_matrix_reserve_managed+0x56/0x10c [ 10.520699] x86_vector_alloc_irqs+0x1d2/0x31e [ 10.525146] irq_domain_alloc_irqs_hierarchy+0x39/0x3f [ 10.530284] irq_domain_alloc_irqs_parent+0x1a/0x2a [ 10.535155] intel_irq_remapping_alloc+0x59/0x5e9 [ 10.539859] ? kmem_cache_debug_flags+0x11/0x26 [ 10.544383] ? __radix_tree_lookup+0x39/0xb9 [ 10.548649] irq_domain_alloc_irqs_hierarchy+0x39/0x3f [ 10.553779] irq_domain_alloc_irqs_parent+0x1a/0x2a [ 10.558650] msi_domain_alloc+0x8c/0x120 [ 10.567697] irq_domain_alloc_irqs_locked+0x11d/0x286 [ 10.572741] __irq_domain_alloc_irqs+0x72/0x93 [ 10.577179] __msi_domain_alloc_irqs+0x193/0x3f1 [ 10.581789] ? __xa_alloc+0xcf/0xe2 [ 10.585273] msi_domain_alloc_irq_at+0xa8/0xfe [ 10.589711] pci_msix_alloc_irq_at+0x47/0x5c The crash is due to matrix_alloc_area() attempting to access per-CPU memory for CPUs that are not present on the system. The CPU mask passed into reserve_managed_vector() via it's @irqd parameter is corrupted because it contains uninitialized stack data. Fixes: bbac70c74183 ("net/mlx5: Use newer affinity descriptor") Reviewed-by: Thomas Gleixner Signed-off-by: Chuck Lever Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 38edd485ba6f..843da89a9035 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -571,11 +571,11 @@ int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, af_desc.is_managed = false; for (i = 0; i < nirqs; i++) { + cpumask_clear(&af_desc.mask); cpumask_set_cpu(cpus[i], &af_desc.mask); irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap); if (IS_ERR(irq)) break; - cpumask_clear(&af_desc.mask); irqs[i] = irq; } From b6193d7030e3c59f1d4c75648c9c8fa40cad2bcd Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 27 May 2023 23:07:08 -0700 Subject: [PATCH 323/934] net/mlx5e: Fix error handling in mlx5e_refresh_tirs Allocation failure is outside the critical lock section and should return immediately rather than jumping to the unlock section. Also unlock as soon as required and remove the now redundant jump label. Fixes: 80a2a9026b24 ("net/mlx5e: Add a lock on tir list") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 1f90594499c6..41c396e76457 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -150,10 +150,8 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - err = -ENOMEM; - goto out; - } + if (!in) + return -ENOMEM; if (enable_uc_lb) lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; @@ -171,14 +169,13 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, tirn = tir->tirn; err = mlx5_core_modify_tir(mdev, tirn, in); if (err) - goto out; + break; } + mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); -out: kvfree(in); if (err) netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); - mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); return err; } From bbfa4b58997e3d38ba629c9f6fc0bd1c163aaf43 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 28 Apr 2023 13:48:13 +0300 Subject: [PATCH 324/934] net/mlx5: Read embedded cpu after init bit cleared During driver load it reads embedded_cpu bit from initialization segment, but the initialization segment is readable only after initialization bit is cleared. Move the call to mlx5_read_embedded_cpu() right after initialization bit cleared. Signed-off-by: Moshe Shemesh Fixes: 591905ba9679 ("net/mlx5: Introduce Mellanox SmartNIC and modify page management logic") Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2132a6510639..d6ee016deae1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -923,7 +923,6 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, } mlx5_pci_vsc_init(dev); - dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); return 0; err_clr_master: @@ -1155,6 +1154,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout goto err_cmd_cleanup; } + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); mlx5_start_health_poll(dev); From 622ab656344a288acf4fb03d628c3bb5dd241f34 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 30 May 2023 21:25:27 +0100 Subject: [PATCH 325/934] sfc: fix error unwinds in TC offload Failure ladders weren't exactly unwinding what the function had done up to that point; most seriously, when we encountered an already offloaded rule, the failure path tried to remove the new rule from the hashtable, which would in fact remove the already-present 'old' rule (since it has the same key) from the table, and leak its resources. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202305200745.xmIlkqjH-lkp@intel.com/ Fixes: d902e1a737d4 ("sfc: bare bones TC offload on EF100") Fixes: 17654d84b47c ("sfc: add offloading of 'foreign' TC (decap) rules") Signed-off-by: Edward Cree Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230530202527.53115-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 0327639a628a..c004443c1d58 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -624,13 +624,12 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, if (!found) { /* We don't care. */ netif_dbg(efx, drv, efx->net_dev, "Ignoring foreign filter that doesn't egdev us\n"); - rc = -EOPNOTSUPP; - goto release; + return -EOPNOTSUPP; } rc = efx_mae_match_check_caps(efx, &match.mask, NULL); if (rc) - goto release; + return rc; if (efx_tc_match_is_encap(&match.mask)) { enum efx_encap_type type; @@ -639,8 +638,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, if (type == EFX_ENCAP_TYPE_NONE) { NL_SET_ERR_MSG_MOD(extack, "Egress encap match on unsupported tunnel device"); - rc = -EOPNOTSUPP; - goto release; + return -EOPNOTSUPP; } rc = efx_mae_check_encap_type_supported(efx, type); @@ -648,25 +646,24 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, NL_SET_ERR_MSG_FMT_MOD(extack, "Firmware reports no support for %s encap match", efx_tc_encap_type_name(type)); - goto release; + return rc; } rc = efx_tc_flower_record_encap_match(efx, &match, type, extack); if (rc) - goto release; + return rc; } else { /* This is not a tunnel decap rule, ignore it */ netif_dbg(efx, drv, efx->net_dev, "Ignoring foreign filter without encap match\n"); - rc = -EOPNOTSUPP; - goto release; + return -EOPNOTSUPP; } rule = kzalloc(sizeof(*rule), GFP_USER); if (!rule) { rc = -ENOMEM; - goto release; + goto out_free; } INIT_LIST_HEAD(&rule->acts.list); rule->cookie = tc->cookie; @@ -678,7 +675,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, "Ignoring already-offloaded rule (cookie %lx)\n", tc->cookie); rc = -EEXIST; - goto release; + goto out_free; } act = kzalloc(sizeof(*act), GFP_USER); @@ -843,6 +840,7 @@ release: efx_tc_match_action_ht_params); efx_tc_free_action_set_list(efx, &rule->acts, false); } +out_free: kfree(rule); if (match.encap) efx_tc_flower_release_encap_match(efx, match.encap); @@ -899,8 +897,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx, return rc; if (efx_tc_match_is_encap(&match.mask)) { NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported"); - rc = -EOPNOTSUPP; - goto release; + return -EOPNOTSUPP; } if (tc->common.chain_index) { @@ -924,9 +921,9 @@ static int efx_tc_flower_replace(struct efx_nic *efx, if (old) { netif_dbg(efx, drv, efx->net_dev, "Already offloaded rule (cookie %lx)\n", tc->cookie); - rc = -EEXIST; NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded"); - goto release; + kfree(rule); + return -EEXIST; } /* Parse actions */ From 62fe398761cd06a428e6f367aba84732a2f1c268 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 23 May 2023 13:40:42 -0700 Subject: [PATCH 326/934] drm/i915/perf: Clear out entire reports after reading if not power of 2 size Clearing out report id and timestamp as means to detect unlanded reports only works if report size is power of 2. That is, only when report size is a sub-multiple of the OA buffer size can we be certain that reports will land at the same place each time in the OA buffer (after rewind). If report size is not a power of 2, we need to zero out the entire report to be able to detect unlanded reports reliably. v2: Add Fixes tag (Umesh) Fixes: 1cc064dce4ed ("drm/i915/perf: Add support for OA media units") Reviewed-by: Umesh Nerlige Ramappa Reviewed-by: Lionel Landwerlin Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20230523204042.4180641-1-ashutosh.dixit@intel.com (cherry picked from commit 09a36015d9a0940214c080f95afc605c47648bbd) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_perf.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 050b8ae7b8e7..3035cba2c6a2 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -877,12 +877,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, stream->oa_buffer.last_ctx_id = ctx_id; } - /* - * Clear out the report id and timestamp as a means to detect unlanded - * reports. - */ - oa_report_id_clear(stream, report32); - oa_timestamp_clear(stream, report32); + if (is_power_of_2(report_size)) { + /* + * Clear out the report id and timestamp as a means + * to detect unlanded reports. + */ + oa_report_id_clear(stream, report32); + oa_timestamp_clear(stream, report32); + } else { + /* Zero out the entire report */ + memset(report32, 0, report_size); + } } if (start_offset != *offset) { From 3692ababa322b4d9ffbd973865bc88018e896fcd Mon Sep 17 00:00:00 2001 From: Jammy Huang Date: Thu, 1 Jun 2023 08:48:47 +0800 Subject: [PATCH 327/934] drm/ast: Fix modeset failed on DisplayPort If we switch display and update cursor together, it could lead to modeset failed because of concurrent access to IO registers. Add lock protection in DP's edid access to avoid this problem. Reviewed-by: Thomas Zimmermann Signed-off-by: Jammy Huang Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230601004847.1115-1-jammy_huang@aspeedtech.com --- drivers/gpu/drm/ast/ast_mode.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 36374828f6c8..b3c670af6ef2 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -1647,6 +1647,8 @@ static int ast_dp501_output_init(struct ast_device *ast) static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector) { void *edid; + struct drm_device *dev = connector->dev; + struct ast_device *ast = to_ast_device(dev); int succ; int count; @@ -1655,9 +1657,17 @@ static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector) if (!edid) goto err_drm_connector_update_edid_property; + /* + * Protect access to I/O registers from concurrent modesetting + * by acquiring the I/O-register lock. + */ + mutex_lock(&ast->ioregs_lock); + succ = ast_astdp_read_edid(connector->dev, edid); if (succ < 0) - goto err_kfree; + goto err_mutex_unlock; + + mutex_unlock(&ast->ioregs_lock); drm_connector_update_edid_property(connector, edid); count = drm_add_edid_modes(connector, edid); @@ -1665,7 +1675,8 @@ static int ast_astdp_connector_helper_get_modes(struct drm_connector *connector) return count; -err_kfree: +err_mutex_unlock: + mutex_unlock(&ast->ioregs_lock); kfree(edid); err_drm_connector_update_edid_property: drm_connector_update_edid_property(connector, NULL); From b3fc95709c54ffbe80f16801e0a792a4d2b3d55e Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 26 May 2023 16:53:59 +0800 Subject: [PATCH 328/934] iommu/mediatek: Flush IOTLB completely only if domain has been attached If an IOMMU domain was never attached, it lacks any linkage to the actual IOMMU hardware. Attempting to do flush_iotlb_all() on it will result in a NULL pointer dereference. This seems to happen after the recent IOMMU core rework in v6.4-rc1. Unable to handle kernel read from unreadable memory at virtual address 0000000000000018 Call trace: mtk_iommu_flush_iotlb_all+0x20/0x80 iommu_create_device_direct_mappings.part.0+0x13c/0x230 iommu_setup_default_domain+0x29c/0x4d0 iommu_probe_device+0x12c/0x190 of_iommu_configure+0x140/0x208 of_dma_configure_id+0x19c/0x3c0 platform_dma_configure+0x38/0x88 really_probe+0x78/0x2c0 Check if the "bank" field has been filled in before actually attempting the IOTLB flush to avoid it. The IOTLB is also flushed when the device comes out of runtime suspend, so it should have a clean initial state. Fixes: 08500c43d4f7 ("iommu/mediatek: Adjust the structure") Signed-off-by: Chen-Yu Tsai Reviewed-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230526085402.394239-1-wenst@chromium.org Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index aecc7d154f28..e93906d6e112 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -781,7 +781,8 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); - mtk_iommu_tlb_flush_all(dom->bank->parent_data); + if (dom->bank) + mtk_iommu_tlb_flush_all(dom->bank->parent_data); } static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, From 4d56304e5827c8cc8cc18c75343d283af7c4825c Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 31 May 2023 18:28:04 +0800 Subject: [PATCH 329/934] net/sched: flower: fix possible OOB write in fl_set_geneve_opt() If we send two TCA_FLOWER_KEY_ENC_OPTS_GENEVE packets and their total size is 252 bytes(key->enc_opts.len = 252) then key->enc_opts.len = opt->length = data_len / 4 = 0 when the third TCA_FLOWER_KEY_ENC_OPTS_GENEVE packet enters fl_set_geneve_opt. This bypasses the next bounds check and results in an out-of-bounds. Fixes: 0a6e77784f49 ("net/sched: allow flower to match tunnel options") Signed-off-by: Hangyu Hua Reviewed-by: Simon Horman Reviewed-by: Pieter Jansen van Vuuren Link: https://lore.kernel.org/r/20230531102805.27090-1-hbh25y@gmail.com Signed-off-by: Paolo Abeni --- net/sched/cls_flower.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 9dbc43388e57..815c3e416bc5 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1153,6 +1153,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, if (option_len > sizeof(struct geneve_opt)) data_len = option_len - sizeof(struct geneve_opt); + if (key->enc_opts.len > FLOW_DIS_TUN_OPTS_MAX - 4) + return -ERANGE; + opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len]; memset(opt, 0xff, option_len); opt->length = data_len / 4; From 30c6f0bf9579debce27e45fac34fdc97e46acacc Mon Sep 17 00:00:00 2001 From: fuyuanli Date: Wed, 31 May 2023 16:01:50 +0800 Subject: [PATCH 330/934] tcp: fix mishandling when the sack compression is deferred. In this patch, we mainly try to handle sending a compressed ack correctly if it's deferred. Here are more details in the old logic: When sack compression is triggered in the tcp_compressed_ack_kick(), if the sock is owned by user, it will set TCP_DELACK_TIMER_DEFERRED and then defer to the release cb phrase. Later once user releases the sock, tcp_delack_timer_handler() should send a ack as expected, which, however, cannot happen due to lack of ICSK_ACK_TIMER flag. Therefore, the receiver would not sent an ack until the sender's retransmission timeout. It definitely increases unnecessary latency. Fixes: 5d9f4262b7ea ("tcp: add SACK compression") Suggested-by: Eric Dumazet Signed-off-by: fuyuanli Signed-off-by: Jason Xing Link: https://lore.kernel.org/netdev/20230529113804.GA20300@didi-ThinkCentre-M920t-N000/ Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230531080150.GA20424@didi-ThinkCentre-M920t-N000 Signed-off-by: Paolo Abeni --- include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_timer.c | 16 +++++++++++++--- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 18a038d16434..5066e4586cf0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -632,6 +632,7 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); +void tcp_sack_compress_send_ack(struct sock *sk); /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 61b6710f337a..bf8b22218dd4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4530,7 +4530,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) } } -static void tcp_sack_compress_send_ack(struct sock *sk) +void tcp_sack_compress_send_ack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b839c2f91292..39eb947fe392 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -290,9 +290,19 @@ static int tcp_write_timeout(struct sock *sk) void tcp_delack_timer_handler(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); - if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || - !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) + return; + + /* Handling the sack compression case */ + if (tp->compressed_ack) { + tcp_mstamp_refresh(tp); + tcp_sack_compress_send_ack(sk); + return; + } + + if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) return; if (time_after(icsk->icsk_ack.timeout, jiffies)) { @@ -312,7 +322,7 @@ void tcp_delack_timer_handler(struct sock *sk) inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } - tcp_mstamp_refresh(tcp_sk(sk)); + tcp_mstamp_refresh(tp); tcp_send_ack(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS); } From be7f8012a513f5099916ee2da28420156cbb8cf3 Mon Sep 17 00:00:00 2001 From: Bert Karwatzki Date: Wed, 31 May 2023 12:36:19 +0200 Subject: [PATCH 331/934] net: ipa: Use correct value for IPA_STATUS_SIZE IPA_STATUS_SIZE was introduced in commit b8dc7d0eea5a as a replacement for the size of the removed struct ipa_status which had size sizeof(__le32[8]). Use this value as IPA_STATUS_SIZE. Fixes: b8dc7d0eea5a ("net: ipa: stop using sizeof(status)") Signed-off-by: Bert Karwatzki Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230531103618.102608-1-spasswolf@web.de Signed-off-by: Paolo Abeni --- drivers/net/ipa/ipa_endpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 2ee80ed140b7..afa1d56d9095 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -119,7 +119,7 @@ enum ipa_status_field_id { }; /* Size in bytes of an IPA packet status structure */ -#define IPA_STATUS_SIZE sizeof(__le32[4]) +#define IPA_STATUS_SIZE sizeof(__le32[8]) /* IPA status structure decoder; looks up field values for a structure */ static u32 ipa_status_extract(struct ipa *ipa, const void *data, From fd67a7a1a22ce47fcbc094c4b6e164c34c652cbe Mon Sep 17 00:00:00 2001 From: Trevor Wu Date: Thu, 1 Jun 2023 11:33:17 +0800 Subject: [PATCH 332/934] ASoC: mediatek: mt8188: fix use-after-free in driver remove path During mt8188_afe_init_clock(), mt8188_audsys_clk_register() was called followed by several other devm functions. The caller of mt8188_afe_init_clock() utilized devm_add_action_or_reset() to call mt8188_afe_deinit_clock(). However, the order was incorrect, causing a use-after-free issue during remove time. At probe time, the order of calls was: 1. mt8188_audsys_clk_register 2. afe_priv->clk = devm_kcalloc 3. afe_priv->clk[i] = devm_clk_get At remove time, the order of calls was: 1. mt8188_audsys_clk_unregister 3. free afe_priv->clk[i] 2. free afe_priv->clk To resolve the problem, it's necessary to move devm_add_action_or_reset() to the appropriate position so that the remove order can be 3->2->1. Fixes: f6b026479b13 ("ASoC: mediatek: mt8188: support audio clock control") Signed-off-by: Trevor Wu Reviewed-by: Douglas Anderson Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230601033318.10408-2-trevor.wu@mediatek.com Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8188/mt8188-afe-clk.c | 7 --- sound/soc/mediatek/mt8188/mt8188-afe-clk.h | 1 - sound/soc/mediatek/mt8188/mt8188-afe-pcm.c | 4 -- sound/soc/mediatek/mt8188/mt8188-audsys-clk.c | 47 ++++++++++--------- sound/soc/mediatek/mt8188/mt8188-audsys-clk.h | 1 - 5 files changed, 24 insertions(+), 36 deletions(-) diff --git a/sound/soc/mediatek/mt8188/mt8188-afe-clk.c b/sound/soc/mediatek/mt8188/mt8188-afe-clk.c index 743d6a162cb9..0fb97517f82c 100644 --- a/sound/soc/mediatek/mt8188/mt8188-afe-clk.c +++ b/sound/soc/mediatek/mt8188/mt8188-afe-clk.c @@ -418,13 +418,6 @@ int mt8188_afe_init_clock(struct mtk_base_afe *afe) return 0; } -void mt8188_afe_deinit_clock(void *priv) -{ - struct mtk_base_afe *afe = priv; - - mt8188_audsys_clk_unregister(afe); -} - int mt8188_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk) { int ret; diff --git a/sound/soc/mediatek/mt8188/mt8188-afe-clk.h b/sound/soc/mediatek/mt8188/mt8188-afe-clk.h index 084fdfb1d877..a4203a87a1e3 100644 --- a/sound/soc/mediatek/mt8188/mt8188-afe-clk.h +++ b/sound/soc/mediatek/mt8188/mt8188-afe-clk.h @@ -100,7 +100,6 @@ int mt8188_afe_get_mclk_source_clk_id(int sel); int mt8188_afe_get_mclk_source_rate(struct mtk_base_afe *afe, int apll); int mt8188_afe_get_default_mclk_source_by_rate(int rate); int mt8188_afe_init_clock(struct mtk_base_afe *afe); -void mt8188_afe_deinit_clock(void *priv); int mt8188_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk); void mt8188_afe_disable_clk(struct mtk_base_afe *afe, struct clk *clk); int mt8188_afe_set_clk_rate(struct mtk_base_afe *afe, struct clk *clk, diff --git a/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c b/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c index e5f9373bed56..bcf7025886df 100644 --- a/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c +++ b/sound/soc/mediatek/mt8188/mt8188-afe-pcm.c @@ -3185,10 +3185,6 @@ static int mt8188_afe_pcm_dev_probe(struct platform_device *pdev) if (ret) return dev_err_probe(dev, ret, "init clock error"); - ret = devm_add_action_or_reset(dev, mt8188_afe_deinit_clock, (void *)afe); - if (ret) - return ret; - spin_lock_init(&afe_priv->afe_ctrl_lock); mutex_init(&afe->irq_alloc_lock); diff --git a/sound/soc/mediatek/mt8188/mt8188-audsys-clk.c b/sound/soc/mediatek/mt8188/mt8188-audsys-clk.c index be1c53bf4729..c796ad8b62ee 100644 --- a/sound/soc/mediatek/mt8188/mt8188-audsys-clk.c +++ b/sound/soc/mediatek/mt8188/mt8188-audsys-clk.c @@ -138,6 +138,29 @@ static const struct afe_gate aud_clks[CLK_AUD_NR_CLK] = { GATE_AUD6(CLK_AUD_GASRC11, "aud_gasrc11", "top_asm_h", 11), }; +static void mt8188_audsys_clk_unregister(void *data) +{ + struct mtk_base_afe *afe = data; + struct mt8188_afe_private *afe_priv = afe->platform_priv; + struct clk *clk; + struct clk_lookup *cl; + int i; + + if (!afe_priv) + return; + + for (i = 0; i < CLK_AUD_NR_CLK; i++) { + cl = afe_priv->lookup[i]; + if (!cl) + continue; + + clk = cl->clk; + clk_unregister_gate(clk); + + clkdev_drop(cl); + } +} + int mt8188_audsys_clk_register(struct mtk_base_afe *afe) { struct mt8188_afe_private *afe_priv = afe->platform_priv; @@ -179,27 +202,5 @@ int mt8188_audsys_clk_register(struct mtk_base_afe *afe) afe_priv->lookup[i] = cl; } - return 0; -} - -void mt8188_audsys_clk_unregister(struct mtk_base_afe *afe) -{ - struct mt8188_afe_private *afe_priv = afe->platform_priv; - struct clk *clk; - struct clk_lookup *cl; - int i; - - if (!afe_priv) - return; - - for (i = 0; i < CLK_AUD_NR_CLK; i++) { - cl = afe_priv->lookup[i]; - if (!cl) - continue; - - clk = cl->clk; - clk_unregister_gate(clk); - - clkdev_drop(cl); - } + return devm_add_action_or_reset(afe->dev, mt8188_audsys_clk_unregister, afe); } diff --git a/sound/soc/mediatek/mt8188/mt8188-audsys-clk.h b/sound/soc/mediatek/mt8188/mt8188-audsys-clk.h index 6c5f463ad7e4..45b0948c4a06 100644 --- a/sound/soc/mediatek/mt8188/mt8188-audsys-clk.h +++ b/sound/soc/mediatek/mt8188/mt8188-audsys-clk.h @@ -10,6 +10,5 @@ #define _MT8188_AUDSYS_CLK_H_ int mt8188_audsys_clk_register(struct mtk_base_afe *afe); -void mt8188_audsys_clk_unregister(struct mtk_base_afe *afe); #endif From dc93f0dcb436dfd24a06c5b3c0f4c5cd9296e8e5 Mon Sep 17 00:00:00 2001 From: Trevor Wu Date: Thu, 1 Jun 2023 11:33:18 +0800 Subject: [PATCH 333/934] ASoC: mediatek: mt8195: fix use-after-free in driver remove path During mt8195_afe_init_clock(), mt8195_audsys_clk_register() was called followed by several other devm functions. At mt8195_afe_deinit_clock() located at mt8195_afe_pcm_dev_remove(), mt8195_audsys_clk_unregister() was called. However, there was an issue with the order in which these functions were called. Specifically, the remove callback of platform_driver was called before devres released the resource, resulting in a use-after-free issue during remove time. At probe time, the order of calls was: 1. mt8195_audsys_clk_register 2. afe_priv->clk = devm_kcalloc 3. afe_priv->clk[i] = devm_clk_get At remove time, the order of calls was: 1. mt8195_audsys_clk_unregister 3. free afe_priv->clk[i] 2. free afe_priv->clk To resolve the problem, we can utilize devm_add_action_or_reset() in mt8195_audsys_clk_register() so that the remove order can be changed to 3->2->1. Fixes: 6746cc858259 ("ASoC: mediatek: mt8195: add platform driver") Signed-off-by: Trevor Wu Reviewed-by: Douglas Anderson Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230601033318.10408-3-trevor.wu@mediatek.com Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8195/mt8195-afe-clk.c | 5 -- sound/soc/mediatek/mt8195/mt8195-afe-clk.h | 1 - sound/soc/mediatek/mt8195/mt8195-afe-pcm.c | 4 -- sound/soc/mediatek/mt8195/mt8195-audsys-clk.c | 47 ++++++++++--------- sound/soc/mediatek/mt8195/mt8195-audsys-clk.h | 1 - 5 files changed, 24 insertions(+), 34 deletions(-) diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-clk.c b/sound/soc/mediatek/mt8195/mt8195-afe-clk.c index 9ca2cb8c8a9c..f35318ae0739 100644 --- a/sound/soc/mediatek/mt8195/mt8195-afe-clk.c +++ b/sound/soc/mediatek/mt8195/mt8195-afe-clk.c @@ -410,11 +410,6 @@ int mt8195_afe_init_clock(struct mtk_base_afe *afe) return 0; } -void mt8195_afe_deinit_clock(struct mtk_base_afe *afe) -{ - mt8195_audsys_clk_unregister(afe); -} - int mt8195_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk) { int ret; diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-clk.h b/sound/soc/mediatek/mt8195/mt8195-afe-clk.h index 40663e31becd..a08c0ee6c860 100644 --- a/sound/soc/mediatek/mt8195/mt8195-afe-clk.h +++ b/sound/soc/mediatek/mt8195/mt8195-afe-clk.h @@ -101,7 +101,6 @@ int mt8195_afe_get_mclk_source_clk_id(int sel); int mt8195_afe_get_mclk_source_rate(struct mtk_base_afe *afe, int apll); int mt8195_afe_get_default_mclk_source_by_rate(int rate); int mt8195_afe_init_clock(struct mtk_base_afe *afe); -void mt8195_afe_deinit_clock(struct mtk_base_afe *afe); int mt8195_afe_enable_clk(struct mtk_base_afe *afe, struct clk *clk); void mt8195_afe_disable_clk(struct mtk_base_afe *afe, struct clk *clk); int mt8195_afe_prepare_clk(struct mtk_base_afe *afe, struct clk *clk); diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c index 9e45efeada55..03dabc056b91 100644 --- a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c +++ b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c @@ -3255,15 +3255,11 @@ err_pm_put: static void mt8195_afe_pcm_dev_remove(struct platform_device *pdev) { - struct mtk_base_afe *afe = platform_get_drvdata(pdev); - snd_soc_unregister_component(&pdev->dev); pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt8195_afe_runtime_suspend(&pdev->dev); - - mt8195_afe_deinit_clock(afe); } static const struct of_device_id mt8195_afe_pcm_dt_match[] = { diff --git a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c index e0670e0dbd5b..38594bc3f2f7 100644 --- a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c +++ b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.c @@ -148,6 +148,29 @@ static const struct afe_gate aud_clks[CLK_AUD_NR_CLK] = { GATE_AUD6(CLK_AUD_GASRC19, "aud_gasrc19", "top_asm_h", 19), }; +static void mt8195_audsys_clk_unregister(void *data) +{ + struct mtk_base_afe *afe = data; + struct mt8195_afe_private *afe_priv = afe->platform_priv; + struct clk *clk; + struct clk_lookup *cl; + int i; + + if (!afe_priv) + return; + + for (i = 0; i < CLK_AUD_NR_CLK; i++) { + cl = afe_priv->lookup[i]; + if (!cl) + continue; + + clk = cl->clk; + clk_unregister_gate(clk); + + clkdev_drop(cl); + } +} + int mt8195_audsys_clk_register(struct mtk_base_afe *afe) { struct mt8195_afe_private *afe_priv = afe->platform_priv; @@ -188,27 +211,5 @@ int mt8195_audsys_clk_register(struct mtk_base_afe *afe) afe_priv->lookup[i] = cl; } - return 0; -} - -void mt8195_audsys_clk_unregister(struct mtk_base_afe *afe) -{ - struct mt8195_afe_private *afe_priv = afe->platform_priv; - struct clk *clk; - struct clk_lookup *cl; - int i; - - if (!afe_priv) - return; - - for (i = 0; i < CLK_AUD_NR_CLK; i++) { - cl = afe_priv->lookup[i]; - if (!cl) - continue; - - clk = cl->clk; - clk_unregister_gate(clk); - - clkdev_drop(cl); - } + return devm_add_action_or_reset(afe->dev, mt8195_audsys_clk_unregister, afe); } diff --git a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h index 239d31016ba7..69db2dd1c9e0 100644 --- a/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h +++ b/sound/soc/mediatek/mt8195/mt8195-audsys-clk.h @@ -10,6 +10,5 @@ #define _MT8195_AUDSYS_CLK_H_ int mt8195_audsys_clk_register(struct mtk_base_afe *afe); -void mt8195_audsys_clk_unregister(struct mtk_base_afe *afe); #endif From b320a45638296b63be8d9a901ca8bc43716b1ae1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 May 2023 19:51:49 +0000 Subject: [PATCH 334/934] bpf, sockmap: Avoid potential NULL dereference in sk_psock_verdict_data_ready() syzbot found sk_psock(sk) could return NULL when called from sk_psock_verdict_data_ready(). Just make sure to handle this case. [1] general protection fault, probably for non-canonical address 0xdffffc000000005c: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x00000000000002e0-0x00000000000002e7] CPU: 0 PID: 15 Comm: ksoftirqd/0 Not tainted 6.4.0-rc3-syzkaller-00588-g4781e965e655 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/16/2023 RIP: 0010:sk_psock_verdict_data_ready+0x19f/0x3c0 net/core/skmsg.c:1213 Code: 4c 89 e6 e8 63 70 5e f9 4d 85 e4 75 75 e8 19 74 5e f9 48 8d bb e0 02 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 07 02 00 00 48 89 ef ff 93 e0 02 00 00 e8 29 fd RSP: 0018:ffffc90000147688 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000100 RDX: 000000000000005c RSI: ffffffff8825ceb7 RDI: 00000000000002e0 RBP: ffff888076518c40 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000008000 R15: ffff888076518c40 FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f901375bab0 CR3: 000000004bf26000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_data_ready+0x10a/0x520 net/ipv4/tcp_input.c:5006 tcp_data_queue+0x25d3/0x4c50 net/ipv4/tcp_input.c:5080 tcp_rcv_established+0x829/0x1f90 net/ipv4/tcp_input.c:6019 tcp_v4_do_rcv+0x65a/0x9c0 net/ipv4/tcp_ipv4.c:1726 tcp_v4_rcv+0x2cbf/0x3340 net/ipv4/tcp_ipv4.c:2148 ip_protocol_deliver_rcu+0x9f/0x480 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2ec/0x520 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:303 [inline] NF_HOOK include/linux/netfilter.h:297 [inline] ip_local_deliver+0x1ae/0x200 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:468 [inline] ip_rcv_finish+0x1cf/0x2f0 net/ipv4/ip_input.c:449 NF_HOOK include/linux/netfilter.h:303 [inline] NF_HOOK include/linux/netfilter.h:297 [inline] ip_rcv+0xae/0xd0 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core+0x114/0x180 net/core/dev.c:5491 __netif_receive_skb+0x1f/0x1c0 net/core/dev.c:5605 process_backlog+0x101/0x670 net/core/dev.c:5933 __napi_poll+0xb7/0x6f0 net/core/dev.c:6499 napi_poll net/core/dev.c:6566 [inline] net_rx_action+0x8a9/0xcb0 net/core/dev.c:6699 __do_softirq+0x1d4/0x905 kernel/softirq.c:571 run_ksoftirqd kernel/softirq.c:939 [inline] run_ksoftirqd+0x31/0x60 kernel/softirq.c:931 smpboot_thread_fn+0x659/0x9e0 kernel/smpboot.c:164 kthread+0x344/0x440 kernel/kthread.c:379 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 Fixes: 6df7f764cd3c ("bpf, sockmap: Wake up polling after data copy") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/bpf/20230530195149.68145-1-edumazet@google.com --- net/core/skmsg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index a9060e1f0e43..a29508e1ff35 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1210,7 +1210,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - psock->saved_data_ready(sk); + if (psock) + psock->saved_data_ready(sk); rcu_read_unlock(); } } From fa58cc888d67e640e354d8b3ceef877ea167b0cf Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 31 May 2023 21:08:26 +0200 Subject: [PATCH 335/934] gfs2: Don't get stuck writing page onto itself under direct I/O When a direct I/O write is performed, iomap_dio_rw() invalidates the part of the page cache which the write is going to before carrying out the write. In the odd case, the direct I/O write will be reading from the same page it is writing to. gfs2 carries out writes with page faults disabled, so it should have been obvious that this page invalidation can cause iomap_dio_rw() to never make any progress. Currently, gfs2 will end up in an endless retry loop in gfs2_file_direct_write() instead, though. Break this endless loop by limiting the number of retries and falling back to buffered I/O after that. Also simplify should_fault_in_pages() sightly and add a comment to make the above case easier to understand. Reported-by: Jan Kara Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 300844f50dcd..cb62c8f07d1e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -784,9 +784,13 @@ static inline bool should_fault_in_pages(struct iov_iter *i, if (!user_backed_iter(i)) return false; + /* + * Try to fault in multiple pages initially. When that doesn't result + * in any progress, fall back to a single page. + */ size = PAGE_SIZE; offs = offset_in_page(iocb->ki_pos); - if (*prev_count != count || !*window_size) { + if (*prev_count != count) { size_t nr_dirtied; nr_dirtied = max(current->nr_dirtied_pause - @@ -870,6 +874,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, struct gfs2_inode *ip = GFS2_I(inode); size_t prev_count = 0, window_size = 0; size_t written = 0; + bool enough_retries; ssize_t ret; /* @@ -913,11 +918,17 @@ retry: if (ret > 0) written = ret; + enough_retries = prev_count == iov_iter_count(from) && + window_size <= PAGE_SIZE; if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { gfs2_glock_dq(gh); window_size -= fault_in_iov_iter_readable(from, window_size); - if (window_size) - goto retry; + if (window_size) { + if (!enough_retries) + goto retry; + /* fall back to buffered I/O */ + ret = 0; + } } out_unlock: if (gfs2_holder_queued(gh)) From 0e2ee345856454632dcd2f3ee2ba4bb3f8632f74 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 Jun 2023 06:05:59 -0700 Subject: [PATCH 336/934] ASoC: max98363: Removed 32bit support MAX98363 does not support 32bit depth audio. Removed 32bit from the supported format list. Instead, added 16bit and 24bit to the list. Signed-off-by: Ryan Lee Link: https://lore.kernel.org/r/20230601130600.25344-1-ryan.lee.analog@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98363.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98363.c b/sound/soc/codecs/max98363.c index dcce06bff756..53e19c8abb40 100644 --- a/sound/soc/codecs/max98363.c +++ b/sound/soc/codecs/max98363.c @@ -211,7 +211,7 @@ static int max98363_io_init(struct sdw_slave *slave) } #define MAX98363_RATES SNDRV_PCM_RATE_8000_192000 -#define MAX98363_FORMATS (SNDRV_PCM_FMTBIT_S32_LE) +#define MAX98363_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE) static int max98363_sdw_dai_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, From 3a67ad17b47ed111bda692238b6a19420e6934c8 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 1 Jun 2023 06:06:00 -0700 Subject: [PATCH 337/934] ASoC: max98363: limit the number of channel to 1 MAX98363 is a mono amplifier. The number of channel needs to be always 1. Signed-off-by: Ryan Lee Link: https://lore.kernel.org/r/20230601130600.25344-2-ryan.lee.analog@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98363.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/max98363.c b/sound/soc/codecs/max98363.c index 53e19c8abb40..e6b84e222b50 100644 --- a/sound/soc/codecs/max98363.c +++ b/sound/soc/codecs/max98363.c @@ -246,7 +246,7 @@ static int max98363_sdw_dai_hw_params(struct snd_pcm_substream *substream, stream_config.frame_rate = params_rate(params); stream_config.bps = snd_pcm_format_width(params_format(params)); stream_config.direction = direction; - stream_config.ch_count = params_channels(params); + stream_config.ch_count = 1; if (stream_config.ch_count > runtime->hw.channels_max) { stream_config.ch_count = runtime->hw.channels_max; From 524306c3764276ce6cc7509908934982ce167039 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 1 Jun 2023 13:49:07 +0100 Subject: [PATCH 338/934] ASoC: cs35l56: Remove NULL check from cs35l56_sdw_dai_set_stream() The dma pointer must be set to the passed stream pointer, even if that pointer is NULL. Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230601124907.3128170-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index d1677d76d018..e0d2b9bb2326 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -704,9 +704,6 @@ static int cs35l56_sdw_dai_hw_free(struct snd_pcm_substream *substream, static int cs35l56_sdw_dai_set_stream(struct snd_soc_dai *dai, void *sdw_stream, int direction) { - if (!sdw_stream) - return 0; - snd_soc_dai_dma_data_set(dai, direction, sdw_stream); return 0; From b675df0257bb717082f592626da3ddfc5bdc2b6b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 1 Jun 2023 18:51:34 +0800 Subject: [PATCH 339/934] btrfs: zoned: fix dev-replace after the scrub rework [BUG] After commit e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure"), scrub no longer works for zoned device at all. Even an empty zoned btrfs cannot be replaced: # mkfs.btrfs -f /dev/nvme0n1 # mount /dev/nvme0n1 /mnt/btrfs # btrfs replace start -Bf 1 /dev/nvme0n2 /mnt/btrfs Resetting device zones /dev/nvme1n1 (160 zones) ... ERROR: ioctl(DEV_REPLACE_START) failed on "/mnt/btrfs/": Input/output error And we can hit kernel crash related to that: BTRFS info (device nvme1n1): host-managed zoned block device /dev/nvme3n1, 160 zones of 134217728 bytes BTRFS info (device nvme1n1): dev_replace from /dev/nvme2n1 (devid 2) to /dev/nvme3n1 started nvme3n1: Zone Management Append(0x7d) @ LBA 65536, 4 blocks, Zone Is Full (sct 0x1 / sc 0xb9) DNR I/O error, dev nvme3n1, sector 786432 op 0xd:(ZONE_APPEND) flags 0x4000 phys_seg 3 prio class 2 BTRFS error (device nvme1n1): bdev /dev/nvme3n1 errs: wr 1, rd 0, flush 0, corrupt 0, gen 0 BUG: kernel NULL pointer dereference, address: 00000000000000a8 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:_raw_spin_lock_irqsave+0x1e/0x40 Call Trace: btrfs_lookup_ordered_extent+0x31/0x190 btrfs_record_physical_zoned+0x18/0x40 btrfs_simple_end_io+0xaf/0xc0 blk_update_request+0x153/0x4c0 blk_mq_end_request+0x15/0xd0 nvme_poll_cq+0x1d3/0x360 nvme_irq+0x39/0x80 __handle_irq_event_percpu+0x3b/0x190 handle_irq_event+0x2f/0x70 handle_edge_irq+0x7c/0x210 __common_interrupt+0x34/0xa0 common_interrupt+0x7d/0xa0 asm_common_interrupt+0x22/0x40 [CAUSE] Dev-replace reuses scrub code to iterate all extents and write the existing content back to the new device. And for zoned devices, we call fill_writer_pointer_gap() to make sure all the writes into the zoned device is sequential, even if there may be some gaps between the writes. However we have several different bugs all related to zoned dev-replace: - We are using ZONE_APPEND operation for metadata style write back For zoned devices, btrfs has two ways to write data: * ZONE_APPEND for data This allows higher queue depth, but will not be able to know where the write would land. Thus needs to grab the real on-disk physical location in it's endio. * WRITE for metadata This requires single queue depth (new writes can only be submitted after previous one finished), and all writes must be sequential. For scrub, we go single queue depth, but still goes with ZONE_APPEND, which requires btrfs_bio::inode being populated. This is the cause of that crash. - No correct tracing of write_pointer After a write finished, we should forward sctx->write_pointer, or fill_writer_pointer_gap() would not work properly and cause more than necessary zero out, and fill the whole zone prematurely. - Incorrect physical bytenr passed to fill_writer_pointer_gap() In scrub_write_sectors(), one call site passes logical address, which is completely wrong. The other call site passes physical address of current sector, but we should pass the physical address of the btrfs_bio we're submitting. This is the cause of the -EIO errors. [FIX] - Do not use ZONE_APPEND for btrfs_submit_repair_write(). - Manually forward sctx->write_pointer after successful writeback - Use the physical address of the to-be-submitted btrfs_bio for fill_writer_pointer_gap() Now zoned device replace would work as expected. Reported-by: Christoph Hellwig Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure") Reviewed-by: Christoph Hellwig Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/bio.c | 4 ---- fs/btrfs/scrub.c | 48 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 35d34c731d72..b3ad0f51e616 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -811,10 +811,6 @@ void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_ goto fail; if (dev_replace) { - if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) { - bbio->bio.bi_opf &= ~REQ_OP_WRITE; - bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND; - } ASSERT(smap.dev == fs_info->dev_replace.srcdev); smap.dev = fs_info->dev_replace.tgtdev; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index dd37cba58022..7c666517d3d3 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1137,6 +1137,35 @@ static void scrub_write_endio(struct btrfs_bio *bbio) wake_up(&stripe->io_wait); } +static void scrub_submit_write_bio(struct scrub_ctx *sctx, + struct scrub_stripe *stripe, + struct btrfs_bio *bbio, bool dev_replace) +{ + struct btrfs_fs_info *fs_info = sctx->fs_info; + u32 bio_len = bbio->bio.bi_iter.bi_size; + u32 bio_off = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT) - + stripe->logical; + + fill_writer_pointer_gap(sctx, stripe->physical + bio_off); + atomic_inc(&stripe->pending_io); + btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace); + if (!btrfs_is_zoned(fs_info)) + return; + /* + * For zoned writeback, queue depth must be 1, thus we must wait for + * the write to finish before the next write. + */ + wait_scrub_stripe_io(stripe); + + /* + * And also need to update the write pointer if write finished + * successfully. + */ + if (!test_bit(bio_off >> fs_info->sectorsize_bits, + &stripe->write_error_bitmap)) + sctx->write_pointer += bio_len; +} + /* * Submit the write bio(s) for the sectors specified by @write_bitmap. * @@ -1155,7 +1184,6 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; - const bool zoned = btrfs_is_zoned(fs_info); int sector_nr; for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) { @@ -1168,13 +1196,7 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str /* Cannot merge with previous sector, submit the current one. */ if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) { - fill_writer_pointer_gap(sctx, stripe->physical + - (sector_nr << fs_info->sectorsize_bits)); - atomic_inc(&stripe->pending_io); - btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace); - /* For zoned writeback, queue depth must be 1. */ - if (zoned) - wait_scrub_stripe_io(stripe); + scrub_submit_write_bio(sctx, stripe, bbio, dev_replace); bbio = NULL; } if (!bbio) { @@ -1187,14 +1209,8 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); ASSERT(ret == fs_info->sectorsize); } - if (bbio) { - fill_writer_pointer_gap(sctx, bbio->bio.bi_iter.bi_sector << - SECTOR_SHIFT); - atomic_inc(&stripe->pending_io); - btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace); - if (zoned) - wait_scrub_stripe_io(stripe); - } + if (bbio) + scrub_submit_write_bio(sctx, stripe, bbio, dev_replace); } /* From 3918dd0177ee08970683a2c22a3388825d82fd79 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Sat, 27 May 2023 16:29:37 +0800 Subject: [PATCH 340/934] wifi: rtw88: correct PS calculation for SUPPORTS_DYNAMIC_PS This driver relies on IEEE80211_CONF_PS of hw->conf.flags to turn off PS or turn on dynamic PS controlled by driver and firmware. Though this would be incorrect, it did work before because the flag is always recalculated until the commit 28977e790b5d ("wifi: mac80211: skip powersave recalc if driver SUPPORTS_DYNAMIC_PS") is introduced by kernel 5.20 to skip to recalculate IEEE80211_CONF_PS of hw->conf.flags if driver sets SUPPORTS_DYNAMIC_PS. Correct this by doing recalculation while BSS_CHANGED_PS is changed and interface is added or removed. It is allowed to enter PS only if single one station vif is working. Without this fix, driver doesn't enter PS anymore that causes higher power consumption. Fixes: bcde60e599fb ("rtw88: remove misleading module parameter rtw_fw_support_lps") Cc: stable@vger.kernel.org # 6.1+ Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230527082939.11206-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/mac80211.c | 14 +++--- drivers/net/wireless/realtek/rtw88/main.c | 4 +- drivers/net/wireless/realtek/rtw88/ps.c | 43 +++++++++++++++++++ drivers/net/wireless/realtek/rtw88/ps.h | 2 + 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index a6c024cab7ee..144618bb94c8 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -88,15 +88,6 @@ static int rtw_ops_config(struct ieee80211_hw *hw, u32 changed) } } - if (changed & IEEE80211_CONF_CHANGE_PS) { - if (hw->conf.flags & IEEE80211_CONF_PS) { - rtwdev->ps_enabled = true; - } else { - rtwdev->ps_enabled = false; - rtw_leave_lps(rtwdev); - } - } - if (changed & IEEE80211_CONF_CHANGE_CHANNEL) rtw_set_channel(rtwdev); @@ -213,6 +204,7 @@ static int rtw_ops_add_interface(struct ieee80211_hw *hw, config |= PORT_SET_BCN_CTRL; rtw_vif_port_config(rtwdev, rtwvif, config); rtw_core_port_switch(rtwdev, vif); + rtw_recalc_lps(rtwdev, vif); mutex_unlock(&rtwdev->mutex); @@ -244,6 +236,7 @@ static void rtw_ops_remove_interface(struct ieee80211_hw *hw, config |= PORT_SET_BCN_CTRL; rtw_vif_port_config(rtwdev, rtwvif, config); clear_bit(rtwvif->port, rtwdev->hw_port); + rtw_recalc_lps(rtwdev, NULL); mutex_unlock(&rtwdev->mutex); } @@ -438,6 +431,9 @@ static void rtw_ops_bss_info_changed(struct ieee80211_hw *hw, if (changed & BSS_CHANGED_ERP_SLOT) rtw_conf_tx(rtwdev, rtwvif); + if (changed & BSS_CHANGED_PS) + rtw_recalc_lps(rtwdev, NULL); + rtw_vif_port_config(rtwdev, rtwvif, config); mutex_unlock(&rtwdev->mutex); diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index d30a191c9291..9447a3aae3b5 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -271,8 +271,8 @@ static void rtw_watch_dog_work(struct work_struct *work) * more than two stations associated to the AP, then we can not enter * lps, because fw does not handle the overlapped beacon interval * - * mac80211 should iterate vifs and determine if driver can enter - * ps by passing IEEE80211_CONF_PS to us, all we need to do is to + * rtw_recalc_lps() iterate vifs and determine if driver can enter + * ps by vif->type and vif->cfg.ps, all we need to do here is to * get that vif and check if device is having traffic more than the * threshold. */ diff --git a/drivers/net/wireless/realtek/rtw88/ps.c b/drivers/net/wireless/realtek/rtw88/ps.c index 996365575f44..53933fb38a33 100644 --- a/drivers/net/wireless/realtek/rtw88/ps.c +++ b/drivers/net/wireless/realtek/rtw88/ps.c @@ -299,3 +299,46 @@ void rtw_leave_lps_deep(struct rtw_dev *rtwdev) __rtw_leave_lps_deep(rtwdev); } + +struct rtw_vif_recalc_lps_iter_data { + struct rtw_dev *rtwdev; + struct ieee80211_vif *found_vif; + int count; +}; + +static void __rtw_vif_recalc_lps(struct rtw_vif_recalc_lps_iter_data *data, + struct ieee80211_vif *vif) +{ + if (data->count < 0) + return; + + if (vif->type != NL80211_IFTYPE_STATION) { + data->count = -1; + return; + } + + data->count++; + data->found_vif = vif; +} + +static void rtw_vif_recalc_lps_iter(void *data, u8 *mac, + struct ieee80211_vif *vif) +{ + __rtw_vif_recalc_lps(data, vif); +} + +void rtw_recalc_lps(struct rtw_dev *rtwdev, struct ieee80211_vif *new_vif) +{ + struct rtw_vif_recalc_lps_iter_data data = { .rtwdev = rtwdev }; + + if (new_vif) + __rtw_vif_recalc_lps(&data, new_vif); + rtw_iterate_vifs(rtwdev, rtw_vif_recalc_lps_iter, &data); + + if (data.count == 1 && data.found_vif->cfg.ps) { + rtwdev->ps_enabled = true; + } else { + rtwdev->ps_enabled = false; + rtw_leave_lps(rtwdev); + } +} diff --git a/drivers/net/wireless/realtek/rtw88/ps.h b/drivers/net/wireless/realtek/rtw88/ps.h index c194386f6db5..5ae83d2526cf 100644 --- a/drivers/net/wireless/realtek/rtw88/ps.h +++ b/drivers/net/wireless/realtek/rtw88/ps.h @@ -23,4 +23,6 @@ void rtw_enter_lps(struct rtw_dev *rtwdev, u8 port_id); void rtw_leave_lps(struct rtw_dev *rtwdev); void rtw_leave_lps_deep(struct rtw_dev *rtwdev); enum rtw_lps_deep_mode rtw_get_lps_deep_mode(struct rtw_dev *rtwdev); +void rtw_recalc_lps(struct rtw_dev *rtwdev, struct ieee80211_vif *new_vif); + #endif From 26a125f550a3bf86ac91d38752f4d446426dfe1c Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Sat, 27 May 2023 16:29:38 +0800 Subject: [PATCH 341/934] wifi: rtw89: correct PS calculation for SUPPORTS_DYNAMIC_PS This driver relies on IEEE80211_CONF_PS of hw->conf.flags to turn off PS or turn on dynamic PS controlled by driver and firmware. Though this would be incorrect, it did work before because the flag is always recalculated until the commit 28977e790b5d ("wifi: mac80211: skip powersave recalc if driver SUPPORTS_DYNAMIC_PS") is introduced by kernel 5.20 to skip to recalculate IEEE80211_CONF_PS of hw->conf.flags if driver sets SUPPORTS_DYNAMIC_PS. Correct this by doing recalculation while BSS_CHANGED_PS is changed and interface is added or removed. For now, it is allowed to enter PS only if single one station vif is working, and it could possible to have PS per vif after firmware can support it. Without this fix, driver doesn't enter PS anymore that causes higher power consumption. Fixes: e3ec7017f6a2 ("rtw89: add Realtek 802.11ax driver") Cc: stable@vger.kernel.org # 6.1+ Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230527082939.11206-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/mac80211.c | 15 +++++------ drivers/net/wireless/realtek/rtw89/ps.c | 26 +++++++++++++++++++ drivers/net/wireless/realtek/rtw89/ps.h | 1 + 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index ee4588b61b8f..c42e31069035 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -89,15 +89,6 @@ static int rtw89_ops_config(struct ieee80211_hw *hw, u32 changed) !(hw->conf.flags & IEEE80211_CONF_IDLE)) rtw89_leave_ips(rtwdev); - if (changed & IEEE80211_CONF_CHANGE_PS) { - if (hw->conf.flags & IEEE80211_CONF_PS) { - rtwdev->lps_enabled = true; - } else { - rtw89_leave_lps(rtwdev); - rtwdev->lps_enabled = false; - } - } - if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { rtw89_config_entity_chandef(rtwdev, RTW89_SUB_ENTITY_0, &hw->conf.chandef); @@ -168,6 +159,8 @@ static int rtw89_ops_add_interface(struct ieee80211_hw *hw, rtw89_core_txq_init(rtwdev, vif->txq); rtw89_btc_ntfy_role_info(rtwdev, rtwvif, NULL, BTC_ROLE_START); + + rtw89_recalc_lps(rtwdev); out: mutex_unlock(&rtwdev->mutex); @@ -192,6 +185,7 @@ static void rtw89_ops_remove_interface(struct ieee80211_hw *hw, rtw89_mac_remove_vif(rtwdev, rtwvif); rtw89_core_release_bit_map(rtwdev->hw_port, rtwvif->port); list_del_init(&rtwvif->list); + rtw89_recalc_lps(rtwdev); rtw89_enter_ips_by_hwflags(rtwdev); mutex_unlock(&rtwdev->mutex); @@ -451,6 +445,9 @@ static void rtw89_ops_bss_info_changed(struct ieee80211_hw *hw, if (changed & BSS_CHANGED_CQM) rtw89_fw_h2c_set_bcn_fltr_cfg(rtwdev, vif, true); + if (changed & BSS_CHANGED_PS) + rtw89_recalc_lps(rtwdev); + mutex_unlock(&rtwdev->mutex); } diff --git a/drivers/net/wireless/realtek/rtw89/ps.c b/drivers/net/wireless/realtek/rtw89/ps.c index fa94335f699a..84201ef19c17 100644 --- a/drivers/net/wireless/realtek/rtw89/ps.c +++ b/drivers/net/wireless/realtek/rtw89/ps.c @@ -252,3 +252,29 @@ void rtw89_process_p2p_ps(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif) rtw89_p2p_disable_all_noa(rtwdev, vif); rtw89_p2p_update_noa(rtwdev, vif); } + +void rtw89_recalc_lps(struct rtw89_dev *rtwdev) +{ + struct ieee80211_vif *vif, *found_vif = NULL; + struct rtw89_vif *rtwvif; + int count = 0; + + rtw89_for_each_rtwvif(rtwdev, rtwvif) { + vif = rtwvif_to_vif(rtwvif); + + if (vif->type != NL80211_IFTYPE_STATION) { + count = 0; + break; + } + + count++; + found_vif = vif; + } + + if (count == 1 && found_vif->cfg.ps) { + rtwdev->lps_enabled = true; + } else { + rtw89_leave_lps(rtwdev); + rtwdev->lps_enabled = false; + } +} diff --git a/drivers/net/wireless/realtek/rtw89/ps.h b/drivers/net/wireless/realtek/rtw89/ps.h index 73c008db0426..4c18f49204b2 100644 --- a/drivers/net/wireless/realtek/rtw89/ps.h +++ b/drivers/net/wireless/realtek/rtw89/ps.h @@ -15,6 +15,7 @@ void rtw89_enter_ips(struct rtw89_dev *rtwdev); void rtw89_leave_ips(struct rtw89_dev *rtwdev); void rtw89_set_coex_ctrl_lps(struct rtw89_dev *rtwdev, bool btc_ctrl); void rtw89_process_p2p_ps(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif); +void rtw89_recalc_lps(struct rtw89_dev *rtwdev); static inline void rtw89_leave_ips_by_hwflags(struct rtw89_dev *rtwdev) { From b408f33b35a4de606bfbd0de33b8f971bc5488ba Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Sat, 27 May 2023 16:29:39 +0800 Subject: [PATCH 342/934] wifi: rtw89: remove redundant check of entering LPS Originally, add this check rule to prevent entering LPS if more than one vif (in station mode) connect to AP. Since we have checked this by previous commit, remove this redundant check. Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230527082939.11206-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 7fc0a26a4d73..bad864d56bd5 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -2531,9 +2531,6 @@ static void rtw89_vif_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwv rtwvif->tdls_peer) return; - if (rtwdev->total_sta_assoc > 1) - return; - if (rtwvif->offchan) return; From ead449023d3acb1424886d7b8cc672ed69bdd27e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 28 May 2023 12:28:49 +0200 Subject: [PATCH 343/934] wifi: mt76: mt7996: fix possible NULL pointer dereference in mt7996_mac_write_txwi() Fix possible NULL pointer dereference on mvif pointer in mt7996_mac_write_txwi routine. Fixes: 15ee62e73705 ("wifi: mt76: mt7996: enable BSS_CHANGED_BASIC_RATES support") Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/2637628a84f42ad6d7b774e706f041d5b45c8cb5.1685269638.git.lorenzo@kernel.org --- .../net/wireless/mediatek/mt76/mt7996/mac.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c index 39a4a73ef8e6..9b0f6053e0fa 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/mac.c @@ -1004,10 +1004,10 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi, { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_vif *vif = info->control.vif; - struct mt7996_vif *mvif = (struct mt7996_vif *)vif->drv_priv; u8 band_idx = (info->hw_queue & MT_TX_HW_QUEUE_PHY) >> 2; u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0; bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP; + struct mt7996_vif *mvif; u16 tx_count = 15; u32 val; bool beacon = !!(changed & (BSS_CHANGED_BEACON | @@ -1015,7 +1015,8 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi, bool inband_disc = !!(changed & (BSS_CHANGED_UNSOL_BCAST_PROBE_RESP | BSS_CHANGED_FILS_DISCOVERY)); - if (vif) { + mvif = vif ? (struct mt7996_vif *)vif->drv_priv : NULL; + if (mvif) { omac_idx = mvif->mt76.omac_idx; wmm_idx = mvif->mt76.wmm_idx; band_idx = mvif->mt76.band_idx; @@ -1081,12 +1082,16 @@ void mt7996_mac_write_txwi(struct mt7996_dev *dev, __le32 *txwi, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; bool mcast = ieee80211_is_data(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1); - u8 idx = mvif->basic_rates_idx; + u8 idx = MT7996_BASIC_RATES_TBL; - if (mcast && mvif->mcast_rates_idx) - idx = mvif->mcast_rates_idx; - else if (beacon && mvif->beacon_rates_idx) - idx = mvif->beacon_rates_idx; + if (mvif) { + if (mcast && mvif->mcast_rates_idx) + idx = mvif->mcast_rates_idx; + else if (beacon && mvif->beacon_rates_idx) + idx = mvif->beacon_rates_idx; + else + idx = mvif->basic_rates_idx; + } txwi[6] |= cpu_to_le32(FIELD_PREP(MT_TXD6_TX_RATE, idx)); txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE); From 4420528254153189c70b6267593e445dc8654e37 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 29 May 2023 12:52:07 -0600 Subject: [PATCH 344/934] firewire: Replace zero-length array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-length and one-element arrays are deprecated, and we are moving towards adopting C99 flexible-array members, instead. Address the following warnings found with GCC-13 and -fstrict-flex-arrays=3 enabled: sound/firewire/amdtp-stream.c: In function ‘build_it_pkt_header’: sound/firewire/amdtp-stream.c:694:17: warning: ‘generate_cip_header’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=] 694 | generate_cip_header(s, cip_header, data_block_counter, syt); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sound/firewire/amdtp-stream.c:694:17: note: referencing argument 2 of type ‘__be32[2]’ {aka ‘unsigned int[2]’} sound/firewire/amdtp-stream.c:667:13: note: in a call to function ‘generate_cip_header’ 667 | static void generate_cip_header(struct amdtp_stream *s, __be32 cip_header[2], | ^~~~~~~~~~~~~~~~~~~ This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy() and help us make progress towards globally enabling -fstrict-flex-arrays=3 [1]. Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/303 Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/ZHT0V3SpvHyxCv5W@work Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 1716c01c4e54..efb6e2cf2034 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -391,7 +391,7 @@ struct fw_iso_packet { u32 tag:2; /* tx: Tag in packet header */ u32 sy:4; /* tx: Sy in packet header */ u32 header_length:8; /* Length of immediate header */ - u32 header[0]; /* tx: Top of 1394 isoch. data_block */ + u32 header[]; /* tx: Top of 1394 isoch. data_block */ }; #define FW_ISO_CONTEXT_TRANSMIT 0 From 3c27f3d53d588618d81d30d6712459a3cc9489b8 Mon Sep 17 00:00:00 2001 From: Andreas Svensson Date: Tue, 30 May 2023 16:52:23 +0200 Subject: [PATCH 345/934] net: dsa: mv88e6xxx: Increase wait after reset deactivation A switch held in reset by default needs to wait longer until we can reliably detect it. An issue was observed when testing on the Marvell 88E6393X (Link Street). The driver failed to detect the switch on some upstarts. Increasing the wait time after reset deactivation solves this issue. The updated wait time is now also the same as the wait time in the mv88e6xxx_hardware_reset function. Fixes: 7b75e49de424 ("net: dsa: mv88e6xxx: wait after reset deactivation") Signed-off-by: Andreas Svensson Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230530145223.1223993-1-andreas.svensson@axis.com Signed-off-by: Paolo Abeni --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 64a2f2f83735..08a46ffd53af 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -7170,7 +7170,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) goto out; } if (chip->reset) - usleep_range(1000, 2000); + usleep_range(10000, 20000); /* Detect if the device is configured in single chip addressing mode, * otherwise continue with address specific smi init/detection. From 03c5c83b70dca3729a3eb488e668e5044bd9a5ea Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 25 May 2023 19:41:45 +0200 Subject: [PATCH 346/934] s390/purgatory: disable branch profiling Avoid linker error for randomly generated config file that has CONFIG_BRANCH_PROFILE_NONE enabled and make it similar to riscv, x86 and also to commit 4bf3ec384edf ("s390: disable branch profiling for vdso"). Reviewed-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 32573b4f9bd2..cc8cf5abea15 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -26,6 +26,7 @@ KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += -fno-stack-protector +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) From 444c17cfbc855bf6c1524f3813f6f667d9b708ff Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Fri, 26 May 2023 19:41:54 +0200 Subject: [PATCH 347/934] MAINTAINERS: Add myself as reviewer instead of Naga Naga no longer works for AMD/Xilinx and there is no activity from him to continue to maintain Xilinx related drivers. Add myself instead to be kept in loop if there is any need for testing. Signed-off-by: Michal Simek Signed-off-by: Miquel Raynal [: Manually apply on top of the latest -rc which where the MAINTAINERS file got sorted] Link: https://lore.kernel.org/linux-mtd/06df49c300c53a27423260e99acc217b06d4e588.1684827820.git.michal.simek@amd.com --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 27ef11624748..2a6606a772af 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1600,7 +1600,7 @@ F: drivers/media/i2c/ar0521.c ARASAN NAND CONTROLLER DRIVER M: Miquel Raynal -M: Naga Sureshkumar Relli +R: Michal Simek L: linux-mtd@lists.infradead.org S: Maintained F: Documentation/devicetree/bindings/mtd/arasan,nand-controller.yaml @@ -1763,7 +1763,7 @@ F: include/linux/amba/mmci.h ARM PRIMECELL PL35X NAND CONTROLLER DRIVER M: Miquel Raynal -M: Naga Sureshkumar Relli +R: Michal Simek L: linux-mtd@lists.infradead.org S: Maintained F: Documentation/devicetree/bindings/mtd/arm,pl353-nand-r2p1.yaml @@ -1771,7 +1771,7 @@ F: drivers/mtd/nand/raw/pl35x-nand-controller.c ARM PRIMECELL PL35X SMC DRIVER M: Miquel Raynal -M: Naga Sureshkumar Relli +R: Michal Simek L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/memory-controllers/arm,pl35x-smc.yaml From 0ea923f443350c8c5cca6eef5b748d52b903f46c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 17 Apr 2023 22:56:50 +0200 Subject: [PATCH 348/934] mtdchar: mark bits of ioctl handler noinline The addition of the mtdchar_read_ioctl() function caused the stack usage of mtdchar_ioctl() to grow beyond the warning limit on 32-bit architectures with gcc-13: drivers/mtd/mtdchar.c: In function 'mtdchar_ioctl': drivers/mtd/mtdchar.c:1229:1: error: the frame size of 1488 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Mark both the read and write portions as noinline_for_stack to ensure they don't get inlined and use separate stack slots to reduce the maximum usage, both in the mtdchar_ioctl() and combined with any of its callees. Fixes: 095bb6e44eb1 ("mtdchar: add MEMREAD ioctl") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Richard Weinberger Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230417205654.1982368-1-arnd@kernel.org --- drivers/mtd/mtdchar.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 01f1c6792df9..8dc4f5c493fc 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -590,8 +590,8 @@ static void adjust_oob_length(struct mtd_info *mtd, uint64_t start, (end_page - start_page + 1) * oob_per_page); } -static int mtdchar_write_ioctl(struct mtd_info *mtd, - struct mtd_write_req __user *argp) +static noinline_for_stack int +mtdchar_write_ioctl(struct mtd_info *mtd, struct mtd_write_req __user *argp) { struct mtd_info *master = mtd_get_master(mtd); struct mtd_write_req req; @@ -688,8 +688,8 @@ static int mtdchar_write_ioctl(struct mtd_info *mtd, return ret; } -static int mtdchar_read_ioctl(struct mtd_info *mtd, - struct mtd_read_req __user *argp) +static noinline_for_stack int +mtdchar_read_ioctl(struct mtd_info *mtd, struct mtd_read_req __user *argp) { struct mtd_info *master = mtd_get_master(mtd); struct mtd_read_req req; From 8a6f4d346f3bad9c68b4a87701eb3f7978542d57 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Thu, 25 May 2023 12:31:52 +1200 Subject: [PATCH 349/934] mtd: rawnand: marvell: ensure timing values are written When new timing values are calculated in marvell_nfc_setup_interface() ensure that they will be applied in marvell_nfc_select_target() by clearing the selected_chip pointer. Fixes: b25251414f6e ("mtd: rawnand: marvell: Stop implementing ->select_chip()") Suggested-by: Miquel Raynal Signed-off-by: Chris Packham Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230525003154.2303012-1-chris.packham@alliedtelesis.co.nz --- drivers/mtd/nand/raw/marvell_nand.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index afb424579f0b..f1fcf136ad03 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -2457,6 +2457,12 @@ static int marvell_nfc_setup_interface(struct nand_chip *chip, int chipnr, NDTR1_WAIT_MODE; } + /* + * Reset nfc->selected_chip so the next command will cause the timing + * registers to be updated in marvell_nfc_select_target(). + */ + nfc->selected_chip = NULL; + return 0; } From c4d28e30a8d0b979e4029465ab8f312ab6ce2644 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Thu, 25 May 2023 12:31:53 +1200 Subject: [PATCH 350/934] mtd: rawnand: marvell: don't set the NAND frequency select marvell_nfc_setup_interface() uses the frequency retrieved from the clock associated with the nand interface to determine the timings that will be used. By changing the NAND frequency select without reflecting this in the clock configuration this means that the timings calculated don't correctly meet the requirements of the NAND chip. This hasn't been an issue up to now because of a different bug that was stopping the timings being updated after they were initially set. Fixes: b25251414f6e ("mtd: rawnand: marvell: Stop implementing ->select_chip()") Signed-off-by: Chris Packham Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230525003154.2303012-2-chris.packham@alliedtelesis.co.nz --- drivers/mtd/nand/raw/marvell_nand.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index f1fcf136ad03..30c15e4e1cc0 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -2900,10 +2900,6 @@ static int marvell_nfc_init(struct marvell_nfc *nfc) regmap_update_bits(sysctrl_base, GENCONF_CLK_GATING_CTRL, GENCONF_CLK_GATING_CTRL_ND_GATE, GENCONF_CLK_GATING_CTRL_ND_GATE); - - regmap_update_bits(sysctrl_base, GENCONF_ND_CLK_CTRL, - GENCONF_ND_CLK_CTRL_EN, - GENCONF_ND_CLK_CTRL_EN); } /* Configure the DMA if appropriate */ From 3bf3a7c6985c625f64e73baefdaa36f1c2045a29 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 25 Apr 2023 01:02:42 +0000 Subject: [PATCH 351/934] RDMA/rtrs: Fix the last iu->buf leak in err path The last iu->buf will leak if ib_dma_mapping_error() fails. Fixes: c0894b3ea69d ("RDMA/rtrs: core: lib functions shared between client and server modules") Link: https://lore.kernel.org/r/1682384563-2-3-git-send-email-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Acked-by: Guoqing Jiang Acked-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 4bf9d868cc52..3696f367ff51 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -37,8 +37,10 @@ struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask, goto err; iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir); - if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) + if (ib_dma_mapping_error(dma_dev, iu->dma_addr)) { + kfree(iu->buf); goto err; + } iu->cqe.done = done; iu->size = size; From 9c29c8c7df0688f358d2df5ddd16c97c2f7292b4 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Tue, 25 Apr 2023 01:02:43 +0000 Subject: [PATCH 352/934] RDMA/rtrs: Fix rxe_dealloc_pd warning In current design: 1. PD and clt_path->s.dev are shared among connections. 2. every con[n]'s cleanup phase will call destroy_con_cq_qp() 3. clt_path->s.dev will be always decreased in destroy_con_cq_qp(), and when clt_path->s.dev become zero, it will destroy PD. 4. when con[1] failed to create, con[1] will not take clt_path->s.dev, but it try to decreased clt_path->s.dev So, in case create_cm(con[0]) succeeds but create_cm(con[1]) fails, destroy_con_cq_qp(con[1]) will be called first which will destroy the PD while this PD is still taken by con[0]. Here, we refactor the error path of create_cm() and init_conns(), so that we do the cleanup in the order they are created. The warning occurs when destroying RXE PD whose reference count is not zero. rnbd_client L597: Mapping device /dev/nvme0n1 on session client, (access_mode: rw, nr_poll_queues: 0) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 26407 at drivers/infiniband/sw/rxe/rxe_pool.c:256 __rxe_cleanup+0x13a/0x170 [rdma_rxe] Modules linked in: rpcrdma rdma_ucm ib_iser rnbd_client libiscsi rtrs_client scsi_transport_iscsi rtrs_core rdma_cm iw_cm ib_cm crc32_generic rdma_rxe udp_tunnel ib_uverbs ib_core kmem device_dax nd_pmem dax_pmem nd_vme crc32c_intel fuse nvme_core nfit libnvdimm dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 26407 Comm: rnbd-client.sh Kdump: loaded Not tainted 6.2.0-rc6-roce-flush+ #53 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:__rxe_cleanup+0x13a/0x170 [rdma_rxe] Code: 45 84 e4 0f 84 5a ff ff ff 48 89 ef e8 5f 18 71 f9 84 c0 75 90 be c8 00 00 00 48 89 ef e8 be 89 1f fa 85 c0 0f 85 7b ff ff ff <0f> 0b 41 bc ea ff ff ff e9 71 ff ff ff e8 84 7f 1f fa e9 d0 fe ff RSP: 0018:ffffb09880b6f5f0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff99401f15d6a8 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffffbac8234b RDI: 00000000ffffffff RBP: ffff99401f15d6d0 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000002d82 R11: 0000000000000000 R12: 0000000000000001 R13: ffff994101eff208 R14: ffffb09880b6f6a0 R15: 00000000fffffe00 FS: 00007fe113904740(0000) GS:ffff99413bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ff6cde656c8 CR3: 000000001f108004 CR4: 00000000001706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rxe_dealloc_pd+0x16/0x20 [rdma_rxe] ib_dealloc_pd_user+0x4b/0x80 [ib_core] rtrs_ib_dev_put+0x79/0xd0 [rtrs_core] destroy_con_cq_qp+0x8a/0xa0 [rtrs_client] init_path+0x1e7/0x9a0 [rtrs_client] ? __pfx_autoremove_wake_function+0x10/0x10 ? lock_is_held_type+0xd7/0x130 ? rcu_read_lock_sched_held+0x43/0x80 ? pcpu_alloc+0x3dd/0x7d0 ? rtrs_clt_init_stats+0x18/0x40 [rtrs_client] rtrs_clt_open+0x24f/0x5a0 [rtrs_client] ? __pfx_rnbd_clt_link_ev+0x10/0x10 [rnbd_client] rnbd_clt_map_device+0x6a5/0xe10 [rnbd_client] Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/1682384563-2-4-git-send-email-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Acked-by: Jack Wang Tested-by: Jack Wang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 55 +++++++++++--------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index edb2e3a25880..cfb50bfe53c3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2040,6 +2040,7 @@ static int rtrs_clt_rdma_cm_handler(struct rdma_cm_id *cm_id, return 0; } +/* The caller should do the cleanup in case of error */ static int create_cm(struct rtrs_clt_con *con) { struct rtrs_path *s = con->c.path; @@ -2062,14 +2063,14 @@ static int create_cm(struct rtrs_clt_con *con) err = rdma_set_reuseaddr(cm_id, 1); if (err != 0) { rtrs_err(s, "Set address reuse failed, err: %d\n", err); - goto destroy_cm; + return err; } err = rdma_resolve_addr(cm_id, (struct sockaddr *)&clt_path->s.src_addr, (struct sockaddr *)&clt_path->s.dst_addr, RTRS_CONNECT_TIMEOUT_MS); if (err) { rtrs_err(s, "Failed to resolve address, err: %d\n", err); - goto destroy_cm; + return err; } /* * Combine connection status and session events. This is needed @@ -2084,29 +2085,15 @@ static int create_cm(struct rtrs_clt_con *con) if (err == 0) err = -ETIMEDOUT; /* Timedout or interrupted */ - goto errr; + return err; } - if (con->cm_err < 0) { - err = con->cm_err; - goto errr; - } - if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) { + if (con->cm_err < 0) + return con->cm_err; + if (READ_ONCE(clt_path->state) != RTRS_CLT_CONNECTING) /* Device removal */ - err = -ECONNABORTED; - goto errr; - } + return -ECONNABORTED; return 0; - -errr: - stop_cm(con); - mutex_lock(&con->con_mutex); - destroy_con_cq_qp(con); - mutex_unlock(&con->con_mutex); -destroy_cm: - destroy_cm(con); - - return err; } static void rtrs_clt_path_up(struct rtrs_clt_path *clt_path) @@ -2334,7 +2321,7 @@ static void rtrs_clt_close_work(struct work_struct *work) static int init_conns(struct rtrs_clt_path *clt_path) { unsigned int cid; - int err; + int err, i; /* * On every new session connections increase reconnect counter @@ -2350,10 +2337,8 @@ static int init_conns(struct rtrs_clt_path *clt_path) goto destroy; err = create_cm(to_clt_con(clt_path->s.con[cid])); - if (err) { - destroy_con(to_clt_con(clt_path->s.con[cid])); + if (err) goto destroy; - } } err = alloc_path_reqs(clt_path); if (err) @@ -2364,15 +2349,21 @@ static int init_conns(struct rtrs_clt_path *clt_path) return 0; destroy: - while (cid--) { - struct rtrs_clt_con *con = to_clt_con(clt_path->s.con[cid]); + /* Make sure we do the cleanup in the order they are created */ + for (i = 0; i <= cid; i++) { + struct rtrs_clt_con *con; - stop_cm(con); + if (!clt_path->s.con[i]) + break; - mutex_lock(&con->con_mutex); - destroy_con_cq_qp(con); - mutex_unlock(&con->con_mutex); - destroy_cm(con); + con = to_clt_con(clt_path->s.con[i]); + if (con->c.cm_id) { + stop_cm(con); + mutex_lock(&con->con_mutex); + destroy_con_cq_qp(con); + mutex_unlock(&con->con_mutex); + destroy_cm(con); + } destroy_con(con); } /* From a60caf039e96d806b1ced893242bae82ba3ccf0d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 29 May 2023 16:38:17 +0900 Subject: [PATCH 353/934] net: renesas: rswitch: Fix return value in error path of xmit Fix return value in the error path of rswitch_start_xmit(). If TX queues are full, this function should return NETDEV_TX_BUSY. Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20230529073817.1145208-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 29afaddb598d..aace87139cea 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1485,7 +1485,7 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd if (rswitch_get_num_cur_queues(gq) >= gq->ring_size - 1) { netif_stop_subqueue(ndev, 0); - return ret; + return NETDEV_TX_BUSY; } if (skb_put_padto(skb, ETH_ZLEN)) From 519d6487640835d19461817c75907e6308074a73 Mon Sep 17 00:00:00 2001 From: Xu Liang Date: Wed, 31 May 2023 15:48:22 +0800 Subject: [PATCH 354/934] net: phy: mxl-gpy: extend interrupt fix to all impacted variants The interrupt fix in commit 97a89ed101bb should be applied on all variants of GPY2xx PHY and GPY115C. Fixes: 97a89ed101bb ("net: phy: mxl-gpy: disable interrupts on GPY215 by default") Signed-off-by: Xu Liang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230531074822.39136-1-lxu@maxlinear.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mxl-gpy.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index 6301a9abfb95..ea1073adc5a1 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -274,13 +274,6 @@ static int gpy_config_init(struct phy_device *phydev) return ret < 0 ? ret : 0; } -static bool gpy_has_broken_mdint(struct phy_device *phydev) -{ - /* At least these PHYs are known to have broken interrupt handling */ - return phydev->drv->phy_id == PHY_ID_GPY215B || - phydev->drv->phy_id == PHY_ID_GPY215C; -} - static int gpy_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -300,8 +293,7 @@ static int gpy_probe(struct phy_device *phydev) phydev->priv = priv; mutex_init(&priv->mbox_lock); - if (gpy_has_broken_mdint(phydev) && - !device_property_present(dev, "maxlinear,use-broken-interrupts")) + if (!device_property_present(dev, "maxlinear,use-broken-interrupts")) phydev->dev_flags |= PHY_F_NO_IRQ; fw_version = phy_read(phydev, PHY_FWV); @@ -659,11 +651,9 @@ static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev) * frame. Therefore, polling is the best we can do and won't do any more * harm. * It was observed that this bug happens on link state and link speed - * changes on a GPY215B and GYP215C independent of the firmware version - * (which doesn't mean that this list is exhaustive). + * changes independent of the firmware version. */ - if (gpy_has_broken_mdint(phydev) && - (reg & (PHY_IMASK_LSTC | PHY_IMASK_LSPC))) { + if (reg & (PHY_IMASK_LSTC | PHY_IMASK_LSPC)) { reg = gpy_mbox_read(phydev, REG_GPIO0_OUT); if (reg < 0) { phy_error(phydev); From abaf8d51b0cedb16af51fb6b2189370d7515977c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 31 May 2023 08:44:57 -0700 Subject: [PATCH 355/934] ice: recycle/free all of the fragments from multi-buffer frame The ice driver caches next_to_clean value at the beginning of ice_clean_rx_irq() in order to remember the first buffer that has to be freed/recycled after main Rx processing loop. The end boundary is indicated by first descriptor of frame that Rx processing loop has ended its duties. Note that if mentioned loop ended in the middle of gathering multi-buffer frame, next_to_clean would be pointing to the descriptor in the middle of the frame BUT freeing/recycling stage will stop at the first descriptor. This means that next iteration of ice_clean_rx_irq() will miss the (first_desc, next_to_clean - 1) entries. When running various 9K MTU workloads, such splats were observed: [ 540.780716] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 540.787787] #PF: supervisor read access in kernel mode [ 540.793002] #PF: error_code(0x0000) - not-present page [ 540.798218] PGD 0 P4D 0 [ 540.800801] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 540.805231] CPU: 18 PID: 3984 Comm: xskxceiver Tainted: G W 6.3.0-rc7+ #96 [ 540.813619] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [ 540.824209] RIP: 0010:ice_clean_rx_irq+0x2b6/0xf00 [ice] [ 540.829678] Code: 74 24 10 e9 aa 00 00 00 8b 55 78 41 31 57 10 41 09 c4 4d 85 ff 0f 84 83 00 00 00 49 8b 57 08 41 8b 4f 1c 65 8b 35 1a fa 4b 3f <48> 8b 02 48 c1 e8 3a 39 c6 0f 85 a2 00 00 00 f6 42 08 02 0f 85 98 [ 540.848717] RSP: 0018:ffffc9000f42fc50 EFLAGS: 00010282 [ 540.854029] RAX: 0000000000000004 RBX: 0000000000000002 RCX: 000000000000fffe [ 540.861272] RDX: 0000000000000000 RSI: 0000000000000001 RDI: 00000000ffffffff [ 540.868519] RBP: ffff88984a05ac00 R08: 0000000000000000 R09: dead000000000100 [ 540.875760] R10: ffff88983fffcd00 R11: 000000000010f2b8 R12: 0000000000000004 [ 540.883008] R13: 0000000000000003 R14: 0000000000000800 R15: ffff889847a10040 [ 540.890253] FS: 00007f6ddf7fe640(0000) GS:ffff88afdf800000(0000) knlGS:0000000000000000 [ 540.898465] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 540.904299] CR2: 0000000000000000 CR3: 000000010d3da001 CR4: 00000000007706e0 [ 540.911542] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 540.918789] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 540.926032] PKRU: 55555554 [ 540.928790] Call Trace: [ 540.931276] [ 540.933418] ice_napi_poll+0x4ca/0x6d0 [ice] [ 540.937804] ? __pfx_ice_napi_poll+0x10/0x10 [ice] [ 540.942716] napi_busy_loop+0xd7/0x320 [ 540.946537] xsk_recvmsg+0x143/0x170 [ 540.950178] sock_recvmsg+0x99/0xa0 [ 540.953729] __sys_recvfrom+0xa8/0x120 [ 540.957543] ? do_futex+0xbd/0x1d0 [ 540.961008] ? __x64_sys_futex+0x73/0x1d0 [ 540.965083] __x64_sys_recvfrom+0x20/0x30 [ 540.969155] do_syscall_64+0x38/0x90 [ 540.972796] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 540.977934] RIP: 0033:0x7f6de5f27934 To fix this, set cached_ntc to first_desc so that at the end, when freeing/recycling buffers, descriptors from first to ntc are not missed. Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Signed-off-by: Maciej Fijalkowski Reviewed-by: Simon Horman Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230531154457.3216621-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 059bd911c51d..52d0a126eb61 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1152,11 +1152,11 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_pkts = 0; unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; + u32 cached_ntc = rx_ring->first_desc; struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; u32 cnt = rx_ring->count; - u32 cached_ntc = ntc; u32 xdp_xmit = 0; u32 cached_ntu; bool failure; From b0ad3c179059089d809b477a1d445c1183a7b8fe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 31 May 2023 12:01:42 -0400 Subject: [PATCH 356/934] rtnetlink: call validate_linkmsg in rtnl_create_link validate_linkmsg() was introduced by commit 1840bb13c22f5b ("[RTNL]: Validate hardware and broadcast address attribute for RTM_NEWLINK") to validate tb[IFLA_ADDRESS/BROADCAST] for existing links. The same check should also be done for newly created links. This patch adds validate_linkmsg() call in rtnl_create_link(), to avoid the invalid address set when creating some devices like: # ip link add dummy0 type dummy # ip link add link dummy0 name mac0 address 01:02 type macsec Fixes: 0e06877c6fdb ("[RTNETLINK]: rtnl_link: allow specifying initial device address") Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 653901a1bf75..824688edb722 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3285,6 +3285,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, struct net_device *dev; unsigned int num_tx_queues = 1; unsigned int num_rx_queues = 1; + int err; if (tb[IFLA_NUM_TX_QUEUES]) num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]); @@ -3320,13 +3321,18 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, if (!dev) return ERR_PTR(-ENOMEM); + err = validate_linkmsg(dev, tb, extack); + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } + dev_net_set(dev, net); dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; if (tb[IFLA_MTU]) { u32 mtu = nla_get_u32(tb[IFLA_MTU]); - int err; err = dev_validate_mtu(dev, mtu, extack); if (err) { From fef5b228dd38378148bc850f7e69a7783f3b95a4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 31 May 2023 12:01:43 -0400 Subject: [PATCH 357/934] rtnetlink: move IFLA_GSO_ tb check to validate_linkmsg These IFLA_GSO_* tb check should also be done for the new created link, otherwise, they can be set to a huge value when creating links: # ip link add dummy1 gso_max_size 4294967295 type dummy # ip -d link show dummy1 dummy addrgenmode eui64 ... gso_max_size 4294967295 Fixes: 46e6b992c250 ("rtnetlink: allow GSO maximums to be set on device creation") Fixes: 9eefedd58ae1 ("net: add gso_ipv4_max_size and gro_ipv4_max_size per device") Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 824688edb722..bc068a857219 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2385,6 +2385,25 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], if (tb[IFLA_BROADCAST] && nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) return -EINVAL; + + if (tb[IFLA_GSO_MAX_SIZE] && + nla_get_u32(tb[IFLA_GSO_MAX_SIZE]) > dev->tso_max_size) { + NL_SET_ERR_MSG(extack, "too big gso_max_size"); + return -EINVAL; + } + + if (tb[IFLA_GSO_MAX_SEGS] && + (nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > GSO_MAX_SEGS || + nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > dev->tso_max_segs)) { + NL_SET_ERR_MSG(extack, "too big gso_max_segs"); + return -EINVAL; + } + + if (tb[IFLA_GSO_IPV4_MAX_SIZE] && + nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) > dev->tso_max_size) { + NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size"); + return -EINVAL; + } } if (tb[IFLA_AF_SPEC]) { @@ -2858,11 +2877,6 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_GSO_MAX_SIZE]) { u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]); - if (max_size > dev->tso_max_size) { - err = -EINVAL; - goto errout; - } - if (dev->gso_max_size ^ max_size) { netif_set_gso_max_size(dev, max_size); status |= DO_SETLINK_MODIFIED; @@ -2872,11 +2886,6 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_GSO_MAX_SEGS]) { u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]); - if (max_segs > GSO_MAX_SEGS || max_segs > dev->tso_max_segs) { - err = -EINVAL; - goto errout; - } - if (dev->gso_max_segs ^ max_segs) { netif_set_gso_max_segs(dev, max_segs); status |= DO_SETLINK_MODIFIED; @@ -2895,11 +2904,6 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_GSO_IPV4_MAX_SIZE]) { u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]); - if (max_size > dev->tso_max_size) { - err = -EINVAL; - goto errout; - } - if (dev->gso_ipv4_max_size ^ max_size) { netif_set_gso_ipv4_max_size(dev, max_size); status |= DO_SETLINK_MODIFIED; From 65d6914e253f3d83b724a9bbfc889ae95711e512 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 31 May 2023 12:01:44 -0400 Subject: [PATCH 358/934] rtnetlink: add the missing IFLA_GRO_ tb check in validate_linkmsg This fixes the issue that dev gro_max_size and gso_ipv4_max_size can be set to a huge value: # ip link add dummy1 type dummy # ip link set dummy1 gro_max_size 4294967295 # ip -d link show dummy1 dummy addrgenmode eui64 ... gro_max_size 4294967295 Fixes: 0fe79f28bfaf ("net: allow gro_max_size to exceed 65536") Fixes: 9eefedd58ae1 ("net: add gso_ipv4_max_size and gro_ipv4_max_size per device") Reported-by: Xiumei Mu Signed-off-by: Xin Long Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bc068a857219..41de3a2f29e1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2399,11 +2399,23 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], return -EINVAL; } + if (tb[IFLA_GRO_MAX_SIZE] && + nla_get_u32(tb[IFLA_GRO_MAX_SIZE]) > GRO_MAX_SIZE) { + NL_SET_ERR_MSG(extack, "too big gro_max_size"); + return -EINVAL; + } + if (tb[IFLA_GSO_IPV4_MAX_SIZE] && nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) > dev->tso_max_size) { NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size"); return -EINVAL; } + + if (tb[IFLA_GRO_IPV4_MAX_SIZE] && + nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]) > GRO_MAX_SIZE) { + NL_SET_ERR_MSG(extack, "too big gro_ipv4_max_size"); + return -EINVAL; + } } if (tb[IFLA_AF_SPEC]) { From 786fc12457268cc9b555dde6c22ae7300d4b40e1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 May 2023 12:37:03 -0700 Subject: [PATCH 359/934] mptcp: fix connect timeout handling Ondrej reported a functional issue WRT timeout handling on connect with a nice reproducer. The problem is that the current mptcp connect waits for both the MPTCP socket level timeout, and the first subflow socket timeout. The latter is not influenced/touched by the exposed setsockopt(). Overall the above makes the SO_SNDTIMEO a no-op on connect. Since mptcp_connect is invoked via inet_stream_connect and the latter properly handle the MPTCP level timeout, we can address the issue making the nested subflow level connect always unblocking. This also allow simplifying a bit the code, dropping an ugly hack to handle the fastopen and custom proto_ops connect. The issues predates the blamed commit below, but the current resolution requires the infrastructure introduced there. Fixes: 54f1944ed6d2 ("mptcp: factor out mptcp_connect()") Reported-by: Ondrej Mosnacek Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/399 Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 29 +++++++---------------------- net/mptcp/protocol.h | 1 - 2 files changed, 7 insertions(+), 23 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 08dc53f56bc2..9cafd3b89908 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1702,7 +1702,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, lock_sock(ssk); msg->msg_flags |= MSG_DONTWAIT; - msk->connect_flags = O_NONBLOCK; msk->fastopening = 1; ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); msk->fastopening = 0; @@ -3617,9 +3616,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * acquired the subflow socket lock, too. */ if (msk->fastopening) - err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1); + err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1); else - err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags); + err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK); inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect; /* on successful connect, the msk state will be moved to established by @@ -3632,12 +3631,10 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) mptcp_copy_inaddrs(sk, ssock->sk); - /* unblocking connect, mptcp-level inet_stream_connect will error out - * without changing the socket state, update it here. + /* silence EINPROGRESS and let the caller inet_stream_connect + * handle the connection in progress */ - if (err == -EINPROGRESS) - sk->sk_socket->state = ssock->state; - return err; + return 0; } static struct proto mptcp_prot = { @@ -3696,18 +3693,6 @@ unlock: return err; } -static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) -{ - int ret; - - lock_sock(sock->sk); - mptcp_sk(sock->sk)->connect_flags = flags; - ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0); - release_sock(sock->sk); - return ret; -} - static int mptcp_listen(struct socket *sock, int backlog) { struct mptcp_sock *msk = mptcp_sk(sock->sk); @@ -3859,7 +3844,7 @@ static const struct proto_ops mptcp_stream_ops = { .owner = THIS_MODULE, .release = inet_release, .bind = mptcp_bind, - .connect = mptcp_stream_connect, + .connect = inet_stream_connect, .socketpair = sock_no_socketpair, .accept = mptcp_stream_accept, .getname = inet_getname, @@ -3954,7 +3939,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .owner = THIS_MODULE, .release = inet6_release, .bind = mptcp_bind, - .connect = mptcp_stream_connect, + .connect = inet_stream_connect, .socketpair = sock_no_socketpair, .accept = mptcp_stream_accept, .getname = inet6_getname, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 2d7b2c80a164..de4667dafe59 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -297,7 +297,6 @@ struct mptcp_sock { nodelay:1, fastopening:1, in_accept_queue:1; - int connect_flags; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; From 5b825727d0871b23e8867f6371183e61628b4a26 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 May 2023 12:37:04 -0700 Subject: [PATCH 360/934] mptcp: add annotations around msk->subflow accesses The MPTCP can access the first subflow socket in a few spots outside the socket lock scope. That is actually safe, as MPTCP will delete the socket itself only after the msk sock close(). Still the such accesses causes a few KCSAN splats, as reported by Christoph. Silence the harmless warning adding a few annotation around the relevant accesses. Fixes: 71ba088ce0aa ("mptcp: cleanup accept and poll") Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/402 Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 18 ++++++++++-------- net/mptcp/protocol.h | 6 +++++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9cafd3b89908..ce9de2c946b0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -91,7 +91,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) return err; msk->first = ssock->sk; - msk->subflow = ssock; + WRITE_ONCE(msk->subflow, ssock); subflow = mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); @@ -2282,7 +2282,7 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) { if (msk->subflow) { iput(SOCK_INODE(msk->subflow)); - msk->subflow = NULL; + WRITE_ONCE(msk->subflow, NULL); } } @@ -3136,7 +3136,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk = mptcp_sk(nsk); msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; - msk->subflow = NULL; + WRITE_ONCE(msk->subflow, NULL); msk->in_accept_queue = 1; WRITE_ONCE(msk->fully_established, false); if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) @@ -3184,7 +3184,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, struct socket *listener; struct sock *newsk; - listener = msk->subflow; + listener = READ_ONCE(msk->subflow); if (WARN_ON_ONCE(!listener)) { *err = -EINVAL; return NULL; @@ -3736,10 +3736,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, pr_debug("msk=%p", msk); - /* buggy applications can call accept on socket states other then LISTEN + /* Buggy applications can call accept on socket states other then LISTEN * but no need to allocate the first subflow just to error out. */ - ssock = msk->subflow; + ssock = READ_ONCE(msk->subflow); if (!ssock) return -EINVAL; @@ -3813,10 +3813,12 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, state = inet_sk_state_load(sk); pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags); if (state == TCP_LISTEN) { - if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk)) + struct socket *ssock = READ_ONCE(msk->subflow); + + if (WARN_ON_ONCE(!ssock || !ssock->sk)) return 0; - return inet_csk_listen_poll(msk->subflow->sk); + return inet_csk_listen_poll(ssock->sk); } if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index de4667dafe59..7a1a3c35470f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -305,7 +305,11 @@ struct mptcp_sock { struct list_head rtx_queue; struct mptcp_data_frag *first_pending; struct list_head join_list; - struct socket *subflow; /* outgoing connect/listener/!mp_capable */ + struct socket *subflow; /* outgoing connect/listener/!mp_capable + * The mptcp ops can safely dereference, using suitable + * ONCE annotation, the subflow outside the socket + * lock as such sock is freed after close(). + */ struct sock *first; struct mptcp_pm_data pm; struct { From 7e8b88ec35eef363040e08d99536d2bebef83774 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 May 2023 12:37:05 -0700 Subject: [PATCH 361/934] mptcp: consolidate passive msk socket initialization When the msk socket is cloned at MPC handshake time, a few fields are initialized in a racy way outside mptcp_sk_clone() and the msk socket lock. The above is due historical reasons: before commit a88d0092b24b ("mptcp: simplify subflow_syn_recv_sock()") as the first subflow socket carrying all the needed date was not available yet at msk creation time We can now refactor the code moving the missing initialization bit under the socket lock, removing the init race and avoiding some code duplication. This will also simplify the next patch, as all msk->first write access are now under the msk socket lock. Fixes: 0397c6d85f9c ("mptcp: keep unaccepted MPC subflow into join list") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 35 ++++++++++++++++++++++++++++------- net/mptcp/protocol.h | 8 ++++---- net/mptcp/subflow.c | 28 +--------------------------- 3 files changed, 33 insertions(+), 38 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index ce9de2c946b0..2ecd0117ab1b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3038,7 +3038,7 @@ static void mptcp_close(struct sock *sk, long timeout) sock_put(sk); } -void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) +static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) const struct ipv6_pinfo *ssk6 = inet6_sk(ssk); @@ -3115,9 +3115,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) } #endif -struct sock *mptcp_sk_clone(const struct sock *sk, - const struct mptcp_options_received *mp_opt, - struct request_sock *req) +struct sock *mptcp_sk_clone_init(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct sock *ssk, + struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); @@ -3149,10 +3150,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; sock_reset_flag(nsk, SOCK_RCU_FREE); - /* will be fully established after successful MPC subflow creation */ - inet_sk_state_store(nsk, TCP_SYN_RECV); - security_inet_csk_clone(nsk, req); + + /* this can't race with mptcp_close(), as the msk is + * not yet exposted to user-space + */ + inet_sk_state_store(nsk, TCP_ESTABLISHED); + + /* The msk maintain a ref to each subflow in the connections list */ + WRITE_ONCE(msk->first, ssk); + list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list); + sock_hold(ssk); + + /* new mpc subflow takes ownership of the newly + * created mptcp socket + */ + mptcp_token_accept(subflow_req, msk); + + /* set msk addresses early to ensure mptcp_pm_get_local_id() + * uses the correct data + */ + mptcp_copy_inaddrs(nsk, ssk); + mptcp_propagate_sndbuf(nsk, ssk); + + mptcp_rcv_space_init(msk, ssk); bh_unlock_sock(nsk); /* note: the newly allocated socket refcount is 2 now */ diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7a1a3c35470f..c5255258bfb3 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -616,7 +616,6 @@ int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); int mptcp_get_pm_type(const struct net *net); -void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); @@ -686,9 +685,10 @@ void __init mptcp_proto_init(void); int __init mptcp_proto_v6_init(void); #endif -struct sock *mptcp_sk_clone(const struct sock *sk, - const struct mptcp_options_received *mp_opt, - struct request_sock *req); +struct sock *mptcp_sk_clone_init(const struct sock *sk, + const struct mptcp_options_received *mp_opt, + struct sock *ssk, + struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct mptcp_options_received *mp_opt); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ba065b66551a..4688daa6b38b 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -815,38 +815,12 @@ create_child: ctx->setsockopt_seq = listener->setsockopt_seq; if (ctx->mp_capable) { - ctx->conn = mptcp_sk_clone(listener->conn, &mp_opt, req); + ctx->conn = mptcp_sk_clone_init(listener->conn, &mp_opt, child, req); if (!ctx->conn) goto fallback; owner = mptcp_sk(ctx->conn); - - /* this can't race with mptcp_close(), as the msk is - * not yet exposted to user-space - */ - inet_sk_state_store(ctx->conn, TCP_ESTABLISHED); - - /* record the newly created socket as the first msk - * subflow, but don't link it yet into conn_list - */ - WRITE_ONCE(owner->first, child); - - /* new mpc subflow takes ownership of the newly - * created mptcp socket - */ - owner->setsockopt_seq = ctx->setsockopt_seq; mptcp_pm_new_connection(owner, child, 1); - mptcp_token_accept(subflow_req, owner); - - /* set msk addresses early to ensure mptcp_pm_get_local_id() - * uses the correct data - */ - mptcp_copy_inaddrs(ctx->conn, child); - mptcp_propagate_sndbuf(ctx->conn, child); - - mptcp_rcv_space_init(owner, child); - list_add(&ctx->node, &owner->conn_list); - sock_hold(child); /* with OoO packets we can reach here without ingress * mpc option From 1b1b43ee7a208096ecd79e626f2fc90d4a321111 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 May 2023 12:37:06 -0700 Subject: [PATCH 362/934] mptcp: fix data race around msk->first access The first subflow socket is accessed outside the msk socket lock by mptcp_subflow_fail(), we need to annotate each write access with WRITE_ONCE, but a few spots still lacks it. Fixes: 76a13b315709 ("mptcp: invoke MP_FAIL response when needed") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2ecd0117ab1b..a7dd7d8c9af2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -90,7 +90,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) if (err) return err; - msk->first = ssock->sk; + WRITE_ONCE(msk->first, ssock->sk); WRITE_ONCE(msk->subflow, ssock); subflow = mptcp_subflow_ctx(ssock->sk); list_add(&subflow->node, &msk->conn_list); @@ -2419,7 +2419,7 @@ out_release: sock_put(ssk); if (ssk == msk->first) - msk->first = NULL; + WRITE_ONCE(msk->first, NULL); out: if (ssk == msk->last_snd) @@ -2720,7 +2720,7 @@ static int __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival = TCP_RTO_MIN; - msk->first = NULL; + WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); WRITE_ONCE(msk->allow_infinite_fallback, true); From 6b9831bfd9322b297eb6d44257808cc055fdc586 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 May 2023 12:37:07 -0700 Subject: [PATCH 363/934] mptcp: add annotations around sk->sk_shutdown accesses Christoph reported the mptcp variant of a recently addressed plain TCP issue. Similar to commit e14cadfd80d7 ("tcp: add annotations around sk->sk_shutdown accesses") add READ/WRITE ONCE annotations to silence KCSAN reports around lockless sk_shutdown access. Fixes: 71ba088ce0aa ("mptcp: cleanup accept and poll") Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/401 Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a7dd7d8c9af2..af54a878ac27 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -603,7 +603,7 @@ static bool mptcp_check_data_fin(struct sock *sk) WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1); WRITE_ONCE(msk->rcv_data_fin, 0); - sk->sk_shutdown |= RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ switch (sk->sk_state) { @@ -910,7 +910,7 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk) /* hopefully temporary hack: propagate shutdown status * to msk, when all subflows agree on it */ - sk->sk_shutdown |= RCV_SHUTDOWN; + WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ sk->sk_data_ready(sk); @@ -2526,7 +2526,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) } inet_sk_state_store(sk, TCP_CLOSE); - sk->sk_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); @@ -2958,7 +2958,7 @@ bool __mptcp_close(struct sock *sk, long timeout) bool do_cancel_work = false; int subflows_alive = 0; - sk->sk_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { mptcp_listen_inuse_dec(sk); @@ -3101,7 +3101,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); - sk->sk_shutdown = 0; + WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); return 0; } @@ -3806,9 +3806,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; - if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) - return EPOLLOUT | EPOLLWRNORM; - if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; @@ -3826,6 +3823,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; struct mptcp_sock *msk; __poll_t mask = 0; + u8 shutdown; int state; msk = mptcp_sk(sk); @@ -3842,17 +3840,22 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, return inet_csk_listen_poll(ssock->sk); } + shutdown = READ_ONCE(sk->sk_shutdown); + if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) + mask |= EPOLLHUP; + if (shutdown & RCV_SHUTDOWN) + mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { mask |= mptcp_check_readable(msk); - mask |= mptcp_check_writeable(msk); + if (shutdown & SEND_SHUTDOWN) + mask |= EPOLLOUT | EPOLLWRNORM; + else + mask |= mptcp_check_writeable(msk); } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { /* cf tcp_poll() note about TFO */ mask |= EPOLLOUT | EPOLLWRNORM; } - if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) - mask |= EPOLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); From 55b47ca7d80814ceb63d64e032e96cd6777811e5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 31 May 2023 12:37:08 -0700 Subject: [PATCH 364/934] mptcp: fix active subflow finalization Active subflow are inserted into the connection list at creation time. When the MPJ handshake completes successfully, a new subflow creation netlink event is generated correctly, but the current code wrongly avoid initializing a couple of subflow data. The above will cause misbehavior on a few exceptional events: unneeded mptcp-level retransmission on msk-level sequence wrap-around and infinite mapping fallback even when a MPJ socket is present. Address the issue factoring out the needed initialization in a new helper and invoking the latter from __mptcp_finish_join() time for passive subflow and from mptcp_finish_join() for active ones. Fixes: 0530020a7c8f ("mptcp: track and update contiguous data status") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index af54a878ac27..67311e7d5b21 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -825,6 +825,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) mptcp_data_unlock(sk); } +static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk) +{ + mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq); + WRITE_ONCE(msk->allow_infinite_fallback, false); + mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); +} + static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; @@ -839,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_sock_graft(ssk, sk->sk_socket); mptcp_sockopt_sync_locked(msk, ssk); + mptcp_subflow_joined(msk, ssk); return true; } @@ -3485,14 +3493,16 @@ bool mptcp_finish_join(struct sock *ssk) return false; } - if (!list_empty(&subflow->node)) - goto out; + /* active subflow, already present inside the conn_list */ + if (!list_empty(&subflow->node)) { + mptcp_subflow_joined(msk, ssk); + return true; + } if (!mptcp_pm_allow_new_subflow(msk)) goto err_prohibited; - /* active connections are already on conn_list. - * If we can't acquire msk socket lock here, let the release callback + /* If we can't acquire msk socket lock here, let the release callback * handle it */ mptcp_data_lock(parent); @@ -3515,11 +3525,6 @@ err_prohibited: return false; } - subflow->map_seq = READ_ONCE(msk->ack_seq); - WRITE_ONCE(msk->allow_infinite_fallback, false); - -out: - mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); return true; } From 9a7e8ec0d4cc64870ea449b4fce5779b77496cbb Mon Sep 17 00:00:00 2001 From: Ism Hong Date: Thu, 1 Jun 2023 17:53:55 +0800 Subject: [PATCH 365/934] riscv: perf: Fix callchain parse error with kernel tracepoint events For RISC-V, when tracing with tracepoint events, the IP and status are set to 0, preventing the perf code parsing the callchain and resolving the symbols correctly. ./ply 'tracepoint:kmem/kmem_cache_alloc { @[stack]=count(); }' @: { }: 1 The fix is to implement perf_arch_fetch_caller_regs for riscv, which fills several necessary registers used for callchain unwinding, including epc, sp, s0 and status. It's similar to commit b3eac0265bf6 ("arm: perf: Fix callchain parse error with kernel tracepoint events") and commit 5b09a094f2fb ("arm64: perf: Fix callchain parse error with kernel tracepoint events"). With this patch, callchain can be parsed correctly as: ./ply 'tracepoint:kmem/kmem_cache_alloc { @[stack]=count(); }' @: { __traceiter_kmem_cache_alloc+68 __traceiter_kmem_cache_alloc+68 kmem_cache_alloc+354 __sigqueue_alloc+94 __send_signal_locked+646 send_signal_locked+154 do_send_sig_info+84 __kill_pgrp_info+130 kill_pgrp+60 isig+150 n_tty_receive_signal_char+36 n_tty_receive_buf_standard+2214 n_tty_receive_buf_common+280 n_tty_receive_buf2+26 tty_ldisc_receive_buf+34 tty_port_default_receive_buf+62 flush_to_ldisc+158 process_one_work+458 worker_thread+138 kthread+178 riscv_cpufeature_patch_func+832 }: 1 Signed-off-by: Ism Hong Link: https://lore.kernel.org/r/20230601095355.1168910-1-ism.hong@gmail.com Fixes: 178e9fc47aae ("perf: riscv: preliminary RISC-V support") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h index d42c901f9a97..665bbc9b2f84 100644 --- a/arch/riscv/include/asm/perf_event.h +++ b/arch/riscv/include/asm/perf_event.h @@ -10,4 +10,11 @@ #include #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs + +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->epc = (__ip); \ + (regs)->s0 = (unsigned long) __builtin_frame_address(0); \ + (regs)->sp = current_stack_pointer; \ + (regs)->status = SR_PP; \ +} #endif /* _ASM_RISCV_PERF_EVENT_H */ From 9a3763e87379c97a78b7c6c6f40720b1e877174f Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 17 May 2023 12:22:42 -0500 Subject: [PATCH 366/934] RDMA/rxe: Fix packet length checks In rxe_net.c a received packet, from udp or loopback, is passed to rxe_rcv() in rxe_recv.c as a udp packet. I.e. skb->data is pointing at the udp header. But rxe_rcv() makes length checks to verify the packet is long enough to hold the roce headers as if it were a roce packet. I.e. skb->data pointing at the bth header. A runt packet would appear to have 8 more bytes than it actually does which may lead to incorrect behavior. This patch calls skb_pull() to adjust the skb to point at the bth header before calling rxe_rcv() which fixes this error. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20230517172242.1806340-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index a38fab19bed1..cd59666158b1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -159,6 +159,9 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) pkt->mask = RXE_GRH_MASK; pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph); + /* remove udp header */ + skb_pull(skb, sizeof(struct udphdr)); + rxe_rcv(skb); return 0; @@ -401,6 +404,9 @@ static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt) return -EIO; } + /* remove udp header */ + skb_pull(skb, sizeof(struct udphdr)); + rxe_rcv(skb); return 0; From b00683422fd79dd07c9b75efdce1660e5e19150e Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Wed, 17 May 2023 16:15:10 -0500 Subject: [PATCH 367/934] RDMA/rxe: Fix ref count error in check_rkey() There is a reference count error in error path code and a potential race in check_rkey() in rxe_resp.c. When looking up the rkey for a memory window the reference to the mw from rxe_lookup_mw() is dropped before a reference is taken on the mr referenced by the mw. If the mr is destroyed immediately after the call to rxe_put(mw) the mr pointer is unprotected and may end up pointing at freed memory. The rxe_get(mr) call should take place before the rxe_put(mw) call. All errors in check_rkey() call rxe_put(mw) if mw is not NULL but it was already called after the above. The mw pointer should be set to NULL after the rxe_put(mw) call to prevent this from happening. Fixes: cdd0b85675ae ("RDMA/rxe: Implement memory access through MWs") Link: https://lore.kernel.org/r/20230517211509.1819998-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 1da044f6b7d4..ee68306555b9 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -489,8 +489,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (mw->access & IB_ZERO_BASED) qp->resp.offset = mw->addr; - rxe_put(mw); rxe_get(mr); + rxe_put(mw); + mw = NULL; } else { mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); if (!mr) { From 42c4e97e06a839b07d834f640a10911ad84ec8b3 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 1 Jun 2023 10:21:21 -0400 Subject: [PATCH 368/934] selinux: don't use make's grouped targets feature yet The Linux Kernel currently only requires make v3.82 while the grouped target functionality requires make v4.3. Removed the grouped target introduced in 4ce1f694eb5d ("selinux: ensure av_permissions.h is built when needed") as well as the multiple header file targets in the make rule. This effectively reverts the problem commit. We will revisit this change when make >= 4.3 is required by the rest of the kernel. Cc: stable@vger.kernel.org Fixes: 4ce1f694eb5d ("selinux: ensure av_permissions.h is built when needed") Reported-by: Erwan Velu Reported-by: Luiz Capitulino Tested-by: Luiz Capitulino Signed-off-by: Paul Moore --- security/selinux/Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/security/selinux/Makefile b/security/selinux/Makefile index 0aecf9334ec3..8b21520bd4b9 100644 --- a/security/selinux/Makefile +++ b/security/selinux/Makefile @@ -26,5 +26,9 @@ quiet_cmd_flask = GEN $(obj)/flask.h $(obj)/av_permissions.h cmd_flask = $< $(obj)/flask.h $(obj)/av_permissions.h targets += flask.h av_permissions.h -$(obj)/flask.h $(obj)/av_permissions.h &: scripts/selinux/genheaders/genheaders FORCE +# once make >= 4.3 is required, we can use grouped targets in the rule below, +# which basically involves adding both headers and a '&' before the colon, see +# the example below: +# $(obj)/flask.h $(obj)/av_permissions.h &: scripts/selinux/... +$(obj)/flask.h: scripts/selinux/genheaders/genheaders FORCE $(call if_changed,flask) From 403e97d6ab2cb6fd0ac1ff968cd7b691771f1613 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Wed, 31 May 2023 13:10:38 +0200 Subject: [PATCH 369/934] dt-bindings: serial: 8250_omap: add rs485-rts-active-high Add rs485-rts-active-high property, this was removed by mistake. In general we just use rs485-rts-active-low property, however the OMAP UART for legacy reason uses the -high one. Fixes: 767d3467eb60 ("dt-bindings: serial: 8250_omap: drop rs485 properties") Closes: https://lore.kernel.org/all/ZGefR4mTHHo1iQ7H@francesco-nb.int.toradex.com/ Signed-off-by: Francesco Dolcini Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230531111038.6302-1-francesco@dolcini.it Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/8250_omap.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/serial/8250_omap.yaml b/Documentation/devicetree/bindings/serial/8250_omap.yaml index eb3488d8f9ee..6a7be42da523 100644 --- a/Documentation/devicetree/bindings/serial/8250_omap.yaml +++ b/Documentation/devicetree/bindings/serial/8250_omap.yaml @@ -70,6 +70,7 @@ properties: dsr-gpios: true rng-gpios: true dcd-gpios: true + rs485-rts-active-high: true rts-gpio: true power-domains: true clock-frequency: true From f9010dbdce911ee1f1af1398a24b1f9f992e0080 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 1 Jun 2023 13:32:32 -0500 Subject: [PATCH 370/934] fork, vhost: Use CLONE_THREAD to fix freezer/ps regression When switching from kthreads to vhost_tasks two bugs were added: 1. The vhost worker tasks's now show up as processes so scripts doing ps or ps a would not incorrectly detect the vhost task as another process. 2. kthreads disabled freeze by setting PF_NOFREEZE, but vhost tasks's didn't disable or add support for them. To fix both bugs, this switches the vhost task to be thread in the process that does the VHOST_SET_OWNER ioctl, and has vhost_worker call get_signal to support SIGKILL/SIGSTOP and freeze signals. Note that SIGKILL/STOP support is required because CLONE_THREAD requires CLONE_SIGHAND which requires those 2 signals to be supported. This is a modified version of the patch written by Mike Christie which was a modified version of patch originally written by Linus. Much of what depended upon PF_IO_WORKER now depends on PF_USER_WORKER. Including ignoring signals, setting up the register state, and having get_signal return instead of calling do_group_exit. Tidied up the vhost_task abstraction so that the definition of vhost_task only needs to be visible inside of vhost_task.c. Making it easier to review the code and tell what needs to be done where. As part of this the main loop has been moved from vhost_worker into vhost_task_fn. vhost_worker now returns true if work was done. The main loop has been updated to call get_signal which handles SIGSTOP, freezing, and collects the message that tells the thread to exit as part of process exit. This collection clears __fatal_signal_pending. This collection is not guaranteed to clear signal_pending() so clear that explicitly so the schedule() sleeps. For now the vhost thread continues to exist and run work until the last file descriptor is closed and the release function is called as part of freeing struct file. To avoid hangs in the coredump rendezvous and when killing threads in a multi-threaded exec. The coredump code and de_thread have been modified to ignore vhost threads. Remvoing the special case for exec appears to require teaching vhost_dev_flush how to directly complete transactions in case the vhost thread is no longer running. Removing the special case for coredump rendezvous requires either the above fix needed for exec or moving the coredump rendezvous into get_signal. Fixes: 6e890c5d5021 ("vhost: use vhost_tasks for worker threads") Signed-off-by: Eric W. Biederman Co-developed-by: Mike Christie Signed-off-by: Mike Christie Acked-by: Michael S. Tsirkin Signed-off-by: Linus Torvalds --- arch/x86/include/asm/fpu/sched.h | 2 +- arch/x86/kernel/fpu/context.h | 2 +- arch/x86/kernel/fpu/core.c | 2 +- drivers/vhost/vhost.c | 22 ++------ fs/coredump.c | 4 +- include/linux/sched/task.h | 1 - include/linux/sched/vhost_task.h | 15 ++---- kernel/exit.c | 5 +- kernel/fork.c | 13 ++--- kernel/signal.c | 8 +-- kernel/vhost_task.c | 92 +++++++++++++++++++++----------- 11 files changed, 89 insertions(+), 77 deletions(-) diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h index c2d6cd78ed0c..78fcde7b1f07 100644 --- a/arch/x86/include/asm/fpu/sched.h +++ b/arch/x86/include/asm/fpu/sched.h @@ -39,7 +39,7 @@ extern void fpu_flush_thread(void); static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (cpu_feature_enabled(X86_FEATURE_FPU) && - !(current->flags & (PF_KTHREAD | PF_IO_WORKER))) { + !(current->flags & (PF_KTHREAD | PF_USER_WORKER))) { save_fpregs_to_fpstate(old_fpu); /* * The save operation preserved register state, so the diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index 9fcfa5c4dad7..af5cbdd9bd29 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -57,7 +57,7 @@ static inline void fpregs_restore_userregs(void) struct fpu *fpu = ¤t->thread.fpu; int cpu = smp_processor_id(); - if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_IO_WORKER))) + if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER))) return; if (!fpregs_state_valid(fpu, cpu)) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index caf33486dc5e..1015af1ae562 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -426,7 +426,7 @@ void kernel_fpu_begin_mask(unsigned int kfpu_mask) this_cpu_write(in_kernel_fpu, true); - if (!(current->flags & (PF_KTHREAD | PF_IO_WORKER)) && + if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) && !test_thread_flag(TIF_NEED_FPU_LOAD)) { set_thread_flag(TIF_NEED_FPU_LOAD); save_fpregs_to_fpstate(¤t->thread.fpu); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a92af08e7864..074273020849 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -256,7 +256,7 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) * test_and_set_bit() implies a memory barrier. */ llist_add(&work->node, &dev->worker->work_list); - wake_up_process(dev->worker->vtsk->task); + vhost_task_wake(dev->worker->vtsk); } } EXPORT_SYMBOL_GPL(vhost_work_queue); @@ -333,31 +333,19 @@ static void vhost_vq_reset(struct vhost_dev *dev, __vhost_vq_meta_reset(vq); } -static int vhost_worker(void *data) +static bool vhost_worker(void *data) { struct vhost_worker *worker = data; struct vhost_work *work, *work_next; struct llist_node *node; - for (;;) { - /* mb paired w/ kthread_stop */ - set_current_state(TASK_INTERRUPTIBLE); - - if (vhost_task_should_stop(worker->vtsk)) { - __set_current_state(TASK_RUNNING); - break; - } - - node = llist_del_all(&worker->work_list); - if (!node) - schedule(); - + node = llist_del_all(&worker->work_list); + if (node) { node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); - __set_current_state(TASK_RUNNING); kcov_remote_start_common(worker->kcov_handle); work->fn(work); kcov_remote_stop(); @@ -365,7 +353,7 @@ static int vhost_worker(void *data) } } - return 0; + return !!node; } static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) diff --git a/fs/coredump.c b/fs/coredump.c index ece7badf701b..88740c51b942 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -371,7 +371,9 @@ static int zap_process(struct task_struct *start, int exit_code) if (t != current && !(t->flags & PF_POSTCOREDUMP)) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); - nr++; + /* The vhost_worker does not particpate in coredumps */ + if ((t->flags & (PF_USER_WORKER | PF_IO_WORKER)) != PF_USER_WORKER) + nr++; } } diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 537cbf9a2ade..e0f5ac90a228 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -29,7 +29,6 @@ struct kernel_clone_args { u32 io_thread:1; u32 user_worker:1; u32 no_files:1; - u32 ignore_signals:1; unsigned long stack; unsigned long stack_size; unsigned long tls; diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index 6123c10b99cf..837a23624a66 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -2,22 +2,13 @@ #ifndef _LINUX_VHOST_TASK_H #define _LINUX_VHOST_TASK_H -#include -struct task_struct; +struct vhost_task; -struct vhost_task { - int (*fn)(void *data); - void *data; - struct completion exited; - unsigned long flags; - struct task_struct *task; -}; - -struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg, +struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, const char *name); void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); -bool vhost_task_should_stop(struct vhost_task *vtsk); +void vhost_task_wake(struct vhost_task *vtsk); #endif diff --git a/kernel/exit.c b/kernel/exit.c index 34b90e2e7cf7..edb50b4c9972 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -411,7 +411,10 @@ static void coredump_task_exit(struct task_struct *tsk) tsk->flags |= PF_POSTCOREDUMP; core_state = tsk->signal->core_state; spin_unlock_irq(&tsk->sighand->siglock); - if (core_state) { + + /* The vhost_worker does not particpate in coredumps */ + if (core_state && + ((tsk->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)) { struct core_thread self; self.task = current; diff --git a/kernel/fork.c b/kernel/fork.c index ed4e01daccaa..81cba91f30bb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2336,16 +2336,16 @@ __latent_entropy struct task_struct *copy_process( p->flags &= ~PF_KTHREAD; if (args->kthread) p->flags |= PF_KTHREAD; - if (args->user_worker) - p->flags |= PF_USER_WORKER; - if (args->io_thread) { + if (args->user_worker) { /* - * Mark us an IO worker, and block any signal that isn't + * Mark us a user worker, and block any signal that isn't * fatal or STOP */ - p->flags |= PF_IO_WORKER; + p->flags |= PF_USER_WORKER; siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); } + if (args->io_thread) + p->flags |= PF_IO_WORKER; if (args->name) strscpy_pad(p->comm, args->name, sizeof(p->comm)); @@ -2517,9 +2517,6 @@ __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_cleanup_io; - if (args->ignore_signals) - ignore_signals(p); - stackleak_task_init(p); if (pid != &init_struct_pid) { diff --git a/kernel/signal.c b/kernel/signal.c index 8f6330f0e9ca..2547fa73bde5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1368,7 +1368,9 @@ int zap_other_threads(struct task_struct *p) while_each_thread(p, t) { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); - count++; + /* Don't require de_thread to wait for the vhost_worker */ + if ((t->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER) + count++; /* Don't bother with already dead threads */ if (t->exit_state) @@ -2861,11 +2863,11 @@ relock: } /* - * PF_IO_WORKER threads will catch and exit on fatal signals + * PF_USER_WORKER threads will catch and exit on fatal signals * themselves. They have cleanup that must be performed, so * we cannot call do_exit() on their behalf. */ - if (current->flags & PF_IO_WORKER) + if (current->flags & PF_USER_WORKER) goto out; /* diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index b7cbd66f889e..f80d5c51ae67 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -12,58 +12,88 @@ enum vhost_task_flags { VHOST_TASK_FLAGS_STOP, }; +struct vhost_task { + bool (*fn)(void *data); + void *data; + struct completion exited; + unsigned long flags; + struct task_struct *task; +}; + static int vhost_task_fn(void *data) { struct vhost_task *vtsk = data; - int ret; + bool dead = false; + + for (;;) { + bool did_work; + + /* mb paired w/ vhost_task_stop */ + if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) + break; + + if (!dead && signal_pending(current)) { + struct ksignal ksig; + /* + * Calling get_signal will block in SIGSTOP, + * or clear fatal_signal_pending, but remember + * what was set. + * + * This thread won't actually exit until all + * of the file descriptors are closed, and + * the release function is called. + */ + dead = get_signal(&ksig); + if (dead) + clear_thread_flag(TIF_SIGPENDING); + } + + did_work = vtsk->fn(vtsk->data); + if (!did_work) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + } - ret = vtsk->fn(vtsk->data); complete(&vtsk->exited); - do_exit(ret); + do_exit(0); } +/** + * vhost_task_wake - wakeup the vhost_task + * @vtsk: vhost_task to wake + * + * wake up the vhost_task worker thread + */ +void vhost_task_wake(struct vhost_task *vtsk) +{ + wake_up_process(vtsk->task); +} +EXPORT_SYMBOL_GPL(vhost_task_wake); + /** * vhost_task_stop - stop a vhost_task * @vtsk: vhost_task to stop * - * Callers must call vhost_task_should_stop and return from their worker - * function when it returns true; + * vhost_task_fn ensures the worker thread exits after + * VHOST_TASK_FLAGS_SOP becomes true. */ void vhost_task_stop(struct vhost_task *vtsk) { - pid_t pid = vtsk->task->pid; - set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags); - wake_up_process(vtsk->task); + vhost_task_wake(vtsk); /* * Make sure vhost_task_fn is no longer accessing the vhost_task before - * freeing it below. If userspace crashed or exited without closing, - * then the vhost_task->task could already be marked dead so - * kernel_wait will return early. + * freeing it below. */ wait_for_completion(&vtsk->exited); - /* - * If we are just closing/removing a device and the parent process is - * not exiting then reap the task. - */ - kernel_wait4(pid, NULL, __WCLONE, NULL); kfree(vtsk); } EXPORT_SYMBOL_GPL(vhost_task_stop); /** - * vhost_task_should_stop - should the vhost task return from the work function - * @vtsk: vhost_task to stop - */ -bool vhost_task_should_stop(struct vhost_task *vtsk) -{ - return test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags); -} -EXPORT_SYMBOL_GPL(vhost_task_should_stop); - -/** - * vhost_task_create - create a copy of a process to be used by the kernel - * @fn: thread stack + * vhost_task_create - create a copy of a task to be used by the kernel + * @fn: vhost worker function * @arg: data to be passed to fn * @name: the thread's name * @@ -71,17 +101,17 @@ EXPORT_SYMBOL_GPL(vhost_task_should_stop); * failure. The returned task is inactive, and the caller must fire it up * through vhost_task_start(). */ -struct vhost_task *vhost_task_create(int (*fn)(void *), void *arg, +struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, const char *name) { struct kernel_clone_args args = { - .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM, + .flags = CLONE_FS | CLONE_UNTRACED | CLONE_VM | + CLONE_THREAD | CLONE_SIGHAND, .exit_signal = 0, .fn = vhost_task_fn, .name = name, .user_worker = 1, .no_files = 1, - .ignore_signals = 1, }; struct vhost_task *vtsk; struct task_struct *tsk; From fadb74f9f2f609238070c7ca1b04933dc9400e4a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 1 Jun 2023 14:23:31 -0700 Subject: [PATCH 371/934] module/decompress: Fix error checking on zstd decompression While implementing support for in-kernel decompression in kmod, finit_module() was returning a very suspicious value: finit_module(3, "", MODULE_INIT_COMPRESSED_FILE) = 18446744072717407296 It turns out the check for module_get_next_page() failing is wrong, and hence the decompression was not really taking place. Invert the condition to fix it. Fixes: 169a58ad824d ("module/decompress: Support zstd in-kernel decompression") Cc: stable@kernel.org Cc: Luis Chamberlain Cc: Dmitry Torokhov Cc: Stephen Boyd Signed-off-by: Lucas De Marchi Signed-off-by: Luis Chamberlain --- kernel/module/decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/module/decompress.c b/kernel/module/decompress.c index e97232b125eb..8a5d6d63b06c 100644 --- a/kernel/module/decompress.c +++ b/kernel/module/decompress.c @@ -257,7 +257,7 @@ static ssize_t module_zstd_decompress(struct load_info *info, do { struct page *page = module_get_next_page(info); - if (!IS_ERR(page)) { + if (IS_ERR(page)) { retval = PTR_ERR(page); goto out; } From 18e7e3e4217083a682e2c7282011c70c8a1ba070 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 29 May 2023 11:35:26 -0400 Subject: [PATCH 372/934] RDMA/bnxt_re: Fix reporting active_{speed,width} attributes After commit 6d758147c7b8 ("RDMA/bnxt_re: Use auxiliary driver interface") the active_{speed, width} attributes are reported incorrectly, This is happening because ib_get_eth_speed() is called only once from bnxt_re_ib_init() - Fix this issue by calling ib_get_eth_speed() from bnxt_re_query_port(). Fixes: 6d758147c7b8 ("RDMA/bnxt_re: Use auxiliary driver interface") Link: https://lore.kernel.org/r/20230529153525.87254-1-kheib@redhat.com Signed-off-by: Kamal Heib Acked-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 2 -- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 7 ++++--- drivers/infiniband/hw/bnxt_re/main.c | 2 -- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 5a2baf49ecaa..2c95e6f3d47a 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -135,8 +135,6 @@ struct bnxt_re_dev { struct delayed_work worker; u8 cur_prio_map; - u16 active_speed; - u8 active_width; /* FP Notification Queue (CQ & SRQ) */ struct tasklet_struct nq_task; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b1c36412025f..952811c40c54 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -199,6 +199,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num, { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + int rc; memset(port_attr, 0, sizeof(*port_attr)); @@ -228,10 +229,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num, port_attr->sm_sl = 0; port_attr->subnet_timeout = 0; port_attr->init_type_reply = 0; - port_attr->active_speed = rdev->active_speed; - port_attr->active_width = rdev->active_width; + rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed, + &port_attr->active_width); - return 0; + return rc; } int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index e34eccd178d0..3073398a2183 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1077,8 +1077,6 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) return rc; } dev_info(rdev_to_dev(rdev), "Device registered with IB successfully"); - ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, - &rdev->active_width); set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ? From 835e5ac3f98e78eca5c512bd48bd1880b90c4eb1 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 28 Apr 2023 14:01:19 +0200 Subject: [PATCH 373/934] riscv: Fix huge_ptep_set_wrprotect when PTE is a NAPOT We need to avoid inconsistencies across the PTEs that form a NAPOT region, so when we write protect such a region, we should clear and flush all the PTEs to make sure that any of those PTEs is not cached which would result in such inconsistencies (arm64 does the same). Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230428120120.21620-1-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/hugetlbpage.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index a163a3e0f0d4..238d00bdac14 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -218,6 +218,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, { pte_t pte = ptep_get(ptep); unsigned long order; + pte_t orig_pte; int i, pte_num; if (!pte_napot(pte)) { @@ -228,9 +229,12 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, order = napot_cont_order(pte); pte_num = napot_pte_num(order); ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); + + orig_pte = pte_wrprotect(orig_pte); for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) - ptep_set_wrprotect(mm, addr, ptep); + set_pte_at(mm, addr, ptep, orig_pte); } pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, From 6966d7988c4fb6af3e395868e9800c07f9e98a30 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Fri, 28 Apr 2023 14:01:20 +0200 Subject: [PATCH 374/934] riscv: Implement missing huge_ptep_get huge_ptep_get must be reimplemented in order to go through all the PTEs of a NAPOT region: this is needed because the HW can update the A/D bits of any of the PTE that constitutes the NAPOT region. Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20230428120120.21620-2-alexghiti@rivosinc.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hugetlb.h | 3 +++ arch/riscv/mm/hugetlbpage.c | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index fe6f23006641..ce1ebda1a49a 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -36,6 +36,9 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); +#define __HAVE_ARCH_HUGE_PTEP_GET +pte_t huge_ptep_get(pte_t *ptep); + pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags); #define arch_make_huge_pte arch_make_huge_pte diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 238d00bdac14..e0ef56dc57b9 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -3,6 +3,30 @@ #include #ifdef CONFIG_RISCV_ISA_SVNAPOT +pte_t huge_ptep_get(pte_t *ptep) +{ + unsigned long pte_num; + int i; + pte_t orig_pte = ptep_get(ptep); + + if (!pte_present(orig_pte) || !pte_napot(orig_pte)) + return orig_pte; + + pte_num = napot_pte_num(napot_cont_order(orig_pte)); + + for (i = 0; i < pte_num; i++, ptep++) { + pte_t pte = ptep_get(ptep); + + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); + } + + return orig_pte; +} + pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, From ed779fe4c9b5a20b4ab4fd6f3e19807445bb78c7 Mon Sep 17 00:00:00 2001 From: Qingfang DENG Date: Thu, 1 Jun 2023 09:54:32 +0800 Subject: [PATCH 375/934] neighbour: fix unaligned access to pneigh_entry After the blamed commit, the member key is longer 4-byte aligned. On platforms that do not support unaligned access, e.g., MIPS32R2 with unaligned_action set to 1, this will trigger a crash when accessing an IPv6 pneigh_entry, as the key is cast to an in6_addr pointer. Change the type of the key to u32 to make it aligned. Fixes: 62dd93181aaa ("[IPV6] NDISC: Set per-entry is_router flag in Proxy NA.") Signed-off-by: Qingfang DENG Link: https://lore.kernel.org/r/20230601015432.159066-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 3fa5774bddac..f6a8ecc6b1fa 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -180,7 +180,7 @@ struct pneigh_entry { netdevice_tracker dev_tracker; u32 flags; u8 protocol; - u8 key[]; + u32 key[]; }; /* From 5a59a58ec25d44f853c26bdbfda47d73b3067435 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Wed, 31 May 2023 16:38:26 +0200 Subject: [PATCH 376/934] net: dsa: lan9303: allow vid != 0 in port_fdb_{add|del} methods LAN9303 doesn't associate FDB (ALR) entries with VLANs, it has just one global Address Logic Resolution table [1]. Ignore VID in port_fdb_{add|del} methods, go on with the global table. This is the same semantics as hellcreek or RZ/N1 implement. Visible symptoms: LAN9303_MDIO 5b050000.ethernet-1:00: port 2 failed to delete 00:xx:xx:xx:xx:cf vid 1 from fdb: -2 LAN9303_MDIO 5b050000.ethernet-1:00: port 2 failed to add 00:xx:xx:xx:xx:cf vid 1 to fdb: -95 [1] https://ww1.microchip.com/downloads/en/DeviceDoc/00002308A.pdf Fixes: 0620427ea0d6 ("net: dsa: lan9303: Add fdb/mdb manipulation") Signed-off-by: Alexander Sverdlin Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230531143826.477267-1-alexander.sverdlin@siemens.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lan9303-core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index cbe831875347..c0215a8770f4 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1188,8 +1188,6 @@ static int lan9303_port_fdb_add(struct dsa_switch *ds, int port, struct lan9303 *chip = ds->priv; dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid); - if (vid) - return -EOPNOTSUPP; return lan9303_alr_add_port(chip, addr, port, false); } @@ -1201,8 +1199,6 @@ static int lan9303_port_fdb_del(struct dsa_switch *ds, int port, struct lan9303 *chip = ds->priv; dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid); - if (vid) - return -EOPNOTSUPP; lan9303_alr_del_port(chip, addr, port); return 0; From e209fee4118fe9a449d4d805361eb2de6796be39 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Thu, 1 Jun 2023 12:13:05 +0900 Subject: [PATCH 377/934] net/ipv4: ping_group_range: allow GID from 2147483648 to 4294967294 With this commit, all the GIDs ("0 4294967294") can be written to the "net.ipv4.ping_group_range" sysctl. Note that 4294967295 (0xffffffff) is an invalid GID (see gid_valid() in include/linux/uidgid.h), and an attempt to register this number will cause -EINVAL. Prior to this commit, only up to GID 2147483647 could be covered. Documentation/networking/ip-sysctl.rst had "0 4294967295" as an example value, but this example was wrong and causing -EINVAL. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Co-developed-by: Kuniyuki Iwashima Signed-off-by: Kuniyuki Iwashima Signed-off-by: Akihiro Suda Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 4 ++-- include/net/ping.h | 6 +----- net/ipv4/sysctl_net_ipv4.c | 8 ++++---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 6ec06a33688a..80b8f73a0244 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1352,8 +1352,8 @@ ping_group_range - 2 INTEGERS Restrict ICMP_PROTO datagram sockets to users in the group range. The default is "1 0", meaning, that nobody (not even root) may create ping sockets. Setting it to "100 100" would grant permissions - to the single group. "0 4294967295" would enable it for the world, "100 - 4294967295" would enable it for the users, but not daemons. + to the single group. "0 4294967294" would enable it for the world, "100 + 4294967294" would enable it for the users, but not daemons. tcp_early_demux - BOOLEAN Enable early demux for established TCP sockets. diff --git a/include/net/ping.h b/include/net/ping.h index 9233ad3de0ad..bc7779262e60 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -16,11 +16,7 @@ #define PING_HTABLE_SIZE 64 #define PING_HTABLE_MASK (PING_HTABLE_SIZE-1) -/* - * gid_t is either uint or ushort. We want to pass it to - * proc_dointvec_minmax(), so it must not be larger than MAX_INT - */ -#define GID_T_MAX (((gid_t)~0U) >> 1) +#define GID_T_MAX (((gid_t)~0U) - 1) /* Compatibility glue so we can support IPv6 when it's compiled as a module */ struct pingv6_ops { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 40fe70fc2015..88dfe51e68f3 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -34,8 +34,8 @@ static int ip_ttl_min = 1; static int ip_ttl_max = 255; static int tcp_syn_retries_min = 1; static int tcp_syn_retries_max = MAX_TCP_SYNCNT; -static int ip_ping_group_range_min[] = { 0, 0 }; -static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; +static unsigned long ip_ping_group_range_min[] = { 0, 0 }; +static unsigned long ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static u32 u32_max_div_HZ = UINT_MAX / HZ; static int one_day_secs = 24 * 3600; static u32 fib_multipath_hash_fields_all_mask __maybe_unused = @@ -165,7 +165,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, { struct user_namespace *user_ns = current_user_ns(); int ret; - gid_t urange[2]; + unsigned long urange[2]; kgid_t low, high; struct ctl_table tmp = { .data = &urange, @@ -178,7 +178,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, inet_get_ping_group_range_table(table, &low, &high); urange[0] = from_kgid_munged(user_ns, low); urange[1] = from_kgid_munged(user_ns, high); - ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { low = make_kgid(user_ns, urange[0]); From f93b30e50a81a16cefa262b68bde5c4096ea9235 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 1 Jun 2023 11:30:02 +0800 Subject: [PATCH 378/934] net: systemport: Replace platform_get_irq with platform_get_irq_optional Replace platform_get_irq with platform_get_irq_optional because wol_irq is optional. Signed-off-by: Jiasheng Jiang Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 38d0cdaf22a5..bf1611cce974 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2531,9 +2531,9 @@ static int bcm_sysport_probe(struct platform_device *pdev) priv->irq0 = platform_get_irq(pdev, 0); if (!priv->is_lite) { priv->irq1 = platform_get_irq(pdev, 1); - priv->wol_irq = platform_get_irq(pdev, 2); + priv->wol_irq = platform_get_irq_optional(pdev, 2); } else { - priv->wol_irq = platform_get_irq(pdev, 1); + priv->wol_irq = platform_get_irq_optional(pdev, 1); } if (priv->irq0 <= 0 || (priv->irq1 <= 0 && !priv->is_lite)) { ret = -EINVAL; From f1832e2b5e498e258b090af3b065b85cf8cc5161 Mon Sep 17 00:00:00 2001 From: Jerry Meng Date: Wed, 31 May 2023 11:51:16 +0800 Subject: [PATCH 379/934] USB: serial: option: add Quectel EM061KGL series Add support for Quectel EM061KGL series which are based on Qualcomm SDX12 chip: EM061KGL_LTA(0x2c7c / 0x0123): MBIM + GNSS + DIAG + NMEA + AT + QDSS + DPL EM061KGL_LMS(0x2c7c / 0x0124): MBIM + GNSS + DIAG + NMEA + AT + QDSS + DPL EM061KGL_LWW(0x2c7c / 0x6008): MBIM + GNSS + DIAG + NMEA + AT + QDSS + DPL EM061KGL_LCN(0x2c7c / 0x6009): MBIM + GNSS + DIAG + NMEA + AT + QDSS + DPL Above products use the exact same interface layout and option driver is for interfaces DIAG, NMEA and AT. T: Bus=03 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=6008 Rev= 5.04 S: Manufacturer=Quectel S: Product=Quectel EM061K-GL S: SerialNumber=f6fa08b6 C:* #Ifs= 8 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=70 Driver=(none) E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=80 Driver=(none) E: Ad=8f(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Jerry Meng Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 644a55447fd7..fd42e3a0bd18 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -248,6 +248,8 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_VENDOR_ID 0x2c7c /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 +#define QUECTEL_PRODUCT_EM061K_LTA 0x0123 +#define QUECTEL_PRODUCT_EM061K_LMS 0x0124 #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 @@ -266,6 +268,8 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_RM520N 0x0801 #define QUECTEL_PRODUCT_EC200U 0x0901 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 +#define QUECTEL_PRODUCT_EM061K_LWW 0x6008 +#define QUECTEL_PRODUCT_EM061K_LCN 0x6009 #define QUECTEL_PRODUCT_EC200T 0x6026 #define QUECTEL_PRODUCT_RM500K 0x7001 @@ -1189,6 +1193,18 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LMS, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LTA, 0xff, 0xff, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0x00, 0x40) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LWW, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, From 37a826d86ff746c4eac8bd3415af19f3c9598206 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 1 Jun 2023 23:31:04 +0200 Subject: [PATCH 380/934] net: dsa: qca8k: add CONFIG_LEDS_TRIGGERS dependency Without LED triggers, the driver now fails to build: drivers/net/dsa/qca/qca8k-leds.c: In function 'qca8k_parse_port_leds': drivers/net/dsa/qca/qca8k-leds.c:403:31: error: 'struct led_classdev' has no member named 'hw_control_is_supported' 403 | port_led->cdev.hw_control_is_supported = qca8k_cled_hw_control_is_supported; | ^ There is a mix of 'depends on' and 'select' for LEDS_TRIGGERS, so it's not clear what we should use here, but in general using 'depends on' causes fewer problems, so use that. Fixes: e0256648c831a ("net: dsa: qca8k: implement hw_control ops") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/dsa/qca/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/qca/Kconfig b/drivers/net/dsa/qca/Kconfig index 4347b42c50fd..de9da469908b 100644 --- a/drivers/net/dsa/qca/Kconfig +++ b/drivers/net/dsa/qca/Kconfig @@ -20,6 +20,7 @@ config NET_DSA_QCA8K_LEDS_SUPPORT bool "Qualcomm Atheros QCA8K Ethernet switch family LEDs support" depends on NET_DSA_QCA8K depends on LEDS_CLASS=y || LEDS_CLASS=NET_DSA_QCA8K + depends on LEDS_TRIGGERS help This enabled support for LEDs present on the Qualcomm Atheros QCA8K Ethernet switch chips. From 635071f5fee31550e921644b2becc42b3ff1036c Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Thu, 1 Jun 2023 19:19:35 -0600 Subject: [PATCH 381/934] ASoC: simple-card-utils: fix PCM constraint error check The code in asoc_simple_startup was treating any non-zero return from snd_pcm_hw_constraint_minmax as an error, when this can return 1 in some normal cases and only negative values indicate an error. When this happened, it caused asoc_simple_startup to disable the clocks it just enabled and return 1, which was not treated as an error by the calling code which only checks for negative return values. Then when the PCM is eventually shut down, it causes the clock framework to complain about disabling clocks that were not enabled. Fix the check for snd_pcm_hw_constraint_minmax return value to only treat negative values as an error. Fixes: 5ca2ab459817 ("ASoC: simple-card-utils: Add new system-clock-fixed flag") Signed-off-by: Robert Hancock Link: https://lore.kernel.org/r/20230602011936.231931-1-robert.hancock@calian.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index 467edd96eae5..e5ff61c1e9d1 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -314,7 +314,7 @@ int asoc_simple_startup(struct snd_pcm_substream *substream) } ret = snd_pcm_hw_constraint_minmax(substream->runtime, SNDRV_PCM_HW_PARAM_RATE, fixed_rate, fixed_rate); - if (ret) + if (ret < 0) goto codec_err; } From 32cf0046a652116d6a216d575f3049a9ff9dd80d Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Tue, 30 May 2023 18:30:12 +0800 Subject: [PATCH 382/934] ASoC: fsl_sai: Enable BCI bit if SAI works on synchronous mode with BYP asserted There's an issue on SAI synchronous mode that TX/RX side can't get BCLK from RX/TX it sync with if BYP bit is asserted. It's a workaround to fix it that enable SION of IOMUX pad control and assert BCI. For example if TX sync with RX which means both TX and RX are using clk form RX and BYP=1. TX can get BCLK only if the following two conditions are valid: 1. SION of RX BCLK IOMUX pad is set to 1 2. BCI of TX is set to 1 Signed-off-by: Chancel Liu Acked-by: Shengjiu Wang Link: https://lore.kernel.org/r/20230530103012.3448838-1-chancel.liu@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 11 +++++++++-- sound/soc/fsl/fsl_sai.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index abdaffb00fbd..e3105d48fb65 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -491,14 +491,21 @@ static int fsl_sai_set_bclk(struct snd_soc_dai *dai, bool tx, u32 freq) regmap_update_bits(sai->regmap, reg, FSL_SAI_CR2_MSEL_MASK, FSL_SAI_CR2_MSEL(sai->mclk_id[tx])); - if (savediv == 1) + if (savediv == 1) { regmap_update_bits(sai->regmap, reg, FSL_SAI_CR2_DIV_MASK | FSL_SAI_CR2_BYP, FSL_SAI_CR2_BYP); - else + if (fsl_sai_dir_is_synced(sai, adir)) + regmap_update_bits(sai->regmap, FSL_SAI_xCR2(tx, ofs), + FSL_SAI_CR2_BCI, FSL_SAI_CR2_BCI); + else + regmap_update_bits(sai->regmap, FSL_SAI_xCR2(tx, ofs), + FSL_SAI_CR2_BCI, 0); + } else { regmap_update_bits(sai->regmap, reg, FSL_SAI_CR2_DIV_MASK | FSL_SAI_CR2_BYP, savediv / 2 - 1); + } if (sai->soc_data->max_register >= FSL_SAI_MCTL) { /* SAI is in master mode at this point, so enable MCLK */ diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h index 197748a888d5..a53c4f0e25fa 100644 --- a/sound/soc/fsl/fsl_sai.h +++ b/sound/soc/fsl/fsl_sai.h @@ -116,6 +116,7 @@ /* SAI Transmit and Receive Configuration 2 Register */ #define FSL_SAI_CR2_SYNC BIT(30) +#define FSL_SAI_CR2_BCI BIT(28) #define FSL_SAI_CR2_MSEL_MASK (0x3 << 26) #define FSL_SAI_CR2_MSEL_BUS 0 #define FSL_SAI_CR2_MSEL_MCLK1 BIT(26) From f3c3762178fad5cf5dc108fb061ba010818323b4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 19 May 2023 18:14:40 +0100 Subject: [PATCH 383/934] arm64: Remove the ARCH_FORCE_MAX_ORDER config input prompt Commit 34affcd7577a ("arm64: drop ranges in definition of ARCH_FORCE_MAX_ORDER") dropped the ranges from the config entry and introduced an EXPERT condition on the input prompt instead. However, starting with defconfig (ARCH_FORCE_MAX_ORDER of 10) and setting ARM64_64K_PAGES together with EXPERT leaves MAX_ORDER 10 which fails to build in this configuration. Drop the input prompt for ARCH_FORCE_MAX_ORDER completely so that it's no longer configurable. People requiring a higher MAX_ORDER should send a patch changing the default, together with proper justification. Fixes: 34affcd7577a ("arm64: drop ranges in definition of ARCH_FORCE_MAX_ORDER") Signed-off-by: Catalin Marinas Reported-by: Marc Zyngier Cc: Will Deacon Cc: Mike Rapoport Cc: Andrew Morton Cc: Justin M. Forbes Reviewed-by: Anshuman Khandual Acked-by: Marc Zyngier Acked-by: Mike Rapoport (IBM) Link: https://lore.kernel.org/r/20230519171440.1941213-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1201d25a8a4..343e1e1cae10 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1516,7 +1516,7 @@ config XEN # 16K | 27 | 14 | 13 | 11 | # 64K | 29 | 16 | 13 | 13 | config ARCH_FORCE_MAX_ORDER - int "Order of maximal physically contiguous allocations" if EXPERT && (ARM64_4K_PAGES || ARM64_16K_PAGES) + int default "13" if ARM64_64K_PAGES default "11" if ARM64_16K_PAGES default "10" From 0e2aba694866b451db0932a6706683c48379134c Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 May 2023 21:13:05 +0800 Subject: [PATCH 384/934] arm64: mm: pass original fault address to handle_mm_fault() in PER_VMA_LOCK block When reading the arm64's PER_VMA_LOCK support code, I found a bit difference between arm64 and other arch when calling handle_mm_fault() during VMA lock-based page fault handling: the fault address is masked before passing to handle_mm_fault(). This is also different from the usage in mmap_lock-based handling. I think we need to pass the original fault address to handle_mm_fault() as we did in commit 84c5e23edecd ("arm64: mm: Pass original fault address to handle_mm_fault()"). If we go through the code path further, we can find that the "masked" fault address can cause mismatched fault address between perf sw major/minor page fault sw event and perf page fault sw event: do_page_fault perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, ..., addr) // orig addr handle_mm_fault mm_account_fault perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, ...) // masked addr Fixes: cd7f176aea5f ("arm64/mm: try VMA lock-based page fault handling first") Signed-off-by: Jisheng Zhang Reviewed-by: Suren Baghdasaryan Reviewed-by: Anshuman Khandual Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20230524131305.2808-1-jszhang@kernel.org Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index cb21ccd7940d..6045a5117ac1 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -600,8 +600,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, vma_end_read(vma); goto lock_mmap; } - fault = handle_mm_fault(vma, addr & PAGE_MASK, - mm_flags | FAULT_FLAG_VMA_LOCK, regs); + fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs); vma_end_read(vma); if (!(fault & VM_FAULT_RETRY)) { From c3d03e8e35e005e1a614e51bb59053eeb5857f76 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Thu, 8 Dec 2022 10:56:46 +0100 Subject: [PATCH 385/934] KEYS: asymmetric: Copy sig and digest in public_key_verify_signature() Commit ac4e97abce9b8 ("scatterlist: sg_set_buf() argument must be in linear mapping") checks that both the signature and the digest reside in the linear mapping area. However, more recently commit ba14a194a434c ("fork: Add generic vmalloced stack support") made it possible to move the stack in the vmalloc area, which is not contiguous, and thus not suitable for sg_set_buf() which needs adjacent pages. Always make a copy of the signature and digest in the same buffer used to store the key and its parameters, and pass them to sg_init_one(). Prefer it to conditionally doing the copy if necessary, to keep the code simple. The buffer allocated with kmalloc() is in the linear mapping area. Cc: stable@vger.kernel.org # 4.9.x Fixes: ba14a194a434 ("fork: Add generic vmalloced stack support") Link: https://lore.kernel.org/linux-integrity/Y4pIpxbjBdajymBJ@sol.localdomain/ Suggested-by: Eric Biggers Signed-off-by: Roberto Sassu Reviewed-by: Eric Biggers Tested-by: Stefan Berger --- crypto/asymmetric_keys/public_key.c | 38 ++++++++++++++++------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index eca5671ad3f2..50c933f86b21 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -380,9 +380,10 @@ int public_key_verify_signature(const struct public_key *pkey, struct crypto_wait cwait; struct crypto_akcipher *tfm; struct akcipher_request *req; - struct scatterlist src_sg[2]; + struct scatterlist src_sg; char alg_name[CRYPTO_MAX_ALG_NAME]; - char *key, *ptr; + char *buf, *ptr; + size_t buf_len; int ret; pr_devel("==>%s()\n", __func__); @@ -420,34 +421,37 @@ int public_key_verify_signature(const struct public_key *pkey, if (!req) goto error_free_tfm; - key = kmalloc(pkey->keylen + sizeof(u32) * 2 + pkey->paramlen, - GFP_KERNEL); - if (!key) + buf_len = max_t(size_t, pkey->keylen + sizeof(u32) * 2 + pkey->paramlen, + sig->s_size + sig->digest_size); + + buf = kmalloc(buf_len, GFP_KERNEL); + if (!buf) goto error_free_req; - memcpy(key, pkey->key, pkey->keylen); - ptr = key + pkey->keylen; + memcpy(buf, pkey->key, pkey->keylen); + ptr = buf + pkey->keylen; ptr = pkey_pack_u32(ptr, pkey->algo); ptr = pkey_pack_u32(ptr, pkey->paramlen); memcpy(ptr, pkey->params, pkey->paramlen); if (pkey->key_is_private) - ret = crypto_akcipher_set_priv_key(tfm, key, pkey->keylen); + ret = crypto_akcipher_set_priv_key(tfm, buf, pkey->keylen); else - ret = crypto_akcipher_set_pub_key(tfm, key, pkey->keylen); + ret = crypto_akcipher_set_pub_key(tfm, buf, pkey->keylen); if (ret) - goto error_free_key; + goto error_free_buf; if (strcmp(pkey->pkey_algo, "sm2") == 0 && sig->data_size) { ret = cert_sig_digest_update(sig, tfm); if (ret) - goto error_free_key; + goto error_free_buf; } - sg_init_table(src_sg, 2); - sg_set_buf(&src_sg[0], sig->s, sig->s_size); - sg_set_buf(&src_sg[1], sig->digest, sig->digest_size); - akcipher_request_set_crypt(req, src_sg, NULL, sig->s_size, + memcpy(buf, sig->s, sig->s_size); + memcpy(buf + sig->s_size, sig->digest, sig->digest_size); + + sg_init_one(&src_sg, buf, sig->s_size + sig->digest_size); + akcipher_request_set_crypt(req, &src_sg, NULL, sig->s_size, sig->digest_size); crypto_init_wait(&cwait); akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | @@ -455,8 +459,8 @@ int public_key_verify_signature(const struct public_key *pkey, crypto_req_done, &cwait); ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait); -error_free_key: - kfree(key); +error_free_buf: + kfree(buf); error_free_req: akcipher_request_free(req); error_free_tfm: From b0fd1852bcc21accca6260ef245356d5c141ff66 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Fri, 2 Jun 2023 02:26:12 +0200 Subject: [PATCH 386/934] bpf: Fix UAF in task local storage When task local storage was generalized for tracing programs, the bpf_task_local_storage callback was moved from a BPF LSM hook callback for security_task_free LSM hook to it's own callback. But a failure case in bad_fork_cleanup_security was missed which, when triggered, led to a dangling task owner pointer and a subsequent use-after-free. Move the bpf_task_storage_free to the very end of free_task to handle all failure cases. This issue was noticed when a BPF LSM program was attached to the task_alloc hook on a kernel with KASAN enabled. The program used bpf_task_storage_get to copy the task local storage from the current task to the new task being created. Fixes: a10787e6d58c ("bpf: Enable task local storage for tracing programs") Reported-by: Kuba Piecuch Signed-off-by: KP Singh Acked-by: Song Liu Link: https://lore.kernel.org/r/20230602002612.1117381-1-kpsingh@kernel.org Signed-off-by: Martin KaFai Lau --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/fork.c b/kernel/fork.c index ed4e01daccaa..cb20f9f596d3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -627,6 +627,7 @@ void free_task(struct task_struct *tsk) arch_release_task_struct(tsk); if (tsk->flags & PF_KTHREAD) free_kthread_struct(tsk); + bpf_task_storage_free(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -979,7 +980,6 @@ void __put_task_struct(struct task_struct *tsk) cgroup_free(tsk); task_numa_free(tsk, true); security_task_free(tsk); - bpf_task_storage_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); From f1a411873c85b642f13b01f21b534c2bab81fc1b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 28 May 2023 00:23:09 +0900 Subject: [PATCH 387/934] ksmbd: fix out-of-bound read in deassemble_neg_contexts() The check in the beginning is `clen + sizeof(struct smb2_neg_context) <= len_of_ctxts`, but in the end of loop, `len_of_ctxts` will subtract `((clen + 7) & ~0x7) + sizeof(struct smb2_neg_context)`, which causes integer underflow when clen does the 8 alignment. We should use `(clen + 7) & ~0x7` in the check to avoid underflow from happening. Then there are some variables that need to be declared unsigned instead of signed. [ 11.671070] BUG: KASAN: slab-out-of-bounds in smb2_handle_negotiate+0x799/0x1610 [ 11.671533] Read of size 2 at addr ffff888005e86cf2 by task kworker/0:0/7 ... [ 11.673383] Call Trace: [ 11.673541] [ 11.673679] dump_stack_lvl+0x33/0x50 [ 11.673913] print_report+0xcc/0x620 [ 11.674671] kasan_report+0xae/0xe0 [ 11.675171] kasan_check_range+0x35/0x1b0 [ 11.675412] smb2_handle_negotiate+0x799/0x1610 [ 11.676217] ksmbd_smb_negotiate_common+0x526/0x770 [ 11.676795] handle_ksmbd_work+0x274/0x810 ... Cc: stable@vger.kernel.org Signed-off-by: Chih-Yen Chang Tested-by: Chih-Yen Chang Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 7a81541de602..25c0ba04c59d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -963,13 +963,13 @@ static void decode_sign_cap_ctxt(struct ksmbd_conn *conn, static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn, struct smb2_negotiate_req *req, - int len_of_smb) + unsigned int len_of_smb) { /* +4 is to account for the RFC1001 len field */ struct smb2_neg_context *pctx = (struct smb2_neg_context *)req; int i = 0, len_of_ctxts; - int offset = le32_to_cpu(req->NegotiateContextOffset); - int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount); + unsigned int offset = le32_to_cpu(req->NegotiateContextOffset); + unsigned int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount); __le32 status = STATUS_INVALID_PARAMETER; ksmbd_debug(SMB, "decoding %d negotiate contexts\n", neg_ctxt_cnt); @@ -983,7 +983,7 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn, while (i++ < neg_ctxt_cnt) { int clen, ctxt_len; - if (len_of_ctxts < sizeof(struct smb2_neg_context)) + if (len_of_ctxts < (int)sizeof(struct smb2_neg_context)) break; pctx = (struct smb2_neg_context *)((char *)pctx + offset); @@ -1038,9 +1038,8 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn, } /* offsets must be 8 byte aligned */ - clen = (clen + 7) & ~0x7; - offset = clen + sizeof(struct smb2_neg_context); - len_of_ctxts -= clen + sizeof(struct smb2_neg_context); + offset = (ctxt_len + 7) & ~0x7; + len_of_ctxts -= offset; } return status; } From fc6c6a3c324c1b3e93a03d0cfa3749c781f23de0 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 28 May 2023 00:23:41 +0900 Subject: [PATCH 388/934] ksmbd: fix out-of-bound read in parse_lease_state() This bug is in parse_lease_state, and it is caused by the missing check of `struct create_context`. When the ksmbd traverses the create_contexts, it doesn't check if the field of `NameOffset` and `Next` is valid, The KASAN message is following: [ 6.664323] BUG: KASAN: slab-out-of-bounds in parse_lease_state+0x7d/0x280 [ 6.664738] Read of size 2 at addr ffff888005c08988 by task kworker/0:3/103 ... [ 6.666644] Call Trace: [ 6.666796] [ 6.666933] dump_stack_lvl+0x33/0x50 [ 6.667167] print_report+0xcc/0x620 [ 6.667903] kasan_report+0xae/0xe0 [ 6.668374] kasan_check_range+0x35/0x1b0 [ 6.668621] parse_lease_state+0x7d/0x280 [ 6.668868] smb2_open+0xbe8/0x4420 [ 6.675137] handle_ksmbd_work+0x282/0x820 Use smb2_find_context_vals() to find smb2 create request lease context. smb2_find_context_vals validate create context fields. Cc: stable@vger.kernel.org Reported-by: Chih-Yen Chang Tested-by: Chih-Yen Chang Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 66 +++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 42 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index db181bdad73a..844b303baf29 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1415,56 +1415,38 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) */ struct lease_ctx_info *parse_lease_state(void *open_req) { - char *data_offset; struct create_context *cc; - unsigned int next = 0; - char *name; - bool found = false; struct smb2_create_req *req = (struct smb2_create_req *)open_req; - struct lease_ctx_info *lreq = kzalloc(sizeof(struct lease_ctx_info), - GFP_KERNEL); + struct lease_ctx_info *lreq; + + cc = smb2_find_context_vals(req, SMB2_CREATE_REQUEST_LEASE, 4); + if (IS_ERR_OR_NULL(cc)) + return NULL; + + lreq = kzalloc(sizeof(struct lease_ctx_info), GFP_KERNEL); if (!lreq) return NULL; - data_offset = (char *)req + le32_to_cpu(req->CreateContextsOffset); - cc = (struct create_context *)data_offset; - do { - cc = (struct create_context *)((char *)cc + next); - name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) != 4 || - strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) { - next = le32_to_cpu(cc->Next); - continue; - } - found = true; - break; - } while (next != 0); + if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) { + struct create_lease_v2 *lc = (struct create_lease_v2 *)cc; - if (found) { - if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) { - struct create_lease_v2 *lc = (struct create_lease_v2 *)cc; + memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); + lreq->req_state = lc->lcontext.LeaseState; + lreq->flags = lc->lcontext.LeaseFlags; + lreq->duration = lc->lcontext.LeaseDuration; + memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, + SMB2_LEASE_KEY_SIZE); + lreq->version = 2; + } else { + struct create_lease *lc = (struct create_lease *)cc; - memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); - lreq->req_state = lc->lcontext.LeaseState; - lreq->flags = lc->lcontext.LeaseFlags; - lreq->duration = lc->lcontext.LeaseDuration; - memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, - SMB2_LEASE_KEY_SIZE); - lreq->version = 2; - } else { - struct create_lease *lc = (struct create_lease *)cc; - - memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); - lreq->req_state = lc->lcontext.LeaseState; - lreq->flags = lc->lcontext.LeaseFlags; - lreq->duration = lc->lcontext.LeaseDuration; - lreq->version = 1; - } - return lreq; + memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); + lreq->req_state = lc->lcontext.LeaseState; + lreq->flags = lc->lcontext.LeaseFlags; + lreq->duration = lc->lcontext.LeaseDuration; + lreq->version = 1; } - - kfree(lreq); - return NULL; + return lreq; } /** From 25933573ef48f3586f559c2cac6c436c62dcf63f Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 30 May 2023 21:42:34 +0900 Subject: [PATCH 389/934] ksmbd: fix posix_acls and acls dereferencing possible ERR_PTR() Dan reported the following error message: fs/smb/server/smbacl.c:1296 smb_check_perm_dacl() error: 'posix_acls' dereferencing possible ERR_PTR() fs/smb/server/vfs.c:1323 ksmbd_vfs_make_xattr_posix_acl() error: 'posix_acls' dereferencing possible ERR_PTR() fs/smb/server/vfs.c:1830 ksmbd_vfs_inherit_posix_acl() error: 'acls' dereferencing possible ERR_PTR() __get_acl() returns a mix of error pointers and NULL. This change it with IS_ERR_OR_NULL(). Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org Reported-by: Dan Carpenter Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smbacl.c | 4 ++-- fs/smb/server/vfs.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 6d6cfb6957a9..0a5862a61c77 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1290,7 +1290,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, if (IS_ENABLED(CONFIG_FS_POSIX_ACL)) { posix_acls = get_inode_acl(d_inode(path->dentry), ACL_TYPE_ACCESS); - if (posix_acls && !found) { + if (!IS_ERR_OR_NULL(posix_acls) && !found) { unsigned int id = -1; pa_entry = posix_acls->a_entries; @@ -1314,7 +1314,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, const struct path *path, } } } - if (posix_acls) + if (!IS_ERR_OR_NULL(posix_acls)) posix_acl_release(posix_acls); } diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 6f302919e9f7..f9fb778247e7 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1321,7 +1321,7 @@ static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct mnt_idmap *id return NULL; posix_acls = get_inode_acl(inode, acl_type); - if (!posix_acls) + if (IS_ERR_OR_NULL(posix_acls)) return NULL; smb_acl = kzalloc(sizeof(struct xattr_smb_acl) + @@ -1830,7 +1830,7 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, return -EOPNOTSUPP; acls = get_inode_acl(parent_inode, ACL_TYPE_DEFAULT); - if (!acls) + if (IS_ERR_OR_NULL(acls)) return -ENOENT; pace = acls->a_entries; From 368ba06881c395f1c9a7ba22203cf8d78b4addc0 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 30 May 2023 23:10:31 +0900 Subject: [PATCH 390/934] ksmbd: check the validation of pdu_size in ksmbd_conn_handler_loop The length field of netbios header must be greater than the SMB header sizes(smb1 or smb2 header), otherwise the packet is an invalid SMB packet. If `pdu_size` is 0, ksmbd allocates a 4 bytes chunk to `conn->request_buf`. In the function `get_smb2_cmd_val` ksmbd will read cmd from `rcv_hdr->Command`, which is `conn->request_buf + 12`, causing the KASAN detector to print the following error message: [ 7.205018] BUG: KASAN: slab-out-of-bounds in get_smb2_cmd_val+0x45/0x60 [ 7.205423] Read of size 2 at addr ffff8880062d8b50 by task ksmbd:42632/248 ... [ 7.207125] [ 7.209191] get_smb2_cmd_val+0x45/0x60 [ 7.209426] ksmbd_conn_enqueue_request+0x3a/0x100 [ 7.209712] ksmbd_server_process_request+0x72/0x160 [ 7.210295] ksmbd_conn_handler_loop+0x30c/0x550 [ 7.212280] kthread+0x160/0x190 [ 7.212762] ret_from_fork+0x1f/0x30 [ 7.212981] Cc: stable@vger.kernel.org Reported-by: Chih-Yen Chang Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 4882a812ea86..e11d4a1e63d7 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -294,6 +294,9 @@ bool ksmbd_conn_alive(struct ksmbd_conn *conn) return true; } +#define SMB1_MIN_SUPPORTED_HEADER_SIZE (sizeof(struct smb_hdr)) +#define SMB2_MIN_SUPPORTED_HEADER_SIZE (sizeof(struct smb2_hdr) + 4) + /** * ksmbd_conn_handler_loop() - session thread to listen on new smb requests * @p: connection instance @@ -350,6 +353,9 @@ int ksmbd_conn_handler_loop(void *p) if (pdu_size > MAX_STREAM_PROT_LEN) break; + if (pdu_size < SMB1_MIN_SUPPORTED_HEADER_SIZE) + break; + /* 4 for rfc1002 length field */ /* 1 for implied bcc[0] */ size = pdu_size + 4 + 1; @@ -377,6 +383,12 @@ int ksmbd_conn_handler_loop(void *p) continue; } + if (((struct smb2_hdr *)smb2_get_msg(conn->request_buf))->ProtocolId == + SMB2_PROTO_NUMBER) { + if (pdu_size < SMB2_MIN_SUPPORTED_HEADER_SIZE) + break; + } + if (!default_conn_ops.process_fn) { pr_err("No connection request callback\n"); break; From 1c1bcf2d3ea061613119b534f57507c377df20f9 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 31 May 2023 17:59:32 +0900 Subject: [PATCH 391/934] ksmbd: validate smb request protocol id This patch add the validation for smb request protocol id. If it is not one of the four ids(SMB1_PROTO_NUMBER, SMB2_PROTO_NUMBER, SMB2_TRANSFORM_PROTO_NUM, SMB2_COMPRESSION_TRANSFORM_ID), don't allow processing the request. And this will fix the following KASAN warning also. [ 13.905265] BUG: KASAN: slab-out-of-bounds in init_smb2_rsp_hdr+0x1b9/0x1f0 [ 13.905900] Read of size 16 at addr ffff888005fd2f34 by task kworker/0:2/44 ... [ 13.908553] Call Trace: [ 13.908793] [ 13.908995] dump_stack_lvl+0x33/0x50 [ 13.909369] print_report+0xcc/0x620 [ 13.910870] kasan_report+0xae/0xe0 [ 13.911519] kasan_check_range+0x35/0x1b0 [ 13.911796] init_smb2_rsp_hdr+0x1b9/0x1f0 [ 13.912492] handle_ksmbd_work+0xe5/0x820 Cc: stable@vger.kernel.org Reported-by: Chih-Yen Chang Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 5 +++-- fs/smb/server/smb_common.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index e11d4a1e63d7..2a717d158f02 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -364,8 +364,6 @@ int ksmbd_conn_handler_loop(void *p) break; memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf)); - if (!ksmbd_smb_request(conn)) - break; /* * We already read 4 bytes to find out PDU size, now @@ -383,6 +381,9 @@ int ksmbd_conn_handler_loop(void *p) continue; } + if (!ksmbd_smb_request(conn)) + break; + if (((struct smb2_hdr *)smb2_get_msg(conn->request_buf))->ProtocolId == SMB2_PROTO_NUMBER) { if (pdu_size < SMB2_MIN_SUPPORTED_HEADER_SIZE) diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index af0c2a9b8529..569e5eecdf3d 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -158,7 +158,19 @@ int ksmbd_verify_smb_message(struct ksmbd_work *work) */ bool ksmbd_smb_request(struct ksmbd_conn *conn) { - return conn->request_buf[0] == 0; + __le32 *proto = (__le32 *)smb2_get_msg(conn->request_buf); + + if (*proto == SMB2_COMPRESSION_TRANSFORM_ID) { + pr_err_ratelimited("smb2 compression not support yet"); + return false; + } + + if (*proto != SMB1_PROTO_NUMBER && + *proto != SMB2_PROTO_NUMBER && + *proto != SMB2_TRANSFORM_PROTO_NUM) + return false; + + return true; } static bool supported_protocol(int idx) From 3a4cdef13fa389ffe8f74c22581155688c827d18 Mon Sep 17 00:00:00 2001 From: Vaishnav Achath Date: Fri, 21 Apr 2023 11:04:30 +0100 Subject: [PATCH 392/934] media: v4l2-mc: Drop subdev check in v4l2_create_fwnode_links_to_pad() While updating v4l2_create_fwnode_links_to_pad() to accept non-subdev sinks, the check is_media_entity_v4l2_subdev() was not removed which prevented the function from being used with non-subdev sinks, Drop the unnecessary check. Fixes: bd5a03bc5be8 ("media: Accept non-subdev sinks in v4l2_create_fwnode_links_to_pad()") Signed-off-by: Vaishnav Achath Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-mc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-mc.c b/drivers/media/v4l2-core/v4l2-mc.c index bf0c18100664..22fe08fce0a9 100644 --- a/drivers/media/v4l2-core/v4l2-mc.c +++ b/drivers/media/v4l2-core/v4l2-mc.c @@ -314,8 +314,7 @@ int v4l2_create_fwnode_links_to_pad(struct v4l2_subdev *src_sd, { struct fwnode_handle *endpoint; - if (!(sink->flags & MEDIA_PAD_FL_SINK) || - !is_media_entity_v4l2_subdev(sink->entity)) + if (!(sink->flags & MEDIA_PAD_FL_SINK)) return -EINVAL; fwnode_graph_for_each_endpoint(dev_fwnode(src_sd->dev), endpoint) { From 6970888d38be0a960673e3d203e8517a5c300ae5 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 18 Apr 2023 08:46:52 +0100 Subject: [PATCH 393/934] media: staging: media: imx: initialize hs_settle to avoid warning Initialize hs_settle to 0 to avoid this compiler warning: imx8mq-mipi-csi2.c: In function 'imx8mq_mipi_csi_start_stream.part.0': imx8mq-mipi-csi2.c:91:55: warning: 'hs_settle' may be used uninitialized [-Wmaybe-uninitialized] 91 | #define GPR_CSI2_1_S_PRG_RXHS_SETTLE(x) (((x) & 0x3f) << 2) | ^~ imx8mq-mipi-csi2.c:357:13: note: 'hs_settle' was declared here 357 | u32 hs_settle; | ^~~~~~~~~ It's a false positive, but it is too complicated for the compiler to detect that. Signed-off-by: Hans Verkuil Reviewed-by: Martin Kepplinger Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/imx/imx8mq-mipi-csi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/imx/imx8mq-mipi-csi2.c b/drivers/staging/media/imx/imx8mq-mipi-csi2.c index 32700cb8bc4d..ca2efcc21efe 100644 --- a/drivers/staging/media/imx/imx8mq-mipi-csi2.c +++ b/drivers/staging/media/imx/imx8mq-mipi-csi2.c @@ -354,7 +354,7 @@ static int imx8mq_mipi_csi_start_stream(struct csi_state *state, struct v4l2_subdev_state *sd_state) { int ret; - u32 hs_settle; + u32 hs_settle = 0; ret = imx8mq_mipi_csi_sw_reset(state); if (ret) From b37a356df86b9e56d30cef4673cba2621c7b7a1e Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 22 May 2023 11:52:45 +0100 Subject: [PATCH 394/934] media: v4l2-subdev: Fix missing kerneldoc for client_caps Add missing kernel doc for the new 'client_caps' field in struct v4l2_subdev_fh. Signed-off-by: Tomi Valkeinen Reviewed-by: Laurent Pinchart Fixes: f57fa2959244 ("media: v4l2-subdev: Add new ioctl for client capabilities") Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-subdev.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index cfd19e72d0fc..b325df0d54d6 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1119,6 +1119,7 @@ struct v4l2_subdev { * @vfh: pointer to &struct v4l2_fh * @state: pointer to &struct v4l2_subdev_state * @owner: module pointer to the owner of this file handle + * @client_caps: bitmask of ``V4L2_SUBDEV_CLIENT_CAP_*`` */ struct v4l2_subdev_fh { struct v4l2_fh vfh; From 81f3affa19d6ab0c32aef46b053838219eef7e71 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 20 Apr 2023 10:45:59 +0100 Subject: [PATCH 395/934] media: uvcvideo: Don't expose unsupported formats to userspace When the uvcvideo driver encounters a format descriptor with an unknown format GUID, it creates a corresponding struct uvc_format instance with the fcc field set to 0. Since commit 50459f103edf ("media: uvcvideo: Remove format descriptions"), the driver relies on the V4L2 core to provide the format description string, which the V4L2 core can't do without a valid 4CC. This triggers a WARN_ON. As a format with a zero 4CC can't be selected, it is unusable for applications. Ignore the format completely without creating a uvc_format instance, which fixes the warning. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217252 Link: https://bugzilla.redhat.com/show_bug.cgi?id=2180107 Fixes: 50459f103edf ("media: uvcvideo: Remove format descriptions") Signed-off-by: Laurent Pinchart Reviewed-by: Ricardo Ribalda Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/uvc/uvc_driver.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 7aefa76a42b3..d631ce4f9f7b 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -251,14 +251,17 @@ static int uvc_parse_format(struct uvc_device *dev, /* Find the format descriptor from its GUID. */ fmtdesc = uvc_format_by_guid(&buffer[5]); - if (fmtdesc != NULL) { - format->fcc = fmtdesc->fcc; - } else { + if (!fmtdesc) { + /* + * Unknown video formats are not fatal errors, the + * caller will skip this descriptor. + */ dev_info(&streaming->intf->dev, "Unknown video format %pUl\n", &buffer[5]); - format->fcc = 0; + return 0; } + format->fcc = fmtdesc->fcc; format->bpp = buffer[21]; /* @@ -675,7 +678,7 @@ static int uvc_parse_streaming(struct uvc_device *dev, interval = (u32 *)&frame[nframes]; streaming->format = format; - streaming->nformats = nformats; + streaming->nformats = 0; /* Parse the format descriptors. */ while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE) { @@ -689,7 +692,10 @@ static int uvc_parse_streaming(struct uvc_device *dev, &interval, buffer, buflen); if (ret < 0) goto error; + if (!ret) + break; + streaming->nformats++; frame += format->nframes; format++; From 3582e74599d376bc18cae123045cd295360d885b Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Tue, 30 May 2023 18:03:39 +0530 Subject: [PATCH 396/934] Revert "ext4: remove ac->ac_found > sbi->s_mb_min_to_scan dead check in ext4_mb_check_limits" This reverts commit 32c0869370194ae5ac9f9f501953ef693040f6a1. The reverted commit was intended to remove a dead check however it was observed that this check was actually being used to exit early instead of looping sbi->s_mb_max_to_scan times when we are able to find a free extent bigger than the goal extent. Due to this, a my performance tests (fsmark, parallel file writes in a highly fragmented FS) were seeing a 2x-3x regression. Example, the default value of the following variables is: sbi->s_mb_max_to_scan = 200 sbi->s_mb_min_to_scan = 10 In ext4_mb_check_limits() if we find an extent smaller than goal, then we return early and try again. This loop will go on until we have processed sbi->s_mb_max_to_scan(=200) number of free extents at which point we exit and just use whatever we have even if it is smaller than goal extent. Now, the regression comes when we find an extent bigger than goal. Earlier, in this case we would loop only sbi->s_mb_min_to_scan(=10) times and then just use the bigger extent. However with commit 32c08693 that check was removed and hence we would loop sbi->s_mb_max_to_scan(=200) times even though we have a big enough free extent to satisfy the request. The only time we would exit early would be when the free extent is *exactly* the size of our goal, which is pretty uncommon occurrence and so we would almost always end up looping 200 times. Hence, revert the commit by adding the check back to fix the regression. Also add a comment to outline this policy. Fixes: 32c086937019 ("ext4: remove ac->ac_found > sbi->s_mb_min_to_scan dead check in ext4_mb_check_limits") Signed-off-by: Ojaswin Mujoo Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Kemeng Shi Link: https://lore.kernel.org/r/ddcae9658e46880dfec2fb0aa61d01fb3353d202.1685449706.git.ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7b2e36d103cb..20f67a260df5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2062,7 +2062,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac, if (bex->fe_len < gex->fe_len) return; - if (finish_group) + if (finish_group || ac->ac_found > sbi->s_mb_min_to_scan) ext4_mb_use_best_found(ac, e4b); } @@ -2074,6 +2074,20 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac, * in the context. Later, the best found extent will be used, if * mballoc can't find good enough extent. * + * The algorithm used is roughly as follows: + * + * * If free extent found is exactly as big as goal, then + * stop the scan and use it immediately + * + * * If free extent found is smaller than goal, then keep retrying + * upto a max of sbi->s_mb_max_to_scan times (default 200). After + * that stop scanning and use whatever we have. + * + * * If free extent found is bigger than goal, then keep retrying + * upto a max of sbi->s_mb_min_to_scan times (default 10) before + * stopping the scan and using the extent. + * + * * FIXME: real allocation policy is to be designed yet! */ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac, From 4ecd704a4c51fd95973fcc3a60444e0e24eb9439 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Tue, 30 May 2023 18:41:16 +0200 Subject: [PATCH 397/934] tpm, tpm_tis: correct tpm_tis_flags enumeration values With commit 858e8b792d06 ("tpm, tpm_tis: Avoid cache incoherency in test for interrupts") bit accessor functions are used to access flags in tpm_tis_data->flags. However these functions expect bit numbers, while the flags are defined as bit masks in enum tpm_tis_flag. Fix this inconsistency by using numbers instead of masks also for the flags in the enum. Reported-by: Pavel Machek Fixes: 858e8b792d06 ("tpm, tpm_tis: Avoid cache incoherency in test for interrupts") Signed-off-by: Lino Sanfilippo Cc: stable@vger.kernel.org Reviewed-by: Pavel Machek Signed-off-by: Linus Torvalds --- drivers/char/tpm/tpm_tis_core.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h index e978f457fd4d..610bfadb6acf 100644 --- a/drivers/char/tpm/tpm_tis_core.h +++ b/drivers/char/tpm/tpm_tis_core.h @@ -84,10 +84,10 @@ enum tis_defaults { #define ILB_REMAP_SIZE 0x100 enum tpm_tis_flags { - TPM_TIS_ITPM_WORKAROUND = BIT(0), - TPM_TIS_INVALID_STATUS = BIT(1), - TPM_TIS_DEFAULT_CANCELLATION = BIT(2), - TPM_TIS_IRQ_TESTED = BIT(3), + TPM_TIS_ITPM_WORKAROUND = 0, + TPM_TIS_INVALID_STATUS = 1, + TPM_TIS_DEFAULT_CANCELLATION = 2, + TPM_TIS_IRQ_TESTED = 3, }; struct tpm_tis_data { From cba41bb78d70aad98d8e61e019fd48c561f7f396 Mon Sep 17 00:00:00 2001 From: Rhys Rustad-Elliott Date: Fri, 2 Jun 2023 19:02:02 +0000 Subject: [PATCH 398/934] bpf: Fix elem_size not being set for inner maps Commit d937bc3449fa ("bpf: make uniform use of array->elem_size everywhere in arraymap.c") changed array_map_gen_lookup to use array->elem_size instead of round_up(map->value_size, 8) as the element size when generating code to access a value in an array map. array->elem_size, however, is not set by bpf_map_meta_alloc when initializing an BPF_MAP_TYPE_ARRAY_OF_MAPS or BPF_MAP_TYPE_HASH_OF_MAPS. This results in array_map_gen_lookup incorrectly outputting code that always accesses index 0 in the array (as the index will be calculated via a multiplication with the element size, which is incorrectly set to 0). Set elem_size on the bpf_array object when allocating an array or hash of maps to fix this. Fixes: d937bc3449fa ("bpf: make uniform use of array->elem_size everywhere in arraymap.c") Signed-off-by: Rhys Rustad-Elliott Link: https://lore.kernel.org/r/20230602190110.47068-2-me@rhysre.net Signed-off-by: Martin KaFai Lau --- kernel/bpf/map_in_map.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 2c5c64c2a53b..cd5eafaba97e 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -69,9 +69,13 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) /* Misc members not needed in bpf_map_meta_equal() check. */ inner_map_meta->ops = inner_map->ops; if (inner_map->ops == &array_map_ops) { + struct bpf_array *inner_array_meta = + container_of(inner_map_meta, struct bpf_array, map); + struct bpf_array *inner_array = container_of(inner_map, struct bpf_array, map); + + inner_array_meta->index_mask = inner_array->index_mask; + inner_array_meta->elem_size = inner_array->elem_size; inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1; - container_of(inner_map_meta, struct bpf_array, map)->index_mask = - container_of(inner_map, struct bpf_array, map)->index_mask; } fdput(f); From 817fa998362d6ea9fabd5e97af8e9e2eb5f0e6f2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 1 Jun 2023 18:01:37 -0700 Subject: [PATCH 399/934] KVM: x86/mmu: Grab memslot for correct address space in NX recovery worker Factor in the address space (non-SMM vs. SMM) of the target shadow page when recovering potential NX huge pages, otherwise KVM will retrieve the wrong memslot when zapping shadow pages that were created for SMM. The bug most visibly manifests as a WARN on the memslot being non-NULL, but the worst case scenario is that KVM could unaccount the shadow page without ensuring KVM won't install a huge page, i.e. if the non-SMM slot is being dirty logged, but the SMM slot is not. ------------[ cut here ]------------ WARNING: CPU: 1 PID: 3911 at arch/x86/kvm/mmu/mmu.c:7015 kvm_nx_huge_page_recovery_worker+0x38c/0x3d0 [kvm] CPU: 1 PID: 3911 Comm: kvm-nx-lpage-re RIP: 0010:kvm_nx_huge_page_recovery_worker+0x38c/0x3d0 [kvm] RSP: 0018:ffff99b284f0be68 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff99b284edd000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff9271397024e0 R08: 0000000000000000 R09: ffff927139702450 R10: 0000000000000000 R11: 0000000000000001 R12: ffff99b284f0be98 R13: 0000000000000000 R14: ffff9270991fcd80 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff927f9f640000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0aacad3ae0 CR3: 000000088fc2c005 CR4: 00000000003726e0 Call Trace: __pfx_kvm_nx_huge_page_recovery_worker+0x10/0x10 [kvm] kvm_vm_worker_thread+0x106/0x1c0 [kvm] kthread+0xd9/0x100 ret_from_fork+0x2c/0x50 ---[ end trace 0000000000000000 ]--- This bug was exposed by commit edbdb43fc96b ("KVM: x86: Preserve TDP MMU roots until they are explicitly invalidated"), which allowed KVM to retain SMM TDP MMU roots effectively indefinitely. Before commit edbdb43fc96b, KVM would zap all SMM TDP MMU roots and thus all SMM TDP MMU shadow pages once all vCPUs exited SMM, which made the window where this bug (recovering an SMM NX huge page) could be encountered quite tiny. To hit the bug, the NX recovery thread would have to run while at least one vCPU was in SMM. Most VMs typically only use SMM during boot, and so the problematic shadow pages were gone by the time the NX recovery thread ran. Now that KVM preserves TDP MMU roots until they are explicitly invalidated (e.g. by a memslot deletion), the window to trigger the bug is effectively never closed because most VMMs don't delete memslots after boot (except for a handful of special scenarios). Fixes: eb298605705a ("KVM: x86/mmu: Do not recover dirty-tracked NX Huge Pages") Reported-by: Fabio Coatti Closes: https://lore.kernel.org/all/CADpTngX9LESCdHVu_2mQkNGena_Ng2CphWNwsRGSMxzDsTjU2A@mail.gmail.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230602010137.784664-1-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c8961f45e3b1..6eaa3d6994ae 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7091,7 +7091,10 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm) */ slot = NULL; if (atomic_read(&kvm->nr_memslots_dirty_logging)) { - slot = gfn_to_memslot(kvm, sp->gfn); + struct kvm_memslots *slots; + + slots = kvm_memslots_for_spte_role(kvm, sp->role); + slot = __gfn_to_memslot(slots, sp->gfn); WARN_ON_ONCE(!slot); } From b2ce89978889b848a6dd652695dd1887e416f9d2 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Fri, 19 May 2023 13:26:18 +0200 Subject: [PATCH 400/934] KVM: SVM: vNMI pending bit is V_NMI_PENDING_MASK not V_NMI_BLOCKING_MASK While testing Hyper-V enabled Windows Server 2019 guests on Zen4 hardware I noticed that with vCPU count large enough (> 16) they sometimes froze at boot. With vCPU count of 64 they never booted successfully - suggesting some kind of a race condition. Since adding "vnmi=0" module parameter made these guests boot successfully it was clear that the problem is most likely (v)NMI-related. Running kvm-unit-tests quickly showed failing NMI-related tests cases, like "multiple nmi" and "pending nmi" from apic-split, x2apic and xapic tests and the NMI parts of eventinj test. The issue was that once one NMI was being serviced no other NMI was allowed to be set pending (NMI limit = 0), which was traced to svm_is_vnmi_pending() wrongly testing for the "NMI blocked" flag rather than for the "NMI pending" flag. Fix this by testing for the right flag in svm_is_vnmi_pending(). Once this is done, the NMI-related kvm-unit-tests pass successfully and the Windows guest no longer freezes at boot. Fixes: fa4c027a7956 ("KVM: x86: Add support for SVM's Virtual NMI") Signed-off-by: Maciej S. Szmigiero Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/be4ca192eb0c1e69a210db3009ca984e6a54ae69.1684495380.git.maciej.szmigiero@oracle.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ca32389f3c36..54089f990c8f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3510,7 +3510,7 @@ static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu) if (!is_vnmi_enabled(svm)) return false; - return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK); + return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK); } static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu) From 8b703a49c9df5e74870381ad7ba9c85d8a74ed2c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 1 Jun 2023 18:19:19 -0700 Subject: [PATCH 401/934] KVM: x86: Account fastpath-only VM-Exits in vCPU stats Increment vcpu->stat.exits when handling a fastpath VM-Exit without going through any part of the "slow" path. Not bumping the exits stat can result in wildly misleading exit counts, e.g. if the primary reason the guest is exiting is to program the TSC deadline timer. Fixes: 404d5d7bff0d ("KVM: X86: Introduce more exit_fastpath_completion enum values") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230602011920.787844-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c0778ca39650..04b57a336b34 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10758,6 +10758,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; break; } + + /* Note, VM-Exits that go down the "slow" path are accounted below. */ + ++vcpu->stat.exits; } /* From a37f2699c36a7f6606ba3300f243227856c5ad6b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 17 May 2023 11:26:41 -0500 Subject: [PATCH 402/934] x86/head/64: Switch to KERNEL_CS as soon as new GDT is installed The call to startup_64_setup_env() will install a new GDT but does not actually switch to using the KERNEL_CS entry until returning from the function call. Commit bcce82908333 ("x86/sev: Detect/setup SEV/SME features earlier in boot") moved the call to sme_enable() earlier in the boot process and in between the call to startup_64_setup_env() and the switch to KERNEL_CS. An SEV-ES or an SEV-SNP guest will trigger #VC exceptions during the call to sme_enable() and if the CS pushed on the stack as part of the exception and used by IRETQ is not mapped by the new GDT, then problems occur. Today, the current CS when entering startup_64 is the kernel CS value because it was set up by the decompressor code, so no issue is seen. However, a recent patchset that looked to avoid using the legacy decompressor during an EFI boot exposed this bug. At entry to startup_64, the CS value is that of EFI and is not mapped in the new kernel GDT. So when a #VC exception occurs, the CS value used by IRETQ is not valid and the guest boot crashes. Fix this issue by moving the block that switches to the KERNEL_CS value to be done immediately after returning from startup_64_setup_env(). Fixes: bcce82908333 ("x86/sev: Detect/setup SEV/SME features earlier in boot") Signed-off-by: Tom Lendacky Signed-off-by: Dave Hansen Reviewed-by: Joerg Roedel Link: https://lore.kernel.org/all/6ff1f28af2829cc9aea357ebee285825f90a431f.1684340801.git.thomas.lendacky%40amd.com --- arch/x86/kernel/head_64.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a5df3e994f04..113c13376e51 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -77,6 +77,15 @@ SYM_CODE_START_NOALIGN(startup_64) call startup_64_setup_env popq %rsi + /* Now switch to __KERNEL_CS so IRET works reliably */ + pushq $__KERNEL_CS + leaq .Lon_kernel_cs(%rip), %rax + pushq %rax + lretq + +.Lon_kernel_cs: + UNWIND_HINT_END_OF_STACK + #ifdef CONFIG_AMD_MEM_ENCRYPT /* * Activate SEV/SME memory encryption if supported/enabled. This needs to @@ -90,15 +99,6 @@ SYM_CODE_START_NOALIGN(startup_64) popq %rsi #endif - /* Now switch to __KERNEL_CS so IRET works reliably */ - pushq $__KERNEL_CS - leaq .Lon_kernel_cs(%rip), %rax - pushq %rax - lretq - -.Lon_kernel_cs: - UNWIND_HINT_END_OF_STACK - /* Sanitize CPU configuration */ call verify_cpu From 1022b67b89ce5b92906fec6447f02acf6ba018e4 Mon Sep 17 00:00:00 2001 From: Rhys Rustad-Elliott Date: Fri, 2 Jun 2023 19:02:24 +0000 Subject: [PATCH 403/934] selftests/bpf: Add access_inner_map selftest Add a selftest that accesses a BPF_MAP_TYPE_ARRAY (at a nonzero index) nested within a BPF_MAP_TYPE_HASH_OF_MAPS to flex a previously buggy case. Signed-off-by: Rhys Rustad-Elliott Link: https://lore.kernel.org/r/20230602190110.47068-3-me@rhysre.net Signed-off-by: Martin KaFai Lau --- .../bpf/prog_tests/inner_array_lookup.c | 31 +++++++++++++ .../selftests/bpf/progs/inner_array_lookup.c | 45 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c create mode 100644 tools/testing/selftests/bpf/progs/inner_array_lookup.c diff --git a/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c new file mode 100644 index 000000000000..9ab4cd195108 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include + +#include "inner_array_lookup.skel.h" + +void test_inner_array_lookup(void) +{ + int map1_fd, err; + int key = 3; + int val = 1; + struct inner_array_lookup *skel; + + skel = inner_array_lookup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load_skeleton")) + return; + + err = inner_array_lookup__attach(skel); + if (!ASSERT_OK(err, "skeleton_attach")) + goto cleanup; + + map1_fd = bpf_map__fd(skel->maps.inner_map1); + bpf_map_update_elem(map1_fd, &key, &val, 0); + + /* Probe should have set the element at index 3 to 2 */ + bpf_map_lookup_elem(map1_fd, &key, &val); + ASSERT_EQ(val, 2, "value_is_2"); + +cleanup: + inner_array_lookup__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/inner_array_lookup.c b/tools/testing/selftests/bpf/progs/inner_array_lookup.c new file mode 100644 index 000000000000..c2c8f2fa451d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/inner_array_lookup.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, int); + __type(value, int); +} inner_map1 SEC(".maps"); + +struct outer_map { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 3); + __type(key, int); + __array(values, struct inner_map); +} outer_map1 SEC(".maps") = { + .values = { + [2] = &inner_map1, + }, +}; + +SEC("raw_tp/sys_enter") +int handle__sys_enter(void *ctx) +{ + int outer_key = 2, inner_key = 3; + int *val; + void *map; + + map = bpf_map_lookup_elem(&outer_map1, &outer_key); + if (!map) + return 1; + + val = bpf_map_lookup_elem(map, &inner_key); + if (!val) + return 1; + + if (*val == 1) + *val = 2; + + return 0; +} + +char _license[] SEC("license") = "GPL"; From 4364b287982bd05bfafa461c80650c732001974b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Jun 2023 16:32:48 -0700 Subject: [PATCH 404/934] KVM: x86: Bail from kvm_recalculate_phys_map() if x2APIC ID is out-of-bounds Bail from kvm_recalculate_phys_map() and disable the optimized map if the target vCPU's x2APIC ID is out-of-bounds, i.e. if the vCPU was added and/or enabled its local APIC after the map was allocated. This fixes an out-of-bounds access bug in the !x2apic_format path where KVM would write beyond the end of phys_map. Check the x2APIC ID regardless of whether or not x2APIC is enabled, as KVM's hardcodes x2APIC ID to be the vCPU ID, i.e. it can't change, and the map allocation in kvm_recalculate_apic_map() doesn't check for x2APIC being enabled, i.e. the check won't get false postivies. Note, this also affects the x2apic_format path, which previously just ignored the "x2apic_id > new->max_apic_id" case. That too is arguably a bug fix, as ignoring the vCPU meant that KVM would not send interrupts to the vCPU until the next map recalculation. In practice, that "bug" is likely benign as a newly present vCPU/APIC would immediately trigger a recalc. But, there's no functional downside to disabling the map, and a future patch will gracefully handle the -E2BIG case by retrying instead of simply disabling the optimized map. Opportunistically add a sanity check on the xAPIC ID size, along with a comment explaining why the xAPIC ID is guaranteed to be "good". Reported-by: Michal Luczaj Fixes: 5b84b0291702 ("KVM: x86: Honor architectural behavior for aliased 8-bit APIC IDs") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230602233250.1014316-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e542cf285b51..3c300a196bdf 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -228,6 +228,23 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new, u32 xapic_id = kvm_xapic_id(apic); u32 physical_id; + /* + * For simplicity, KVM always allocates enough space for all possible + * xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on + * without the optimized map. + */ + if (WARN_ON_ONCE(xapic_id > new->max_apic_id)) + return -EINVAL; + + /* + * Bail if a vCPU was added and/or enabled its APIC between allocating + * the map and doing the actual calculations for the map. Note, KVM + * hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if + * the compiler decides to reload x2apic_id after this check. + */ + if (x2apic_id > new->max_apic_id) + return -E2BIG; + /* * Deliberately truncate the vCPU ID when detecting a mismatched APIC * ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a @@ -253,8 +270,7 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new, */ if (vcpu->kvm->arch.x2apic_format) { /* See also kvm_apic_match_physical_addr(). */ - if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) && - x2apic_id <= new->max_apic_id) + if (apic_x2apic_mode(apic) || x2apic_id > 0xff) new->phys_map[x2apic_id] = apic; if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) From 47d2804bc99ca873470df17c20737b28225a320d Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Fri, 2 Jun 2023 16:32:50 -0700 Subject: [PATCH 405/934] KVM: selftests: Add test for race in kvm_recalculate_apic_map() Keep switching between LAPIC_MODE_X2APIC and LAPIC_MODE_DISABLED during APIC map construction to hunt for TOCTOU bugs in KVM. KVM's optimized map recalc makes multiple passes over the list of vCPUs, and the calculations ignore vCPU's whose APIC is hardware-disabled, i.e. there's a window where toggling LAPIC_MODE_DISABLED is quite interesting. Signed-off-by: Michal Luczaj Co-developed-by: Sean Christopherson Link: https://lore.kernel.org/r/20230602233250.1014316-4-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/Makefile | 1 + .../kvm/x86_64/recalc_apic_map_test.c | 74 +++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 7a5ff646e7e7..4761b768b773 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -116,6 +116,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests TEST_GEN_PROGS_x86_64 += x86_64/amx_test TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test +TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c new file mode 100644 index 000000000000..4c416ebe7d66 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test edge cases and race conditions in kvm_recalculate_apic_map(). + */ + +#include +#include +#include + +#include "processor.h" +#include "test_util.h" +#include "kvm_util.h" +#include "apic.h" + +#define TIMEOUT 5 /* seconds */ + +#define LAPIC_DISABLED 0 +#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) +#define MAX_XAPIC_ID 0xff + +static void *race(void *arg) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu = arg; + + while (1) { + /* Trigger kvm_recalculate_apic_map(). */ + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + pthread_testcancel(); + } + + return NULL; +} + +int main(void) +{ + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + struct kvm_vcpu *vcpuN; + struct kvm_vm *vm; + pthread_t thread; + time_t t; + int i; + + kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID); + + /* + * Create the max number of vCPUs supported by selftests so that KVM + * has decent amount of work to do when recalculating the map, i.e. to + * make the problematic window large enough to hit. + */ + vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus); + + /* + * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc + * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits). + */ + for (i = 0; i < KVM_MAX_VCPUS; i++) + vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC); + + ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); + + vcpuN = vcpus[KVM_MAX_VCPUS - 1]; + for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { + vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC); + vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED); + } + + ASSERT_EQ(pthread_cancel(thread), 0); + ASSERT_EQ(pthread_join(thread, NULL), 0); + + kvm_vm_free(vm); + + return 0; +} From edf2e1d2019b2730d6076dbe4c040d37d7c10bbe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Jun 2023 16:04:44 +0000 Subject: [PATCH 406/934] net/ipv6: fix bool/int mismatch for skip_notify_on_dev_down skip_notify_on_dev_down ctl table expects this field to be an int (4 bytes), not a bool (1 byte). Because proc_dou8vec_minmax() was added in 5.13, this patch converts skip_notify_on_dev_down to an int. Following patch then converts the field to u8 and use proc_dou8vec_minmax(). Fixes: 7c6bb7d2faaf ("net/ipv6: Add knob to skip DELROUTE message on device down") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Acked-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- include/net/netns/ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 3cceb3e9320b..d44b2ee49698 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -53,7 +53,7 @@ struct netns_sysctl_ipv6 { int seg6_flowlabel; u32 ioam6_id; u64 ioam6_id_wide; - bool skip_notify_on_dev_down; + int skip_notify_on_dev_down; u8 fib_notify_on_flag_change; u8 icmpv6_error_anycast_as_unicast; }; From ef62c0ae6db11c095880e473db9f846132d7eba8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Jun 2023 16:04:45 +0000 Subject: [PATCH 407/934] net/ipv6: convert skip_notify_on_dev_down sysctl to u8 Save a bit a space, and could help future sysctls to use the same pattern. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Acked-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- include/net/netns/ipv6.h | 2 +- net/ipv6/route.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index d44b2ee49698..5f2cfd84570a 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -53,7 +53,7 @@ struct netns_sysctl_ipv6 { int seg6_flowlabel; u32 ioam6_id; u64 ioam6_id_wide; - int skip_notify_on_dev_down; + u8 skip_notify_on_dev_down; u8 fib_notify_on_flag_change; u8 icmpv6_error_anycast_as_unicast; }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e3aec46bd466..392aaa373b66 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6412,9 +6412,9 @@ static struct ctl_table ipv6_route_table_template[] = { { .procname = "skip_notify_on_dev_down", .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down, - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, From eb50d0f250e96ede9192d936d220cd97adc93b89 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sun, 19 Mar 2023 11:53:32 +0900 Subject: [PATCH 408/934] selftests/ftrace: Choose target function for filter test from samples Since the event-filter-function.tc expects the 'exit_mmap()' directly calls 'kmem_cache_free()', this is vulnerable to code modifications. Choose the target function for the filter test from the sample event data so that it can keep test running correctly even if the caller function name will be changed. Link: https://lore.kernel.org/linux-trace-kernel/167919441260.1922645.18355804179347364057.stgit@mhiramat.roam.corp.google.com/ Link: https://lore.kernel.org/all/CA+G9fYtF-XEKi9YNGgR=Kf==7iRb2FrmEC7qtwAeQbfyah-UhA@mail.gmail.com/ Reported-by: Linux Kernel Functional Testing Fixes: 7f09d639b8c4 ("tracing/selftests: Add test for event filtering on function name") Signed-off-by: Masami Hiramatsu (Google) Acked-by: Steven Rostedt (Google) --- .../test.d/filter/event-filter-function.tc | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index e2ff3bf4df80..2de7c61d1ae3 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -9,18 +9,33 @@ fail() { #msg exit_fail } -echo "Test event filter function name" +sample_events() { + echo > trace + echo 1 > events/kmem/kmem_cache_free/enable + echo 1 > tracing_on + ls > /dev/null + echo 0 > tracing_on + echo 0 > events/kmem/kmem_cache_free/enable +} + echo 0 > tracing_on echo 0 > events/enable -echo > trace -echo 'call_site.function == exit_mmap' > events/kmem/kmem_cache_free/filter -echo 1 > events/kmem/kmem_cache_free/enable -echo 1 > tracing_on -ls > /dev/null -echo 0 > events/kmem/kmem_cache_free/enable -hitcnt=`grep kmem_cache_free trace| grep exit_mmap | wc -l` -misscnt=`grep kmem_cache_free trace| grep -v exit_mmap | wc -l` +echo "Get the most frequently calling function" +sample_events + +target_func=`cut -d: -f3 trace | sed 's/call_site=\([^+]*\)+0x.*/\1/' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'` +if [ -z "$target_func" ]; then + exit_fail +fi +echo > trace + +echo "Test event filter function name" +echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter +sample_events + +hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` +misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` if [ $hitcnt -eq 0 ]; then exit_fail @@ -30,20 +45,14 @@ if [ $misscnt -gt 0 ]; then exit_fail fi -address=`grep ' exit_mmap$' /proc/kallsyms | cut -d' ' -f1` +address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1` echo "Test event filter function address" -echo 0 > tracing_on -echo 0 > events/enable -echo > trace echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter -echo 1 > events/kmem/kmem_cache_free/enable -echo 1 > tracing_on -sleep 1 -echo 0 > events/kmem/kmem_cache_free/enable +sample_events -hitcnt=`grep kmem_cache_free trace| grep exit_mmap | wc -l` -misscnt=`grep kmem_cache_free trace| grep -v exit_mmap | wc -l` +hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` +misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` if [ $hitcnt -eq 0 ]; then exit_fail From 03c44a21d0333a2f469680bc9caaf96a9bfaea87 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 1 Jun 2023 10:12:06 +0100 Subject: [PATCH 409/934] net: phylink: actually fix ksettings_set() ethtool call Raju Lakkaraju reported that the below commit caused a regression with Lan743x drivers and a 2.5G SFP. Sadly, this is because the commit was utterly wrong. Let's fix this properly by not moving the linkmode_and(), but instead copying the link ksettings and then modifying the advertising mask before passing the modified link ksettings to phylib. Fixes: df0acdc59b09 ("net: phylink: fix ksettings_set() ethtool call") Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1q4eLm-00Ayxk-GZ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index e237949deee6..b4831110003c 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2225,11 +2225,13 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, ASSERT_RTNL(); - /* Mask out unsupported advertisements */ - linkmode_and(config.advertising, kset->link_modes.advertising, - pl->supported); - if (pl->phydev) { + struct ethtool_link_ksettings phy_kset = *kset; + + linkmode_and(phy_kset.link_modes.advertising, + phy_kset.link_modes.advertising, + pl->supported); + /* We can rely on phylib for this update; we also do not need * to update the pl->link_config settings: * - the configuration returned via ksettings_get() will come @@ -2248,10 +2250,13 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, * the presence of a PHY, this should not be changed as that * should be determined from the media side advertisement. */ - return phy_ethtool_ksettings_set(pl->phydev, kset); + return phy_ethtool_ksettings_set(pl->phydev, &phy_kset); } config = pl->link_config; + /* Mask out unsupported advertisements */ + linkmode_and(config.advertising, kset->link_modes.advertising, + pl->supported); /* FIXME: should we reject autoneg if phy/mac does not support it? */ switch (kset->base.autoneg) { From b05d39466ba111fc3775d5d46180b73c34ebe8f7 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 15 May 2023 09:26:04 -0700 Subject: [PATCH 410/934] leds: qcom-lpg: Fix PWM period limits The introduction of high resolution PWM support changed the order of the operations in the calculation of min and max period. The result in both divisions is in most cases a truncation to 0, which limits the period to the range of [0, 0]. Both numerators (and denominators) are within 64 bits, so the whole expression can be put directly into the div64_u64, instead of doing it partially. Fixes: b00d2ed37617 ("leds: rgb: leds-qcom-lpg: Add support for high resolution PWM") Reviewed-by: Caleb Connolly Tested-by: Steev Klimaszewski Signed-off-by: Bjorn Andersson Acked-by: Lee Jones Tested-by: Johan Hovold Tested-by: Neil Armstrong # on SM8550-QRD Link: https://lore.kernel.org/r/20230515162604.649203-1-quic_bjorande@quicinc.com Signed-off-by: Johan Hovold --- drivers/leds/rgb/leds-qcom-lpg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c index 55a037234df1..1c849814a491 100644 --- a/drivers/leds/rgb/leds-qcom-lpg.c +++ b/drivers/leds/rgb/leds-qcom-lpg.c @@ -312,14 +312,14 @@ static int lpg_calc_freq(struct lpg_channel *chan, uint64_t period) max_res = LPG_RESOLUTION_9BIT; } - min_period = (u64)NSEC_PER_SEC * - div64_u64((1 << pwm_resolution_arr[0]), clk_rate_arr[clk_len - 1]); + min_period = div64_u64((u64)NSEC_PER_SEC * (1 << pwm_resolution_arr[0]), + clk_rate_arr[clk_len - 1]); if (period <= min_period) return -EINVAL; /* Limit period to largest possible value, to avoid overflows */ - max_period = (u64)NSEC_PER_SEC * max_res * LPG_MAX_PREDIV * - div64_u64((1 << LPG_MAX_M), 1024); + max_period = div64_u64((u64)NSEC_PER_SEC * max_res * LPG_MAX_PREDIV * (1 << LPG_MAX_M), + 1024); if (period > max_period) period = max_period; From 02a7eee1ebf213dda4d517b9ed09fae4c1734312 Mon Sep 17 00:00:00 2001 From: Weihao Gao Date: Fri, 2 Jun 2023 19:54:50 +0000 Subject: [PATCH 411/934] Fix gitignore for recently added usptream self tests This resolves the issue that generated binary is showing up as an untracked git file after every build on the kernel. Signed-off-by: Weihao Gao Signed-off-by: David S. Miller --- tools/testing/selftests/net/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 80f06aa62034..f27a7338b60e 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -8,8 +8,10 @@ diag_uid fin_ack_lat gro hwtstamp_config +io_uring_zerocopy_tx ioam6_parser ip_defrag +ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr @@ -26,6 +28,7 @@ reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack rxtimestamp +sctp_hello sk_bind_sendto_listen sk_connect_zero_addr socket From c308e9ec004721a656c193243eab61a8be324657 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 1 Jun 2023 16:41:52 +0800 Subject: [PATCH 412/934] net/smc: Avoid to access invalid RMBs' MRs in SMCRv1 ADD LINK CONT SMCRv1 has a similar issue to SMCRv2 (see link below) that may access invalid MRs of RMBs when construct LLC ADD LINK CONT messages. BUG: kernel NULL pointer dereference, address: 0000000000000014 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 5 PID: 48 Comm: kworker/5:0 Kdump: loaded Tainted: G W E 6.4.0-rc3+ #49 Workqueue: events smc_llc_add_link_work [smc] RIP: 0010:smc_llc_add_link_cont+0x160/0x270 [smc] RSP: 0018:ffffa737801d3d50 EFLAGS: 00010286 RAX: ffff964f82144000 RBX: ffffa737801d3dd8 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff964f81370c30 RBP: ffffa737801d3dd4 R08: ffff964f81370000 R09: ffffa737801d3db0 R10: 0000000000000001 R11: 0000000000000060 R12: ffff964f82e70000 R13: ffff964f81370c38 R14: ffffa737801d3dd3 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff9652bfd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000014 CR3: 000000008fa20004 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: smc_llc_srv_rkey_exchange+0xa7/0x190 [smc] smc_llc_srv_add_link+0x3ae/0x5a0 [smc] smc_llc_add_link_work+0xb8/0x140 [smc] process_one_work+0x1e5/0x3f0 worker_thread+0x4d/0x2f0 ? __pfx_worker_thread+0x10/0x10 kthread+0xe5/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2c/0x50 When an alernate RNIC is available in system, SMC will try to add a new link based on the RNIC for resilience. All the RMBs in use will be mapped to the new link. Then the RMBs' MRs corresponding to the new link will be filled into LLC messages. For SMCRv1, they are ADD LINK CONT messages. However smc_llc_add_link_cont() may mistakenly access to unused RMBs which haven't been mapped to the new link and have no valid MRs, thus causing a crash. So this patch fixes it. Fixes: 87f88cda2128 ("net/smc: rkey processing for a new link as SMC client") Link: https://lore.kernel.org/r/1685101741-74826-3-git-send-email-guwen@linux.alibaba.com Signed-off-by: Wen Gu Reviewed-by: Wenjia Zhang Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 7a8d9163d186..90f0b60b196a 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -851,6 +851,8 @@ static int smc_llc_add_link_cont(struct smc_link *link, addc_llc->num_rkeys = *num_rkeys_todo; n = *num_rkeys_todo; for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) { + while (*buf_pos && !(*buf_pos)->used) + *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); if (!*buf_pos) { addc_llc->num_rkeys = addc_llc->num_rkeys - *num_rkeys_todo; @@ -867,8 +869,6 @@ static int smc_llc_add_link_cont(struct smc_link *link, (*num_rkeys_todo)--; *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); - while (*buf_pos && !(*buf_pos)->used) - *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos); } addc_llc->hd.common.llc_type = SMC_LLC_ADD_LINK_CONT; addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont); From ddad59331a4e16088468ca0ad228a9fe32d7955a Mon Sep 17 00:00:00 2001 From: Tian Lan Date: Sat, 13 May 2023 18:12:27 -0400 Subject: [PATCH 413/934] blk-mq: fix blk_mq_hw_ctx active request accounting The nr_active counter continues to increase over time which causes the blk_mq_get_tag to hang until the thread is rescheduled to a different core despite there are still tags available. kernel-stack INFO: task inboundIOReacto:3014879 blocked for more than 2 seconds Not tainted 6.1.15-amd64 #1 Debian 6.1.15~debian11 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:inboundIOReacto state:D stack:0 pid:3014879 ppid:4557 flags:0x00000000 Call Trace: __schedule+0x351/0xa20 scheduler+0x5d/0xe0 io_schedule+0x42/0x70 blk_mq_get_tag+0x11a/0x2a0 ? dequeue_task_stop+0x70/0x70 __blk_mq_alloc_requests+0x191/0x2e0 kprobe output showing RQF_MQ_INFLIGHT bit is not cleared before __blk_mq_free_request being called. 320 320 kworker/29:1H __blk_mq_free_request rq_flags 0x220c0 in-flight 1 b'__blk_mq_free_request+0x1 [kernel]' b'bt_iter+0x50 [kernel]' b'blk_mq_queue_tag_busy_iter+0x318 [kernel]' b'blk_mq_timeout_work+0x7c [kernel]' b'process_one_work+0x1c4 [kernel]' b'worker_thread+0x4d [kernel]' b'kthread+0xe6 [kernel]' b'ret_from_fork+0x1f [kernel]' Signed-off-by: Tian Lan Fixes: 2e315dc07df0 ("blk-mq: grab rq->refcount before calling ->fn in blk_mq_tagset_busy_iter") Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20230513221227.497327-1-tilan7663@gmail.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f6dad0886a2f..850bfb844ed2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -683,6 +683,10 @@ static void __blk_mq_free_request(struct request *rq) blk_crypto_free_request(rq); blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; + + if (rq->rq_flags & RQF_MQ_INFLIGHT) + __blk_mq_dec_active_requests(hctx); + if (rq->tag != BLK_MQ_NO_TAG) blk_mq_put_tag(hctx->tags, ctx, rq->tag); if (sched_tag != BLK_MQ_NO_TAG) @@ -694,15 +698,11 @@ static void __blk_mq_free_request(struct request *rq) void blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx = rq->mq_hctx; if ((rq->rq_flags & RQF_ELVPRIV) && q->elevator->type->ops.finish_request) q->elevator->type->ops.finish_request(rq); - if (rq->rq_flags & RQF_MQ_INFLIGHT) - __blk_mq_dec_active_requests(hctx); - if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->disk->bdi); From 9bf2e534313fcf420367668cc1f30e10469901dc Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 May 2023 06:22:54 -0500 Subject: [PATCH 414/934] arm64: dts: imx8mn-beacon: Fix SPI CS pinmux The final production baseboard had a different chip select than earlier prototype boards. When the newer board was released, the SPI stopped working because the wrong pin was used in the device tree and conflicted with the UART RTS. Fix the pinmux for production boards. Fixes: 36ca3c8ccb53 ("arm64: dts: imx: Add Beacon i.MX8M Nano development kit") Signed-off-by: Adam Ford Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi index 9e82069c941f..5a1f7c30afe5 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-baseboard.dtsi @@ -81,7 +81,7 @@ &ecspi2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_espi2>; - cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>; + cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>; status = "okay"; eeprom@0 { @@ -202,7 +202,7 @@ MX8MN_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82 MX8MN_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82 MX8MN_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82 - MX8MN_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x41 + MX8MN_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x41 >; }; From 7190d0ff0e17690a9b1279d84a06473600ba2060 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 2 Jun 2023 17:46:58 +0800 Subject: [PATCH 415/934] net: enetc: correct the statistics of rx bytes The rx_bytes of struct net_device_stats should count the length of ethernet frames excluding the FCS. However, there are two problems with the rx_bytes statistics of the current enetc driver. one is that the length of VLAN header is not counted if the VLAN extraction feature is enabled. The other is that the length of L2 header is not counted, because eth_type_trans() is invoked before updating rx_bytes which will subtract the length of L2 header from skb->len. BTW, the rx_bytes statistics of XDP path also have similar problem, I will fix it in another patch. Fixes: a800abd3ecb9 ("net: enetc: move skb creation into enetc_build_skb") Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3c4fa26f0f9b..d6c0f3f46c2a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1229,7 +1229,13 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, if (!skb) break; - rx_byte_cnt += skb->len; + /* When set, the outer VLAN header is extracted and reported + * in the receive buffer descriptor. So rx_byte_cnt should + * add the length of the extracted VLAN header. + */ + if (bd_status & ENETC_RXBD_FLAG_VLAN) + rx_byte_cnt += VLAN_HLEN; + rx_byte_cnt += skb->len + ETH_HLEN; rx_frm_cnt++; napi_gro_receive(napi, skb); From fdebd850cc065495abf1d64756496050bb22db67 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 2 Jun 2023 17:46:59 +0800 Subject: [PATCH 416/934] net: enetc: correct rx_bytes statistics of XDP The rx_bytes statistics of XDP are always zero, because rx_byte_cnt is not updated after it is initialized to 0. So fix it. Fixes: d1b15102dd16 ("net: enetc: add support for XDP_DROP and XDP_PASS") Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index d6c0f3f46c2a..9e1b2536e9a9 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1571,6 +1571,14 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i, &cleaned_cnt, &xdp_buff); + /* When set, the outer VLAN header is extracted and reported + * in the receive buffer descriptor. So rx_byte_cnt should + * add the length of the extracted VLAN header. + */ + if (bd_status & ENETC_RXBD_FLAG_VLAN) + rx_byte_cnt += VLAN_HLEN; + rx_byte_cnt += xdp_get_buff_len(&xdp_buff); + xdp_act = bpf_prog_run_xdp(prog, &xdp_buff); switch (xdp_act) { From 8cde87b007dad2e461015ff70352af56ceb02c75 Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Sat, 3 Jun 2023 07:52:09 +0800 Subject: [PATCH 417/934] net: sched: wrap tc_skip_wrapper with CONFIG_RETPOLINE This patch fixes the following sparse warning: net/sched/sch_api.c:2305:1: sparse: warning: symbol 'tc_skip_wrapper' was not declared. Should it be static? No functional change intended. Signed-off-by: Min-Hua Chen Acked-by: Pedro Tammela Signed-off-by: David S. Miller --- net/sched/sch_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 014209b1dd58..9ea51812b9cf 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2302,7 +2302,9 @@ static struct pernet_operations psched_net_ops = { .exit = psched_net_exit, }; +#if IS_ENABLED(CONFIG_RETPOLINE) DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); +#endif static int __init pktsched_init(void) { From 8681f71759010503892f9e3ddb05f65c0f21b690 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Fri, 2 Jun 2023 19:50:34 -0700 Subject: [PATCH 418/934] KVM: arm64: PMU: Restore the host's PMUSERENR_EL0 Restore the host's PMUSERENR_EL0 value instead of clearing it, before returning back to userspace, as the host's EL0 might have a direct access to PMU registers (some bits of PMUSERENR_EL0 for might not be zero for the host EL0). Fixes: 83a7a4d643d3 ("arm64: perf: Enable PMU counter userspace access for perf event") Signed-off-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230603025035.3781797-2-reijiw@google.com --- arch/arm64/kvm/hyp/include/hyp/switch.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 5c15c58f90cc..0dcb8f7620d1 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -82,7 +82,12 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) * EL1 instead of being trapped to EL2. */ if (kvm_arm_support_pmu_v3()) { + struct kvm_cpu_context *hctxt; + write_sysreg(0, pmselr_el0); + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); } @@ -106,8 +111,12 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); write_sysreg(0, hstr_el2); - if (kvm_arm_support_pmu_v3()) - write_sysreg(0, pmuserenr_el0); + if (kvm_arm_support_pmu_v3()) { + struct kvm_cpu_context *hctxt; + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); + } if (cpus_have_final_cap(ARM64_SME)) { sysreg_clear_set_s(SYS_HFGRTR_EL2, 0, From 0c2f9acf6ae74118385f7a7d48f4b2d93637b628 Mon Sep 17 00:00:00 2001 From: Reiji Watanabe Date: Fri, 2 Jun 2023 19:50:35 -0700 Subject: [PATCH 419/934] KVM: arm64: PMU: Don't overwrite PMUSERENR with vcpu loaded Currently, with VHE, KVM sets ER, CR, SW and EN bits of PMUSERENR_EL0 to 1 on vcpu_load(), and saves and restores the register value for the host on vcpu_load() and vcpu_put(). If the value of those bits are cleared on a pCPU with a vCPU loaded (armv8pmu_start() would do that when PMU counters are programmed for the guest), PMU access from the guest EL0 might be trapped to the guest EL1 directly regardless of the current PMUSERENR_EL0 value of the vCPU. Fix this by not letting armv8pmu_start() overwrite PMUSERENR_EL0 on the pCPU where PMUSERENR_EL0 for the guest is loaded, and instead updating the saved shadow register value for the host so that the value can be restored on vcpu_put() later. While vcpu_{put,load}() are manipulating PMUSERENR_EL0, disable IRQs to prevent a race condition between these processes and IPIs that attempt to update PMUSERENR_EL0 for the host EL0. Suggested-by: Mark Rutland Suggested-by: Marc Zyngier Fixes: 83a7a4d643d3 ("arm64: perf: Enable PMU counter userspace access for perf event") Signed-off-by: Reiji Watanabe Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230603025035.3781797-3-reijiw@google.com --- arch/arm/include/asm/arm_pmuv3.h | 5 +++++ arch/arm64/include/asm/kvm_host.h | 7 +++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 2 ++ arch/arm64/kvm/hyp/vhe/switch.c | 14 +++++++++++++ arch/arm64/kvm/pmu.c | 27 +++++++++++++++++++++++++ drivers/perf/arm_pmuv3.c | 21 ++++++++++++++++--- 6 files changed, 73 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index 78d3d4b82c6c..92ddd950478e 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -222,6 +222,11 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) return false; } +static inline bool kvm_set_pmuserenr(u64 val) +{ + return false; +} + /* PMU Version in DFR Register */ #define ARMV8_PMU_DFR_VER_NI 0 #define ARMV8_PMU_DFR_VER_V3P4 0x5 diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7e7e19ef6993..9787503ff43f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -699,6 +699,8 @@ struct kvm_vcpu_arch { #define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) /* Software step state is Active-pending */ #define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5)) +/* PMUSERENR for the guest EL0 is on physical CPU */ +#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6)) /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -1065,9 +1067,14 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void kvm_clr_pmu_events(u32 clr); +bool kvm_set_pmuserenr(u64 val); #else static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} +static inline bool kvm_set_pmuserenr(u64 val) +{ + return false; +} #endif void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 0dcb8f7620d1..4fe217efa218 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -89,6 +89,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + vcpu_set_flag(vcpu, PMUSERENR_ON_CPU); } vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); @@ -116,6 +117,7 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); + vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU); } if (cpus_have_final_cap(ARM64_SME)) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 7a1aa511e7da..b37e7c96efea 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -92,14 +92,28 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) } NOKPROBE_SYMBOL(__deactivate_traps); +/* + * Disable IRQs in {activate,deactivate}_traps_vhe_{load,put}() to + * prevent a race condition between context switching of PMUSERENR_EL0 + * in __{activate,deactivate}_traps_common() and IPIs that attempts to + * update PMUSERENR_EL0. See also kvm_set_pmuserenr(). + */ void activate_traps_vhe_load(struct kvm_vcpu *vcpu) { + unsigned long flags; + + local_irq_save(flags); __activate_traps_common(vcpu); + local_irq_restore(flags); } void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) { + unsigned long flags; + + local_irq_save(flags); __deactivate_traps_common(vcpu); + local_irq_restore(flags); } static const exit_handler_fn hyp_exit_handlers[] = { diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 7887133d15f0..121f1a14c829 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -209,3 +209,30 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) kvm_vcpu_pmu_enable_el0(events_host); kvm_vcpu_pmu_disable_el0(events_guest); } + +/* + * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU + * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched + * to the value for the guest on vcpu_load(). The value for the host EL0 + * will be restored on vcpu_put(), before returning to userspace. + * This isn't necessary for nVHE, as the register is context switched for + * every guest enter/exit. + * + * Return true if KVM takes care of the register. Otherwise return false. + */ +bool kvm_set_pmuserenr(u64 val) +{ + struct kvm_cpu_context *hctxt; + struct kvm_vcpu *vcpu; + + if (!kvm_arm_support_pmu_v3() || !has_vhe()) + return false; + + vcpu = kvm_get_running_vcpu(); + if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU)) + return false; + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val; + return true; +} diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index c98e4039386d..93b7edb5f1e7 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -677,9 +677,25 @@ static inline u32 armv8pmu_getreset_flags(void) return value; } +static void update_pmuserenr(u64 val) +{ + lockdep_assert_irqs_disabled(); + + /* + * The current PMUSERENR_EL0 value might be the value for the guest. + * If that's the case, have KVM keep tracking of the register value + * for the host EL0 so that KVM can restore it before returning to + * the host EL0. Otherwise, update the register now. + */ + if (kvm_set_pmuserenr(val)) + return; + + write_pmuserenr(val); +} + static void armv8pmu_disable_user_access(void) { - write_pmuserenr(0); + update_pmuserenr(0); } static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) @@ -695,8 +711,7 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu) armv8pmu_write_evcntr(i, 0); } - write_pmuserenr(0); - write_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); + update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR); } static void armv8pmu_enable_event(struct perf_event *event) From 9561de3a55bed6bdd44a12820ba81ec416e705a7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Jun 2023 14:04:27 -0400 Subject: [PATCH 420/934] Linux 6.4-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 836643eaefee..09866a85bc2a 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 89a4bf0dc3857569a77061d3d5ea2ac85f7e13c6 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 04:05:27 +1000 Subject: [PATCH 421/934] xfs: buffer pins need to hold a buffer reference When a buffer is unpinned by xfs_buf_item_unpin(), we need to access the buffer after we've dropped the buffer log item reference count. This opens a window where we can have two racing unpins for the buffer item (e.g. shutdown checkpoint context callback processing racing with journal IO iclog completion processing) and both attempt to access the buffer after dropping the BLI reference count. If we are unlucky, the "BLI freed" context wins the race and frees the buffer before the "BLI still active" case checks the buffer pin count. This results in a use after free that can only be triggered in active filesystem shutdown situations. To fix this, we need to ensure that buffer existence extends beyond the BLI reference count checks and until the unpin processing is complete. This implies that a buffer pin operation must also take a buffer reference to ensure that the buffer cannot be freed until the buffer unpin processing is complete. Reported-by: yangerkun Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf_item.c | 88 ++++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index df7322ed73fa..023d4e0385dd 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -452,10 +452,18 @@ xfs_buf_item_format( * This is called to pin the buffer associated with the buf log item in memory * so it cannot be written out. * - * We also always take a reference to the buffer log item here so that the bli - * is held while the item is pinned in memory. This means that we can - * unconditionally drop the reference count a transaction holds when the - * transaction is completed. + * We take a reference to the buffer log item here so that the BLI life cycle + * extends at least until the buffer is unpinned via xfs_buf_item_unpin() and + * inserted into the AIL. + * + * We also need to take a reference to the buffer itself as the BLI unpin + * processing requires accessing the buffer after the BLI has dropped the final + * BLI reference. See xfs_buf_item_unpin() for an explanation. + * If unpins race to drop the final BLI reference and only the + * BLI owns a reference to the buffer, then the loser of the race can have the + * buffer fgreed from under it (e.g. on shutdown). Taking a buffer reference per + * pin count ensures the life cycle of the buffer extends for as + * long as we hold the buffer pin reference in xfs_buf_item_unpin(). */ STATIC void xfs_buf_item_pin( @@ -470,13 +478,30 @@ xfs_buf_item_pin( trace_xfs_buf_item_pin(bip); + xfs_buf_hold(bip->bli_buf); atomic_inc(&bip->bli_refcount); atomic_inc(&bip->bli_buf->b_pin_count); } /* - * This is called to unpin the buffer associated with the buf log item which - * was previously pinned with a call to xfs_buf_item_pin(). + * This is called to unpin the buffer associated with the buf log item which was + * previously pinned with a call to xfs_buf_item_pin(). We enter this function + * with a buffer pin count, a buffer reference and a BLI reference. + * + * We must drop the BLI reference before we unpin the buffer because the AIL + * doesn't acquire a BLI reference whenever it accesses it. Therefore if the + * refcount drops to zero, the bli could still be AIL resident and the buffer + * submitted for I/O at any point before we return. This can result in IO + * completion freeing the buffer while we are still trying to access it here. + * This race condition can also occur in shutdown situations where we abort and + * unpin buffers from contexts other that journal IO completion. + * + * Hence we have to hold a buffer reference per pin count to ensure that the + * buffer cannot be freed until we have finished processing the unpin operation. + * The reference is taken in xfs_buf_item_pin(), and we must hold it until we + * are done processing the buffer state. In the case of an abort (remove = + * true) then we re-use the current pin reference as the IO reference we hand + * off to IO failure handling. */ STATIC void xfs_buf_item_unpin( @@ -493,24 +518,18 @@ xfs_buf_item_unpin( trace_xfs_buf_item_unpin(bip); - /* - * Drop the bli ref associated with the pin and grab the hold required - * for the I/O simulation failure in the abort case. We have to do this - * before the pin count drops because the AIL doesn't acquire a bli - * reference. Therefore if the refcount drops to zero, the bli could - * still be AIL resident and the buffer submitted for I/O (and freed on - * completion) at any point before we return. This can be removed once - * the AIL properly holds a reference on the bli. - */ freed = atomic_dec_and_test(&bip->bli_refcount); - if (freed && !stale && remove) - xfs_buf_hold(bp); if (atomic_dec_and_test(&bp->b_pin_count)) wake_up_all(&bp->b_waiters); - /* nothing to do but drop the pin count if the bli is active */ - if (!freed) + /* + * Nothing to do but drop the buffer pin reference if the BLI is + * still active. + */ + if (!freed) { + xfs_buf_rele(bp); return; + } if (stale) { ASSERT(bip->bli_flags & XFS_BLI_STALE); @@ -522,6 +541,15 @@ xfs_buf_item_unpin( trace_xfs_buf_item_unpin_stale(bip); + /* + * The buffer has been locked and referenced since it was marked + * stale so we own both lock and reference exclusively here. We + * do not need the pin reference any more, so drop it now so + * that we only have one reference to drop once item completion + * processing is complete. + */ + xfs_buf_rele(bp); + /* * If we get called here because of an IO error, we may or may * not have the item on the AIL. xfs_trans_ail_delete() will @@ -538,16 +566,30 @@ xfs_buf_item_unpin( ASSERT(bp->b_log_item == NULL); } xfs_buf_relse(bp); - } else if (remove) { + return; + } + + if (remove) { /* - * The buffer must be locked and held by the caller to simulate - * an async I/O failure. We acquired the hold for this case - * before the buffer was unpinned. + * We need to simulate an async IO failures here to ensure that + * the correct error completion is run on this buffer. This + * requires a reference to the buffer and for the buffer to be + * locked. We can safely pass ownership of the pin reference to + * the IO to ensure that nothing can free the buffer while we + * wait for the lock and then run the IO failure completion. */ xfs_buf_lock(bp); bp->b_flags |= XBF_ASYNC; xfs_buf_ioend_fail(bp); + return; } + + /* + * BLI has no more active references - it will be moved to the AIL to + * manage the remaining BLI/buffer life cycle. There is nothing left for + * us to do here so drop the pin reference to the buffer. + */ + xfs_buf_rele(bp); } STATIC uint From 00dcd17cfa7f103f7d640ffd34645a2ddab96330 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 04:06:27 +1000 Subject: [PATCH 422/934] xfs: restore allocation trylock iteration It was accidentally dropped when refactoring the allocation code, resulting in the AG iteration always doing blocking AG iteration. This results in a small performance regression for a specific fsmark test that runs more user data writer threads than there are AGs. Reported-by: kernel test robot Fixes: 2edf06a50f5b ("xfs: factor xfs_alloc_vextent_this_ag() for _iterate_ags()") Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index fdfa08cbf4db..61eb65be17f3 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3187,7 +3187,8 @@ xfs_alloc_vextent_check_args( */ static int xfs_alloc_vextent_prepare_ag( - struct xfs_alloc_arg *args) + struct xfs_alloc_arg *args, + uint32_t flags) { bool need_pag = !args->pag; int error; @@ -3196,7 +3197,7 @@ xfs_alloc_vextent_prepare_ag( args->pag = xfs_perag_get(args->mp, args->agno); args->agbp = NULL; - error = xfs_alloc_fix_freelist(args, 0); + error = xfs_alloc_fix_freelist(args, flags); if (error) { trace_xfs_alloc_vextent_nofix(args); if (need_pag) @@ -3336,7 +3337,7 @@ xfs_alloc_vextent_this_ag( return error; } - error = xfs_alloc_vextent_prepare_ag(args); + error = xfs_alloc_vextent_prepare_ag(args, 0); if (!error && args->agbp) error = xfs_alloc_ag_vextent_size(args); @@ -3380,7 +3381,7 @@ restart: for_each_perag_wrap_range(mp, start_agno, restart_agno, mp->m_sb.sb_agcount, agno, args->pag) { args->agno = agno; - error = xfs_alloc_vextent_prepare_ag(args); + error = xfs_alloc_vextent_prepare_ag(args, flags); if (error) break; if (!args->agbp) { @@ -3546,7 +3547,7 @@ xfs_alloc_vextent_exact_bno( return error; } - error = xfs_alloc_vextent_prepare_ag(args); + error = xfs_alloc_vextent_prepare_ag(args, 0); if (!error && args->agbp) error = xfs_alloc_ag_vextent_exact(args); @@ -3587,7 +3588,7 @@ xfs_alloc_vextent_near_bno( if (needs_perag) args->pag = xfs_perag_grab(mp, args->agno); - error = xfs_alloc_vextent_prepare_ag(args); + error = xfs_alloc_vextent_prepare_ag(args, 0); if (!error && args->agbp) error = xfs_alloc_ag_vextent_near(args); From cb042117488dbf0b3b38b05771639890fada9a52 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 04:07:27 +1000 Subject: [PATCH 423/934] xfs: defered work could create precommits To fix a AGI-AGF-inode cluster buffer deadlock, we need to move inode cluster buffer operations to the ->iop_precommit() method. However, this means that deferred operations can require precommits to be run on the final transaction that the deferred ops pass back to xfs_trans_commit() context. This will be exposed by attribute handling, in that the last changes to the inode in the attr set state machine "disappear" because the precommit operation is not run. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_trans.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 8afc0c080861..f81fbf081b01 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -970,6 +970,11 @@ __xfs_trans_commit( error = xfs_defer_finish_noroll(&tp); if (error) goto out_unreserve; + + /* Run precommits from final tx in defer chain. */ + error = xfs_trans_run_precommits(tp); + if (error) + goto out_unreserve; } /* From 82842fee6e5979ca7e2bf4d839ef890c22ffb7aa Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 04:08:27 +1000 Subject: [PATCH 424/934] xfs: fix AGF vs inode cluster buffer deadlock Lock order in XFS is AGI -> AGF, hence for operations involving inode unlinked list operations we always lock the AGI first. Inode unlinked list operations operate on the inode cluster buffer, so the lock order there is AGI -> inode cluster buffer. For O_TMPFILE operations, this now means the lock order set down in xfs_rename and xfs_link is AGI -> inode cluster buffer -> AGF as the unlinked ops are done before the directory modifications that may allocate space and lock the AGF. Unfortunately, we also now lock the inode cluster buffer when logging an inode so that we can attach the inode to the cluster buffer and pin it in memory. This creates a lock order of AGF -> inode cluster buffer in directory operations as we have to log the inode after we've allocated new space for it. This creates a lock inversion between the AGF and the inode cluster buffer. Because the inode cluster buffer is shared across multiple inodes, the inversion is not specific to individual inodes but can occur when inodes in the same cluster buffer are accessed in different orders. To fix this we need move all the inode log item cluster buffer interactions to the end of the current transaction. Unfortunately, xfs_trans_log_inode() calls are littered throughout the transactions with no thought to ordering against other items or locking. This makes it difficult to do anything that involves changing the call sites of xfs_trans_log_inode() to change locking orders. However, we do now have a mechanism that allows is to postpone dirty item processing to just before we commit the transaction: the ->iop_precommit method. This will be called after all the modifications are done and high level objects like AGI and AGF buffers have been locked and modified, thereby providing a mechanism that guarantees we don't lock the inode cluster buffer before those high level objects are locked. This change is largely moving the guts of xfs_trans_log_inode() to xfs_inode_item_precommit() and providing an extra flag context in the inode log item to track the dirty state of the inode in the current transaction. This also means we do a lot less repeated work in xfs_trans_log_inode() by only doing it once per transaction when all the work is done. Fixes: 298f7bec503f ("xfs: pin inode backing buffer to the inode log item") Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_log_format.h | 9 +- fs/xfs/libxfs/xfs_trans_inode.c | 113 ++---------------------- fs/xfs/xfs_inode_item.c | 149 ++++++++++++++++++++++++++++++++ fs/xfs/xfs_inode_item.h | 1 + 4 files changed, 166 insertions(+), 106 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index f13e0809dc63..269573c82808 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -324,7 +324,6 @@ struct xfs_inode_log_format_32 { #define XFS_ILOG_DOWNER 0x200 /* change the data fork owner on replay */ #define XFS_ILOG_AOWNER 0x400 /* change the attr fork owner on replay */ - /* * The timestamps are dirty, but not necessarily anything else in the inode * core. Unlike the other fields above this one must never make it to disk @@ -333,6 +332,14 @@ struct xfs_inode_log_format_32 { */ #define XFS_ILOG_TIMESTAMP 0x4000 +/* + * The version field has been changed, but not necessarily anything else of + * interest. This must never make it to disk - it is used purely to ensure that + * the inode item ->precommit operation can update the fsync flag triggers + * in the inode item correctly. + */ +#define XFS_ILOG_IVERSION 0x8000 + #define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \ XFS_ILOG_DBROOT | XFS_ILOG_DEV | \ XFS_ILOG_ADATA | XFS_ILOG_AEXT | \ diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 8b5547073379..cb4796b6e693 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -40,9 +40,8 @@ xfs_trans_ijoin( iip->ili_lock_flags = lock_flags; ASSERT(!xfs_iflags_test(ip, XFS_ISTALE)); - /* - * Get a log_item_desc to point at the new item. - */ + /* Reset the per-tx dirty context and add the item to the tx. */ + iip->ili_dirty_flags = 0; xfs_trans_add_item(tp, &iip->ili_item); } @@ -76,17 +75,10 @@ xfs_trans_ichgtime( /* * This is called to mark the fields indicated in fieldmask as needing to be * logged when the transaction is committed. The inode must already be - * associated with the given transaction. - * - * The values for fieldmask are defined in xfs_inode_item.h. We always log all - * of the core inode if any of it has changed, and we always log all of the - * inline data/extents/b-tree root if any of them has changed. - * - * Grab and pin the cluster buffer associated with this inode to avoid RMW - * cycles at inode writeback time. Avoid the need to add error handling to every - * xfs_trans_log_inode() call by shutting down on read error. This will cause - * transactions to fail and everything to error out, just like if we return a - * read error in a dirty transaction and cancel it. + * associated with the given transaction. All we do here is record where the + * inode was dirtied and mark the transaction and inode log item dirty; + * everything else is done in the ->precommit log item operation after the + * changes in the transaction have been completed. */ void xfs_trans_log_inode( @@ -96,7 +88,6 @@ xfs_trans_log_inode( { struct xfs_inode_log_item *iip = ip->i_itemp; struct inode *inode = VFS_I(ip); - uint iversion_flags = 0; ASSERT(iip); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); @@ -104,18 +95,6 @@ xfs_trans_log_inode( tp->t_flags |= XFS_TRANS_DIRTY; - /* - * Don't bother with i_lock for the I_DIRTY_TIME check here, as races - * don't matter - we either will need an extra transaction in 24 hours - * to log the timestamps, or will clear already cleared fields in the - * worst case. - */ - if (inode->i_state & I_DIRTY_TIME) { - spin_lock(&inode->i_lock); - inode->i_state &= ~I_DIRTY_TIME; - spin_unlock(&inode->i_lock); - } - /* * First time we log the inode in a transaction, bump the inode change * counter if it is configured for this to occur. While we have the @@ -128,86 +107,10 @@ xfs_trans_log_inode( if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) { if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE)) - iversion_flags = XFS_ILOG_CORE; + flags |= XFS_ILOG_IVERSION; } - /* - * If we're updating the inode core or the timestamps and it's possible - * to upgrade this inode to bigtime format, do so now. - */ - if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) && - xfs_has_bigtime(ip->i_mount) && - !xfs_inode_has_bigtime(ip)) { - ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME; - flags |= XFS_ILOG_CORE; - } - - /* - * Inode verifiers do not check that the extent size hint is an integer - * multiple of the rt extent size on a directory with both rtinherit - * and extszinherit flags set. If we're logging a directory that is - * misconfigured in this way, clear the hint. - */ - if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && - (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && - (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { - ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | - XFS_DIFLAG_EXTSZINHERIT); - ip->i_extsize = 0; - flags |= XFS_ILOG_CORE; - } - - /* - * Record the specific change for fdatasync optimisation. This allows - * fdatasync to skip log forces for inodes that are only timestamp - * dirty. - */ - spin_lock(&iip->ili_lock); - iip->ili_fsync_fields |= flags; - - if (!iip->ili_item.li_buf) { - struct xfs_buf *bp; - int error; - - /* - * We hold the ILOCK here, so this inode is not going to be - * flushed while we are here. Further, because there is no - * buffer attached to the item, we know that there is no IO in - * progress, so nothing will clear the ili_fields while we read - * in the buffer. Hence we can safely drop the spin lock and - * read the buffer knowing that the state will not change from - * here. - */ - spin_unlock(&iip->ili_lock); - error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp); - if (error) { - xfs_force_shutdown(ip->i_mount, SHUTDOWN_META_IO_ERROR); - return; - } - - /* - * We need an explicit buffer reference for the log item but - * don't want the buffer to remain attached to the transaction. - * Hold the buffer but release the transaction reference once - * we've attached the inode log item to the buffer log item - * list. - */ - xfs_buf_hold(bp); - spin_lock(&iip->ili_lock); - iip->ili_item.li_buf = bp; - bp->b_flags |= _XBF_INODES; - list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list); - xfs_trans_brelse(tp, bp); - } - - /* - * Always OR in the bits from the ili_last_fields field. This is to - * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines - * in the eventual clearing of the ili_fields bits. See the big comment - * in xfs_iflush() for an explanation of this coordination mechanism. - */ - iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags); - spin_unlock(&iip->ili_lock); + iip->ili_dirty_flags |= flags; } int diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index ca2941ab6cbc..91c847a84e10 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -29,6 +29,153 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_inode_log_item, ili_item); } +static uint64_t +xfs_inode_item_sort( + struct xfs_log_item *lip) +{ + return INODE_ITEM(lip)->ili_inode->i_ino; +} + +/* + * Prior to finally logging the inode, we have to ensure that all the + * per-modification inode state changes are applied. This includes VFS inode + * state updates, format conversions, verifier state synchronisation and + * ensuring the inode buffer remains in memory whilst the inode is dirty. + * + * We have to be careful when we grab the inode cluster buffer due to lock + * ordering constraints. The unlinked inode modifications (xfs_iunlink_item) + * require AGI -> inode cluster buffer lock order. The inode cluster buffer is + * not locked until ->precommit, so it happens after everything else has been + * modified. + * + * Further, we have AGI -> AGF lock ordering, and with O_TMPFILE handling we + * have AGI -> AGF -> iunlink item -> inode cluster buffer lock order. Hence we + * cannot safely lock the inode cluster buffer in xfs_trans_log_inode() because + * it can be called on a inode (e.g. via bumplink/droplink) before we take the + * AGF lock modifying directory blocks. + * + * Rather than force a complete rework of all the transactions to call + * xfs_trans_log_inode() once and once only at the end of every transaction, we + * move the pinning of the inode cluster buffer to a ->precommit operation. This + * matches how the xfs_iunlink_item locks the inode cluster buffer, and it + * ensures that the inode cluster buffer locking is always done last in a + * transaction. i.e. we ensure the lock order is always AGI -> AGF -> inode + * cluster buffer. + * + * If we return the inode number as the precommit sort key then we'll also + * guarantee that the order all inode cluster buffer locking is the same all the + * inodes and unlink items in the transaction. + */ +static int +xfs_inode_item_precommit( + struct xfs_trans *tp, + struct xfs_log_item *lip) +{ + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; + struct inode *inode = VFS_I(ip); + unsigned int flags = iip->ili_dirty_flags; + + /* + * Don't bother with i_lock for the I_DIRTY_TIME check here, as races + * don't matter - we either will need an extra transaction in 24 hours + * to log the timestamps, or will clear already cleared fields in the + * worst case. + */ + if (inode->i_state & I_DIRTY_TIME) { + spin_lock(&inode->i_lock); + inode->i_state &= ~I_DIRTY_TIME; + spin_unlock(&inode->i_lock); + } + + /* + * If we're updating the inode core or the timestamps and it's possible + * to upgrade this inode to bigtime format, do so now. + */ + if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) && + xfs_has_bigtime(ip->i_mount) && + !xfs_inode_has_bigtime(ip)) { + ip->i_diflags2 |= XFS_DIFLAG2_BIGTIME; + flags |= XFS_ILOG_CORE; + } + + /* + * Inode verifiers do not check that the extent size hint is an integer + * multiple of the rt extent size on a directory with both rtinherit + * and extszinherit flags set. If we're logging a directory that is + * misconfigured in this way, clear the hint. + */ + if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && + (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && + (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { + ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | + XFS_DIFLAG_EXTSZINHERIT); + ip->i_extsize = 0; + flags |= XFS_ILOG_CORE; + } + + /* + * Record the specific change for fdatasync optimisation. This allows + * fdatasync to skip log forces for inodes that are only timestamp + * dirty. Once we've processed the XFS_ILOG_IVERSION flag, convert it + * to XFS_ILOG_CORE so that the actual on-disk dirty tracking + * (ili_fields) correctly tracks that the version has changed. + */ + spin_lock(&iip->ili_lock); + iip->ili_fsync_fields |= (flags & ~XFS_ILOG_IVERSION); + if (flags & XFS_ILOG_IVERSION) + flags = ((flags & ~XFS_ILOG_IVERSION) | XFS_ILOG_CORE); + + if (!iip->ili_item.li_buf) { + struct xfs_buf *bp; + int error; + + /* + * We hold the ILOCK here, so this inode is not going to be + * flushed while we are here. Further, because there is no + * buffer attached to the item, we know that there is no IO in + * progress, so nothing will clear the ili_fields while we read + * in the buffer. Hence we can safely drop the spin lock and + * read the buffer knowing that the state will not change from + * here. + */ + spin_unlock(&iip->ili_lock); + error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &bp); + if (error) + return error; + + /* + * We need an explicit buffer reference for the log item but + * don't want the buffer to remain attached to the transaction. + * Hold the buffer but release the transaction reference once + * we've attached the inode log item to the buffer log item + * list. + */ + xfs_buf_hold(bp); + spin_lock(&iip->ili_lock); + iip->ili_item.li_buf = bp; + bp->b_flags |= _XBF_INODES; + list_add_tail(&iip->ili_item.li_bio_list, &bp->b_li_list); + xfs_trans_brelse(tp, bp); + } + + /* + * Always OR in the bits from the ili_last_fields field. This is to + * coordinate with the xfs_iflush() and xfs_buf_inode_iodone() routines + * in the eventual clearing of the ili_fields bits. See the big comment + * in xfs_iflush() for an explanation of this coordination mechanism. + */ + iip->ili_fields |= (flags | iip->ili_last_fields); + spin_unlock(&iip->ili_lock); + + /* + * We are done with the log item transaction dirty state, so clear it so + * that it doesn't pollute future transactions. + */ + iip->ili_dirty_flags = 0; + return 0; +} + /* * The logged size of an inode fork is always the current size of the inode * fork. This means that when an inode fork is relogged, the size of the logged @@ -662,6 +809,8 @@ xfs_inode_item_committing( } static const struct xfs_item_ops xfs_inode_item_ops = { + .iop_sort = xfs_inode_item_sort, + .iop_precommit = xfs_inode_item_precommit, .iop_size = xfs_inode_item_size, .iop_format = xfs_inode_item_format, .iop_pin = xfs_inode_item_pin, diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index bbd836a44ff0..377e06007804 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -17,6 +17,7 @@ struct xfs_inode_log_item { struct xfs_log_item ili_item; /* common portion */ struct xfs_inode *ili_inode; /* inode ptr */ unsigned short ili_lock_flags; /* inode lock flags */ + unsigned int ili_dirty_flags; /* dirty in current tx */ /* * The ili_lock protects the interactions between the dirty state and * the flush state of the inode log item. This allows us to do atomic From 4320f34666363e202f8c290869c2d78c1ce9f3f1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 5 Jun 2023 04:09:27 +1000 Subject: [PATCH 425/934] xfs: Fix undefined behavior of shift into sign bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc-5: In file included from ./include/trace/define_trace.h:102:0, from ./fs/xfs/scrub/trace.h:988, from fs/xfs/scrub/trace.c:40: ./fs/xfs/./scrub/trace.h: In function ‘trace_raw_output_xchk_fsgate_class’: ./fs/xfs/scrub/scrub.h:111:28: error: initializer element is not constant #define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */ ^ Shifting the (signed) value 1 into the sign bit is undefined behavior. Fix this for all definitions in the file by shifting "1U" instead of "1". This was exposed by the first user added in commit 466c525d6d35e691 ("xfs: minimize overhead of drain wakeups by using jump labels"). Fixes: 160b5a784525e8a4 ("xfs: hoist the already_fixed variable to the scrub context") Signed-off-by: Geert Uytterhoeven Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/scrub/scrub.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index b38e93830dde..e113f2f5c254 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -105,10 +105,10 @@ struct xfs_scrub { }; /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */ -#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */ -#define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */ -#define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */ -#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */ +#define XCHK_TRY_HARDER (1U << 0) /* can't get resources, try again */ +#define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */ +#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */ +#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */ /* * The XCHK_FSGATES* flags reflect functionality in the main filesystem that From 6be73cecb5a241309008d7fc4c47749e5324bfb0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 5 Jun 2023 14:48:12 +1000 Subject: [PATCH 426/934] xfs: fix broken logic when detecting mergeable bmap records Commit 6bc6c99a944c was a well-intentioned effort to initiate consolidation of adjacent bmbt mapping records by setting the PREEN flag. Consolidation can only happen if the length of the combined record doesn't overflow the 21-bit blockcount field of the bmbt recordset. Unfortunately, the length test is inverted, leading to it triggering on data forks like these: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL 0: [0..16777207]: 76110848..92888055 0 (76110848..92888055) 16777208 1: [16777208..20639743]: 92888056..96750591 0 (92888056..96750591) 3862536 Note that record 0 has a length of 16777208 512b blocks. This corresponds to 2097151 4k fsblocks, which is the maximum. Hence the two records cannot be merged. However, the logic is still wrong even if we change the in-loop comparison, because the scope of our examination isn't broad enough inside the loop to detect mappings like this: 0: [0..9]: 76110838..76110847 0 (76110838..76110847) 10 1: [10..16777217]: 76110848..92888055 0 (76110848..92888055) 16777208 2: [16777218..20639753]: 92888056..96750591 0 (92888056..96750591) 3862536 These three records could be merged into two, but one cannot determine this purely from looking at records 0-1 or 1-2 in isolation. Hoist the mergability detection outside the loop, and base its decision making on whether or not a merged mapping could be expressed in fewer bmbt records. While we're at it, fix the incorrect return type of the iter function. Fixes: 336642f79283 ("xfs: alert the user about data/attr fork mappings that could be merged") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/scrub/bmap.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 69bc89d0fc68..5bf4326e9783 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -769,14 +769,14 @@ xchk_are_bmaps_contiguous( * mapping or false if there are no more mappings. Caller must ensure that * @info.icur is zeroed before the first call. */ -static int +static bool xchk_bmap_iext_iter( struct xchk_bmap_info *info, struct xfs_bmbt_irec *irec) { struct xfs_bmbt_irec got; struct xfs_ifork *ifp; - xfs_filblks_t prev_len; + unsigned int nr = 0; ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); @@ -790,12 +790,12 @@ xchk_bmap_iext_iter( irec->br_startoff); return false; } + nr++; /* * Iterate subsequent iextent records and merge them with the one * that we just read, if possible. */ - prev_len = irec->br_blockcount; while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) { if (!xchk_are_bmaps_contiguous(irec, &got)) break; @@ -805,20 +805,21 @@ xchk_bmap_iext_iter( got.br_startoff); return false; } - - /* - * Notify the user of mergeable records in the data or attr - * forks. CoW forks only exist in memory so we ignore them. - */ - if (info->whichfork != XFS_COW_FORK && - prev_len + got.br_blockcount > BMBT_BLOCKCOUNT_MASK) - xchk_ino_set_preen(info->sc, info->sc->ip->i_ino); + nr++; irec->br_blockcount += got.br_blockcount; - prev_len = got.br_blockcount; xfs_iext_next(ifp, &info->icur); } + /* + * If the merged mapping could be expressed with fewer bmbt records + * than we actually found, notify the user that this fork could be + * optimized. CoW forks only exist in memory so we ignore them. + */ + if (nr > 1 && info->whichfork != XFS_COW_FORK && + howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr) + xchk_ino_set_preen(info->sc, info->sc->ip->i_ino); + return true; } From 1e473279f492faf33ed3fbb3ecf8eec9f56b951c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 14:48:15 +1000 Subject: [PATCH 427/934] xfs: fix double xfs_perag_rele() in xfs_filestream_pick_ag() xfs_bmap_longest_free_extent() can return an error when accessing the AGF fails. In this case, the behaviour of xfs_filestream_pick_ag() is conditional on the error. We may continue the loop, or break out of it. The error handling after the loop cleans up the perag reference held when the break occurs. If we continue, the next loop iteration handles cleaning up the perag reference. EIther way, we don't need to release the active perag reference when xfs_bmap_longest_free_extent() fails. Doing so means we do a double decrement on the active reference count, and this causes tha active reference count to fall to zero. At this point, new active references will fail. This leads to unmount hanging because it tries to grab active references to that perag, only for it to fail. This happens inside a loop that retries until a inode tree radix tree tag is cleared, which cannot happen because we can't get an active reference to the perag. The unmount livelocks in this path: xfs_reclaim_inodes+0x80/0xc0 xfs_unmount_flush_inodes+0x5b/0x70 xfs_unmountfs+0x5b/0x1a0 xfs_fs_put_super+0x49/0x110 generic_shutdown_super+0x7c/0x1a0 kill_block_super+0x27/0x50 deactivate_locked_super+0x30/0x90 deactivate_super+0x3c/0x50 cleanup_mnt+0xc2/0x160 __cleanup_mnt+0x12/0x20 task_work_run+0x5e/0xa0 exit_to_user_mode_prepare+0x1bc/0x1c0 syscall_exit_to_user_mode+0x16/0x40 do_syscall_64+0x40/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Reported-by: Pengfei Xu Fixes: eb70aa2d8ed9 ("xfs: use for_each_perag_wrap in xfs_filestream_pick_ag") Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_filestream.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 22c13933c8f8..2fc98d313708 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -78,7 +78,6 @@ restart: *longest = 0; err = xfs_bmap_longest_free_extent(pag, NULL, longest); if (err) { - xfs_perag_rele(pag); if (err != -EAGAIN) break; /* Couldn't lock the AGF, skip this AG. */ From e0a8de7da35e5b22b44fa1013ccc0716e17b0c14 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 14:48:15 +1000 Subject: [PATCH 428/934] xfs: fix agf/agfl verification on v4 filesystems When a v4 filesystem has fl_last - fl_first != fl_count, we do not not detect the corruption and allow the AGF to be used as it if was fully valid. On V5 filesystems, we reset the AGFL to empty in these cases and avoid the corruption at a small cost of leaked blocks. If we don't catch the corruption on V4 filesystems, bad things happen later when an allocation attempts to trim the free list and either double-frees stale entries in the AGFl or tries to free NULLAGBNO entries. Either way, this is bad. Prevent this from happening by using the AGFL_NEED_RESET logic for v4 filesysetms, too. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 59 ++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 61eb65be17f3..fd3293a8c659 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -628,6 +628,25 @@ xfs_alloc_fixup_trees( return 0; } +/* + * We do not verify the AGFL contents against AGF-based index counters here, + * even though we may have access to the perag that contains shadow copies. We + * don't know if the AGF based counters have been checked, and if they have they + * still may be inconsistent because they haven't yet been reset on the first + * allocation after the AGF has been read in. + * + * This means we can only check that all agfl entries contain valid or null + * values because we can't reliably determine the active range to exclude + * NULLAGBNO as a valid value. + * + * However, we can't even do that for v4 format filesystems because there are + * old versions of mkfs out there that does not initialise the AGFL to known, + * verifiable values. HEnce we can't tell the difference between a AGFL block + * allocated by mkfs and a corrupted AGFL block here on v4 filesystems. + * + * As a result, we can only fully validate AGFL block numbers when we pull them + * from the freelist in xfs_alloc_get_freelist(). + */ static xfs_failaddr_t xfs_agfl_verify( struct xfs_buf *bp) @@ -637,12 +656,6 @@ xfs_agfl_verify( __be32 *agfl_bno = xfs_buf_to_agfl_bno(bp); int i; - /* - * There is no verification of non-crc AGFLs because mkfs does not - * initialise the AGFL to zero or NULL. Hence the only valid part of the - * AGFL is what the AGF says is active. We can't get to the AGF, so we - * can't verify just those entries are valid. - */ if (!xfs_has_crc(mp)) return NULL; @@ -2321,12 +2334,16 @@ xfs_free_agfl_block( } /* - * Check the agfl fields of the agf for inconsistency or corruption. The purpose - * is to detect an agfl header padding mismatch between current and early v5 - * kernels. This problem manifests as a 1-slot size difference between the - * on-disk flcount and the active [first, last] range of a wrapped agfl. This - * may also catch variants of agfl count corruption unrelated to padding. Either - * way, we'll reset the agfl and warn the user. + * Check the agfl fields of the agf for inconsistency or corruption. + * + * The original purpose was to detect an agfl header padding mismatch between + * current and early v5 kernels. This problem manifests as a 1-slot size + * difference between the on-disk flcount and the active [first, last] range of + * a wrapped agfl. + * + * However, we need to use these same checks to catch agfl count corruptions + * unrelated to padding. This could occur on any v4 or v5 filesystem, so either + * way, we need to reset the agfl and warn the user. * * Return true if a reset is required before the agfl can be used, false * otherwise. @@ -2342,10 +2359,6 @@ xfs_agfl_needs_reset( int agfl_size = xfs_agfl_size(mp); int active; - /* no agfl header on v4 supers */ - if (!xfs_has_crc(mp)) - return false; - /* * The agf read verifier catches severe corruption of these fields. * Repeat some sanity checks to cover a packed -> unpacked mismatch if @@ -2889,6 +2902,19 @@ xfs_alloc_put_freelist( return 0; } +/* + * Verify the AGF is consistent. + * + * We do not verify the AGFL indexes in the AGF are fully consistent here + * because of issues with variable on-disk structure sizes. Instead, we check + * the agfl indexes for consistency when we initialise the perag from the AGF + * information after a read completes. + * + * If the index is inconsistent, then we mark the perag as needing an AGFL + * reset. The first AGFL update performed then resets the AGFL indexes and + * refills the AGFL with known good free blocks, allowing the filesystem to + * continue operating normally at the cost of a few leaked free space blocks. + */ static xfs_failaddr_t xfs_agf_verify( struct xfs_buf *bp) @@ -2962,7 +2988,6 @@ xfs_agf_verify( return __this_address; return NULL; - } static void From 3148ebf2c0782340946732bfaf3073d23ac833fa Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 14:48:15 +1000 Subject: [PATCH 429/934] xfs: validity check agbnos on the AGFL If the agfl or the indexing in the AGF has been corrupted, getting a block form the AGFL could return an invalid block number. If this happens, bad things happen. Check the agbno we pull off the AGFL and return -EFSCORRUPTED if we find somethign bad. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index fd3293a8c659..643d17877832 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2780,6 +2780,9 @@ xfs_alloc_get_freelist( */ agfl_bno = xfs_buf_to_agfl_bno(agflbp); bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); + if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno))) + return -EFSCORRUPTED; + be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) From 7dfee17b13e5024c5c0ab1911859ded4182de3e5 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 14:48:15 +1000 Subject: [PATCH 430/934] xfs: validate block number being freed before adding to xefi Bad things happen in defered extent freeing operations if it is passed a bad block number in the xefi. This can come from a bogus agno/agbno pair from deferred agfl freeing, or just a bad fsbno being passed to __xfs_free_extent_later(). Either way, it's very difficult to diagnose where a null perag oops in EFI creation is coming from when the operation that queued the xefi has already been completed and there's no longer any trace of it around.... Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_ag.c | 5 ++++- fs/xfs/libxfs/xfs_alloc.c | 16 +++++++++++++--- fs/xfs/libxfs/xfs_alloc.h | 6 +++--- fs/xfs/libxfs/xfs_bmap.c | 10 ++++++++-- fs/xfs/libxfs/xfs_bmap_btree.c | 7 +++++-- fs/xfs/libxfs/xfs_ialloc.c | 24 ++++++++++++++++-------- fs/xfs/libxfs/xfs_refcount.c | 13 ++++++++++--- fs/xfs/xfs_reflink.c | 4 +++- 8 files changed, 62 insertions(+), 23 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 9b373a0c7aaf..ee84835ebc66 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -984,7 +984,10 @@ xfs_ag_shrink_space( if (err2 != -ENOSPC) goto resv_err; - __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true); + err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, + true); + if (err2) + goto resv_err; /* * Roll the transaction before trying to re-init the per-ag diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 643d17877832..c20fe99405d8 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2431,7 +2431,7 @@ xfs_agfl_reset( * the real allocation can proceed. Deferring the free disconnects freeing up * the AGFL slot from freeing the block. */ -STATIC void +static int xfs_defer_agfl_block( struct xfs_trans *tp, xfs_agnumber_t agno, @@ -2450,17 +2450,21 @@ xfs_defer_agfl_block( xefi->xefi_blockcount = 1; xefi->xefi_owner = oinfo->oi_owner; + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock))) + return -EFSCORRUPTED; + trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_extent_free_get_group(mp, xefi); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); + return 0; } /* * Add the extent to the list of extents to be free at transaction end. * The list is maintained sorted (by block number). */ -void +int __xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, @@ -2487,6 +2491,9 @@ __xfs_free_extent_later( #endif ASSERT(xfs_extfree_item_cache != NULL); + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) + return -EFSCORRUPTED; + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); xefi->xefi_startblock = bno; @@ -2510,6 +2517,7 @@ __xfs_free_extent_later( xfs_extent_free_get_group(mp, xefi); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); + return 0; } #ifdef DEBUG @@ -2670,7 +2678,9 @@ xfs_alloc_fix_freelist( goto out_agbp_relse; /* defer agfl frees */ - xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + if (error) + goto out_agbp_relse; } targs.tp = tp; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 5dbb25546d0b..85ac470be0da 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -230,7 +230,7 @@ xfs_buf_to_agfl_bno( return bp->b_addr; } -void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, +int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, bool skip_discard); @@ -254,14 +254,14 @@ void xfs_extent_free_get_group(struct xfs_mount *mp, #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ -static inline void +static inline int xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo) { - __xfs_free_extent_later(tp, bno, len, oinfo, false); + return __xfs_free_extent_later(tp, bno, len, oinfo, false); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index cd8870a16fd1..fef35696adb7 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -572,8 +572,12 @@ xfs_bmap_btree_to_extents( cblock = XFS_BUF_TO_BLOCK(cbp); if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); + if (error) + return error; + ip->i_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); @@ -5230,10 +5234,12 @@ xfs_bmap_del_extent_real( if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); } else { - __xfs_free_extent_later(tp, del->br_startblock, + error = __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, (bflags & XFS_BMAPI_NODISCARD) || del->br_state == XFS_EXT_UNWRITTEN); + if (error) + goto done; } } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 1b40e5f8b1ec..36564ae3084f 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -268,11 +268,14 @@ xfs_bmbt_free_block( struct xfs_trans *tp = cur->bc_tp; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); struct xfs_owner_info oinfo; + int error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); - ip->i_nblocks--; + error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); + if (error) + return error; + ip->i_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); return 0; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index a16d5de16933..34600f94c2f4 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1834,7 +1834,7 @@ retry: * might be sparse and only free the regions that are allocated as part of the * chunk. */ -STATIC void +static int xfs_difree_inode_chunk( struct xfs_trans *tp, xfs_agnumber_t agno, @@ -1851,10 +1851,10 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), - M_IGEO(mp)->ialloc_blks, - &XFS_RMAP_OINFO_INODES); - return; + return xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, agno, sagbno), + M_IGEO(mp)->ialloc_blks, + &XFS_RMAP_OINFO_INODES); } /* holemask is only 16-bits (fits in an unsigned long) */ @@ -1871,6 +1871,8 @@ xfs_difree_inode_chunk( XFS_INOBT_HOLEMASK_BITS); nextbit = startidx + 1; while (startidx < XFS_INOBT_HOLEMASK_BITS) { + int error; + nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS, nextbit); /* @@ -1896,8 +1898,11 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno), - contigblk, &XFS_RMAP_OINFO_INODES); + error = xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, agno, agbno), + contigblk, &XFS_RMAP_OINFO_INODES); + if (error) + return error; /* reset range to current bit and carry on... */ startidx = endidx = nextbit; @@ -1905,6 +1910,7 @@ xfs_difree_inode_chunk( next: nextbit++; } + return 0; } STATIC int @@ -2003,7 +2009,9 @@ xfs_difree_inobt( goto error0; } - xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); + error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); + if (error) + goto error0; } else { xic->deleted = false; diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index c1c65774dcc2..b6e21433925c 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1151,8 +1151,10 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, tmp.rc_startblock); - xfs_free_extent_later(cur->bc_tp, fsbno, + error = xfs_free_extent_later(cur->bc_tp, fsbno, tmp.rc_blockcount, NULL); + if (error) + goto out_error; } (*agbno) += tmp.rc_blockcount; @@ -1210,8 +1212,10 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, ext.rc_startblock); - xfs_free_extent_later(cur->bc_tp, fsbno, + error = xfs_free_extent_later(cur->bc_tp, fsbno, ext.rc_blockcount, NULL); + if (error) + goto out_error; } skip: @@ -1976,7 +1980,10 @@ xfs_refcount_recover_cow_leftovers( rr->rr_rrec.rc_blockcount); /* Free the block. */ - xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); + error = xfs_free_extent_later(tp, fsb, + rr->rr_rrec.rc_blockcount, NULL); + if (error) + goto out_trans; error = xfs_trans_commit(tp); if (error) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index f5dc46ce9803..abcc559f3c64 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -616,8 +616,10 @@ xfs_reflink_cancel_cow_blocks( xfs_refcount_free_cow_extent(*tpp, del.br_startblock, del.br_blockcount); - xfs_free_extent_later(*tpp, del.br_startblock, + error = xfs_free_extent_later(*tpp, del.br_startblock, del.br_blockcount, NULL); + if (error) + break; /* Roll the transaction */ error = xfs_defer_finish(tpp); From d4d12c02bf5f768f1b423c7ae2909c5afdfe0d5f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 5 Jun 2023 14:48:15 +1000 Subject: [PATCH 431/934] xfs: collect errors from inodegc for unlinked inode recovery Unlinked list recovery requires errors removing the inode the from the unlinked list get fed back to the main recovery loop. Now that we offload the unlinking to the inodegc work, we don't get errors being fed back when we trip over a corruption that prevents the inode from being removed from the unlinked list. This means we never clear the corrupt unlinked list bucket, resulting in runtime operations eventually tripping over it and shutting down. Fix this by collecting inodegc worker errors and feed them back to the flush caller. This is largely best effort - the only context that really cares is log recovery, and it only flushes a single inode at a time so we don't need complex synchronised handling. Essentially the inodegc workers will capture the first error that occurs and the next flush will gather them and clear them. The flush itself will only report the first gathered error. In the cases where callers can return errors, propagate the collected inodegc flush error up the error handling chain. In the case of inode unlinked list recovery, there are several superfluous calls to flush queued unlinked inodes - xlog_recover_iunlink_bucket() guarantees that it has flushed the inodegc and collected errors before it returns. Hence nothing in the calling path needs to run a flush, even when an error is returned. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_icache.c | 46 ++++++++++++++++++++++++++++++++-------- fs/xfs/xfs_icache.h | 4 ++-- fs/xfs/xfs_inode.c | 20 ++++++----------- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_log_recover.c | 19 ++++++++--------- fs/xfs/xfs_mount.h | 1 + fs/xfs/xfs_super.c | 1 + fs/xfs/xfs_trans.c | 4 +++- 8 files changed, 60 insertions(+), 37 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 0f60e301eb1f..453890942d9f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -454,6 +454,27 @@ xfs_inodegc_queue_all( return ret; } +/* Wait for all queued work and collect errors */ +static int +xfs_inodegc_wait_all( + struct xfs_mount *mp) +{ + int cpu; + int error = 0; + + flush_workqueue(mp->m_inodegc_wq); + for_each_online_cpu(cpu) { + struct xfs_inodegc *gc; + + gc = per_cpu_ptr(mp->m_inodegc, cpu); + if (gc->error && !error) + error = gc->error; + gc->error = 0; + } + + return error; +} + /* * Check the validity of the inode we just found it the cache */ @@ -1491,15 +1512,14 @@ xfs_blockgc_free_space( if (error) return error; - xfs_inodegc_flush(mp); - return 0; + return xfs_inodegc_flush(mp); } /* * Reclaim all the free space that we can by scheduling the background blockgc * and inodegc workers immediately and waiting for them all to clear. */ -void +int xfs_blockgc_flush_all( struct xfs_mount *mp) { @@ -1520,7 +1540,7 @@ xfs_blockgc_flush_all( for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG) flush_delayed_work(&pag->pag_blockgc_work); - xfs_inodegc_flush(mp); + return xfs_inodegc_flush(mp); } /* @@ -1842,13 +1862,17 @@ xfs_inodegc_set_reclaimable( * This is the last chance to make changes to an otherwise unreferenced file * before incore reclamation happens. */ -static void +static int xfs_inodegc_inactivate( struct xfs_inode *ip) { + int error; + trace_xfs_inode_inactivating(ip); - xfs_inactive(ip); + error = xfs_inactive(ip); xfs_inodegc_set_reclaimable(ip); + return error; + } void @@ -1880,8 +1904,12 @@ xfs_inodegc_worker( WRITE_ONCE(gc->shrinker_hits, 0); llist_for_each_entry_safe(ip, n, node, i_gclist) { + int error; + xfs_iflags_set(ip, XFS_INACTIVATING); - xfs_inodegc_inactivate(ip); + error = xfs_inodegc_inactivate(ip); + if (error && !gc->error) + gc->error = error; } memalloc_nofs_restore(nofs_flag); @@ -1905,13 +1933,13 @@ xfs_inodegc_push( * Force all currently queued inode inactivation work to run immediately and * wait for the work to finish. */ -void +int xfs_inodegc_flush( struct xfs_mount *mp) { xfs_inodegc_push(mp); trace_xfs_inodegc_flush(mp, __return_address); - flush_workqueue(mp->m_inodegc_wq); + return xfs_inodegc_wait_all(mp); } /* diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 87910191a9dd..1dcdcb23796e 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -62,7 +62,7 @@ int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp, unsigned int iwalk_flags); int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags); int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm); -void xfs_blockgc_flush_all(struct xfs_mount *mp); +int xfs_blockgc_flush_all(struct xfs_mount *mp); void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); @@ -80,7 +80,7 @@ void xfs_blockgc_start(struct xfs_mount *mp); void xfs_inodegc_worker(struct work_struct *work); void xfs_inodegc_push(struct xfs_mount *mp); -void xfs_inodegc_flush(struct xfs_mount *mp); +int xfs_inodegc_flush(struct xfs_mount *mp); void xfs_inodegc_stop(struct xfs_mount *mp); void xfs_inodegc_start(struct xfs_mount *mp); void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 5808abab786c..9e62cc500140 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1620,16 +1620,7 @@ xfs_inactive_ifree( */ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); - /* - * Just ignore errors at this point. There is nothing we can do except - * to try to keep going. Make sure it's not a silent error. - */ - error = xfs_trans_commit(tp); - if (error) - xfs_notice(mp, "%s: xfs_trans_commit returned error %d", - __func__, error); - - return 0; + return xfs_trans_commit(tp); } /* @@ -1693,12 +1684,12 @@ xfs_inode_needs_inactive( * now be truncated. Also, we clear all of the read-ahead state * kept for the inode here since the file is now closed. */ -void +int xfs_inactive( xfs_inode_t *ip) { struct xfs_mount *mp; - int error; + int error = 0; int truncate = 0; /* @@ -1736,7 +1727,7 @@ xfs_inactive( * reference to the inode at this point anyways. */ if (xfs_can_free_eofblocks(ip, true)) - xfs_free_eofblocks(ip); + error = xfs_free_eofblocks(ip); goto out; } @@ -1773,7 +1764,7 @@ xfs_inactive( /* * Free the inode. */ - xfs_inactive_ifree(ip); + error = xfs_inactive_ifree(ip); out: /* @@ -1781,6 +1772,7 @@ out: * the attached dquots. */ xfs_qm_dqdetach(ip); + return error; } /* diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 69d21e42c10a..7547caf2f2ab 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -470,7 +470,7 @@ enum layout_break_reason { (xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID)) int xfs_release(struct xfs_inode *ip); -void xfs_inactive(struct xfs_inode *ip); +int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct mnt_idmap *idmap, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 322eb2ee6c55..82c81d20459d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2711,7 +2711,9 @@ xlog_recover_iunlink_bucket( * just to flush the inodegc queue and wait for it to * complete. */ - xfs_inodegc_flush(mp); + error = xfs_inodegc_flush(mp); + if (error) + break; } prev_agino = agino; @@ -2719,10 +2721,15 @@ xlog_recover_iunlink_bucket( } if (prev_ip) { + int error2; + ip->i_prev_unlinked = prev_agino; xfs_irele(prev_ip); + + error2 = xfs_inodegc_flush(mp); + if (error2 && !error) + return error2; } - xfs_inodegc_flush(mp); return error; } @@ -2789,7 +2796,6 @@ xlog_recover_iunlink_ag( * bucket and remaining inodes on it unreferenced and * unfreeable. */ - xfs_inodegc_flush(pag->pag_mount); xlog_recover_clear_agi_bucket(pag, bucket); } } @@ -2806,13 +2812,6 @@ xlog_recover_process_iunlinks( for_each_perag(log->l_mp, agno, pag) xlog_recover_iunlink_ag(pag); - - /* - * Flush the pending unlinked inodes to ensure that the inactivations - * are fully completed on disk and the incore inodes can be reclaimed - * before we signal that recovery is complete. - */ - xfs_inodegc_flush(log->l_mp); } STATIC void diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index aaaf5ec13492..6c09f89534d3 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -62,6 +62,7 @@ struct xfs_error_cfg { struct xfs_inodegc { struct llist_head list; struct delayed_work work; + int error; /* approximate count of inodes in the list */ unsigned int items; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 7e706255f165..4120bd1cba90 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1100,6 +1100,7 @@ xfs_inodegc_init_percpu( #endif init_llist_head(&gc->list); gc->items = 0; + gc->error = 0; INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker); } return 0; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index f81fbf081b01..8c0bfc9a33b1 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -290,7 +290,9 @@ retry: * Do not perform a synchronous scan because callers can hold * other locks. */ - xfs_blockgc_flush_all(mp); + error = xfs_blockgc_flush_all(mp); + if (error) + return error; want_retry = false; goto retry; } From 2a84aea80e925ecba6349090559754f8e8eb68ef Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 26 May 2023 10:19:46 +0200 Subject: [PATCH 432/934] can: j1939: j1939_sk_send_loop_abort(): improved error queue handling in J1939 Socket This patch addresses an issue within the j1939_sk_send_loop_abort() function in the j1939/socket.c file, specifically in the context of Transport Protocol (TP) sessions. Without this patch, when a TP session is initiated and a Clear To Send (CTS) frame is received from the remote side requesting one data packet, the kernel dispatches the first Data Transport (DT) frame and then waits for the next CTS. If the remote side doesn't respond with another CTS, the kernel aborts due to a timeout. This leads to the user-space receiving an EPOLLERR on the socket, and the socket becomes active. However, when trying to read the error queue from the socket with sock.recvmsg(, , socket.MSG_ERRQUEUE), it returns -EAGAIN, given that the socket is non-blocking. This situation results in an infinite loop: the user-space repeatedly calls epoll(), epoll() returns the socket file descriptor with EPOLLERR, but the socket then blocks on the recv() of ERRQUEUE. This patch introduces an additional check for the J1939_SOCK_ERRQUEUE flag within the j1939_sk_send_loop_abort() function. If the flag is set, it indicates that the application has subscribed to receive error queue messages. In such cases, the kernel can communicate the current transfer state via the error queue. This allows for the function to return early, preventing the unnecessary setting of the socket into an error state, and breaking the infinite loop. It is crucial to note that a socket error is only needed if the application isn't using the error queue, as, without it, the application wouldn't be aware of transfer issues. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Reported-by: David Jander Tested-by: David Jander Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20230526081946.715190-1-o.rempel@pengutronix.de Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 1790469b2580..35970c25496a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -1088,6 +1088,11 @@ void j1939_sk_errqueue(struct j1939_session *session, void j1939_sk_send_loop_abort(struct sock *sk, int err) { + struct j1939_sock *jsk = j1939_sk(sk); + + if (jsk->state & J1939_SOCK_ERRQUEUE) + return; + sk->sk_err = err; sk_error_report(sk); From cd9c790de2088b0d797dc4d244b4f174f9962554 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Fri, 26 May 2023 20:19:09 +0300 Subject: [PATCH 433/934] can: j1939: change j1939_netdev_lock type to mutex It turns out access to j1939_can_rx_register() needs to be serialized, otherwise j1939_priv can be corrupted when parallel threads call j1939_netdev_start() and j1939_can_rx_register() fails. This issue is thoroughly covered in other commit which serializes access to j1939_can_rx_register(). Change j1939_netdev_lock type to mutex so that we do not need to remove GFP_KERNEL from can_rx_register(). j1939_netdev_lock seems to be used in normal contexts where mutex usage is not prohibited. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Suggested-by: Alexey Khoroshilov Signed-off-by: Fedor Pchelkin Tested-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://lore.kernel.org/r/20230526171910.227615-2-pchelkin@ispras.ru Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/j1939/main.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 821d4ff303b3..6ed79afe19a5 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -126,7 +126,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data) #define J1939_CAN_ID CAN_EFF_FLAG #define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG) -static DEFINE_SPINLOCK(j1939_netdev_lock); +static DEFINE_MUTEX(j1939_netdev_lock); static struct j1939_priv *j1939_priv_create(struct net_device *ndev) { @@ -220,7 +220,7 @@ static void __j1939_rx_release(struct kref *kref) j1939_can_rx_unregister(priv); j1939_ecu_unmap_all(priv); j1939_priv_set(priv->ndev, NULL); - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); } /* get pointer to priv without increasing ref counter */ @@ -248,9 +248,9 @@ static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev) { struct j1939_priv *priv; - spin_lock(&j1939_netdev_lock); + mutex_lock(&j1939_netdev_lock); priv = j1939_priv_get_by_ndev_locked(ndev); - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); return priv; } @@ -260,14 +260,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) struct j1939_priv *priv, *priv_new; int ret; - spin_lock(&j1939_netdev_lock); + mutex_lock(&j1939_netdev_lock); priv = j1939_priv_get_by_ndev_locked(ndev); if (priv) { kref_get(&priv->rx_kref); - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); return priv; } - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); priv = j1939_priv_create(ndev); if (!priv) @@ -277,20 +277,20 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) spin_lock_init(&priv->j1939_socks_lock); INIT_LIST_HEAD(&priv->j1939_socks); - spin_lock(&j1939_netdev_lock); + mutex_lock(&j1939_netdev_lock); priv_new = j1939_priv_get_by_ndev_locked(ndev); if (priv_new) { /* Someone was faster than us, use their priv and roll * back our's. */ kref_get(&priv_new->rx_kref); - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); dev_put(ndev); kfree(priv); return priv_new; } j1939_priv_set(ndev, priv); - spin_unlock(&j1939_netdev_lock); + mutex_unlock(&j1939_netdev_lock); ret = j1939_can_rx_register(priv); if (ret < 0) @@ -308,7 +308,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) void j1939_netdev_stop(struct j1939_priv *priv) { - kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock); + kref_put_mutex(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock); j1939_priv_put(priv); } From 9f16eb106aa5fce15904625661312623ec783ed3 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Fri, 26 May 2023 20:19:10 +0300 Subject: [PATCH 434/934] can: j1939: avoid possible use-after-free when j1939_can_rx_register fails Syzkaller reports the following failure: BUG: KASAN: use-after-free in kref_put include/linux/kref.h:64 [inline] BUG: KASAN: use-after-free in j1939_priv_put+0x25/0xa0 net/can/j1939/main.c:172 Write of size 4 at addr ffff888141c15058 by task swapper/3/0 CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.10.144-syzkaller #0 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x107/0x167 lib/dump_stack.c:118 print_address_description.constprop.0+0x1c/0x220 mm/kasan/report.c:385 __kasan_report mm/kasan/report.c:545 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:562 check_memory_region_inline mm/kasan/generic.c:186 [inline] check_memory_region+0x145/0x190 mm/kasan/generic.c:192 instrument_atomic_read_write include/linux/instrumented.h:101 [inline] atomic_fetch_sub_release include/asm-generic/atomic-instrumented.h:220 [inline] __refcount_sub_and_test include/linux/refcount.h:272 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] kref_put include/linux/kref.h:64 [inline] j1939_priv_put+0x25/0xa0 net/can/j1939/main.c:172 j1939_sk_sock_destruct+0x44/0x90 net/can/j1939/socket.c:374 __sk_destruct+0x4e/0x820 net/core/sock.c:1784 rcu_do_batch kernel/rcu/tree.c:2485 [inline] rcu_core+0xb35/0x1a30 kernel/rcu/tree.c:2726 __do_softirq+0x289/0x9a3 kernel/softirq.c:298 asm_call_irq_on_stack+0x12/0x20 __run_on_irqstack arch/x86/include/asm/irq_stack.h:26 [inline] run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:77 [inline] do_softirq_own_stack+0xaa/0xe0 arch/x86/kernel/irq_64.c:77 invoke_softirq kernel/softirq.c:393 [inline] __irq_exit_rcu kernel/softirq.c:423 [inline] irq_exit_rcu+0x136/0x200 kernel/softirq.c:435 sysvec_apic_timer_interrupt+0x4d/0x100 arch/x86/kernel/apic/apic.c:1095 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:635 Allocated by task 1141: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc.constprop.0+0xc9/0xd0 mm/kasan/common.c:461 kmalloc include/linux/slab.h:552 [inline] kzalloc include/linux/slab.h:664 [inline] j1939_priv_create net/can/j1939/main.c:131 [inline] j1939_netdev_start+0x111/0x860 net/can/j1939/main.c:268 j1939_sk_bind+0x8ea/0xd30 net/can/j1939/socket.c:485 __sys_bind+0x1f2/0x260 net/socket.c:1645 __do_sys_bind net/socket.c:1656 [inline] __se_sys_bind net/socket.c:1654 [inline] __x64_sys_bind+0x6f/0xb0 net/socket.c:1654 do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x61/0xc6 Freed by task 1141: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track+0x1c/0x30 mm/kasan/common.c:56 kasan_set_free_info+0x1b/0x30 mm/kasan/generic.c:355 __kasan_slab_free+0x112/0x170 mm/kasan/common.c:422 slab_free_hook mm/slub.c:1542 [inline] slab_free_freelist_hook+0xad/0x190 mm/slub.c:1576 slab_free mm/slub.c:3149 [inline] kfree+0xd9/0x3b0 mm/slub.c:4125 j1939_netdev_start+0x5ee/0x860 net/can/j1939/main.c:300 j1939_sk_bind+0x8ea/0xd30 net/can/j1939/socket.c:485 __sys_bind+0x1f2/0x260 net/socket.c:1645 __do_sys_bind net/socket.c:1656 [inline] __se_sys_bind net/socket.c:1654 [inline] __x64_sys_bind+0x6f/0xb0 net/socket.c:1654 do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x61/0xc6 It can be caused by this scenario: CPU0 CPU1 j1939_sk_bind(socket0, ndev0, ...) j1939_netdev_start() j1939_sk_bind(socket1, ndev0, ...) j1939_netdev_start() mutex_lock(&j1939_netdev_lock) j1939_priv_set(ndev0, priv) mutex_unlock(&j1939_netdev_lock) if (priv_new) kref_get(&priv_new->rx_kref) return priv_new; /* inside j1939_sk_bind() */ jsk->priv = priv j1939_can_rx_register(priv) // fails j1939_priv_set(ndev, NULL) kfree(priv) j1939_sk_sock_destruct() j1939_priv_put() // <- uaf To avoid this, call j1939_can_rx_register() under j1939_netdev_lock so that a concurrent thread cannot process j1939_priv before j1939_can_rx_register() returns. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Fedor Pchelkin Tested-by: Oleksij Rempel Acked-by: Oleksij Rempel Link: https://lore.kernel.org/r/20230526171910.227615-3-pchelkin@ispras.ru Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/j1939/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 6ed79afe19a5..ecff1c947d68 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -290,16 +290,18 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) return priv_new; } j1939_priv_set(ndev, priv); - mutex_unlock(&j1939_netdev_lock); ret = j1939_can_rx_register(priv); if (ret < 0) goto out_priv_put; + mutex_unlock(&j1939_netdev_lock); return priv; out_priv_put: j1939_priv_set(ndev, NULL); + mutex_unlock(&j1939_netdev_lock); + dev_put(ndev); kfree(priv); From bd574889c25d2ed7e3d61d784c59a539a95f0167 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Wed, 24 May 2023 21:15:29 +0200 Subject: [PATCH 435/934] selftests: alsa: pcm-test: Fix compiler warnings about the format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC 11.3.0 issues warnings in this module about wrong sizes of format specifiers: pcm-test.c: In function ‘test_pcm_time’: pcm-test.c:384:68: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 5 \ has type ‘unsigned int’ [-Wformat=] 384 | snprintf(msg, sizeof(msg), "rate mismatch %ld != %ld", rate, rrate); pcm-test.c:455:53: warning: format ‘%d’ expects argument of type ‘int’, but argument 4 has \ type ‘long int’ [-Wformat=] 455 | "expected %d, wrote %li", rate, frames); pcm-test.c:462:53: warning: format ‘%d’ expects argument of type ‘int’, but argument 4 has \ type ‘long int’ [-Wformat=] 462 | "expected %d, wrote %li", rate, frames); pcm-test.c:467:53: warning: format ‘%d’ expects argument of type ‘int’, but argument 4 has \ type ‘long int’ [-Wformat=] 467 | "expected %d, wrote %li", rate, frames); Simple fix according to compiler's suggestion removed the warnings. Signed-off-by: Mirsad Goran Todorovac Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20230524191528.13203-1-mirsad.todorovac@alu.unizg.hr Signed-off-by: Takashi Iwai --- tools/testing/selftests/alsa/pcm-test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index 3e390fe67eb9..b7eef32addb4 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -381,7 +381,7 @@ __format: goto __close; } if (rrate != rate) { - snprintf(msg, sizeof(msg), "rate mismatch %ld != %ld", rate, rrate); + snprintf(msg, sizeof(msg), "rate mismatch %ld != %d", rate, rrate); goto __close; } rperiod_size = period_size; @@ -447,24 +447,24 @@ __format: frames = snd_pcm_writei(handle, samples, rate); if (frames < 0) { snprintf(msg, sizeof(msg), - "Write failed: expected %d, wrote %li", rate, frames); + "Write failed: expected %ld, wrote %li", rate, frames); goto __close; } if (frames < rate) { snprintf(msg, sizeof(msg), - "expected %d, wrote %li", rate, frames); + "expected %ld, wrote %li", rate, frames); goto __close; } } else { frames = snd_pcm_readi(handle, samples, rate); if (frames < 0) { snprintf(msg, sizeof(msg), - "expected %d, wrote %li", rate, frames); + "expected %ld, wrote %li", rate, frames); goto __close; } if (frames < rate) { snprintf(msg, sizeof(msg), - "expected %d, wrote %li", rate, frames); + "expected %ld, wrote %li", rate, frames); goto __close; } } From 527c356b51f3ddee02c9ed5277538f85e30a2cdc Mon Sep 17 00:00:00 2001 From: Ai Chao Date: Fri, 26 May 2023 17:47:04 +0800 Subject: [PATCH 436/934] ALSA: hda/realtek: Add a quirk for HP Slim Desktop S01 Add a quirk for HP Slim Desktop S01 to fixup headset MIC no presence. Signed-off-by: Ai Chao Cc: Link: https://lore.kernel.org/r/20230526094704.14597-1-aichao@kylinos.cn Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7b5f194513c7..079a6d2835eb 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11694,6 +11694,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB), SND_PCI_QUIRK(0x103c, 0x872b, "HP", ALC897_FIXUP_HP_HSMIC_VERB), SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2), + SND_PCI_QUIRK(0x103c, 0x8768, "HP Slim Desktop S01", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2), SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE), From 7ca4c8d4d3f41c2cd9b4cf22bb829bf03dac0956 Mon Sep 17 00:00:00 2001 From: RenHai Date: Fri, 2 Jun 2023 08:36:04 +0800 Subject: [PATCH 437/934] ALSA: hda/realtek: Add Lenovo P3 Tower platform Headset microphone on this platform does not work without ALC897_FIXUP_HEADSET_MIC_PIN fixup. Signed-off-by: RenHai Cc: Link: https://lore.kernel.org/r/20230602003604.975892-1-kean0048@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 079a6d2835eb..9c346fa21c75 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11716,6 +11716,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x1057, "Lenovo P360", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x1064, "Lenovo P3 Tower", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN), From 1a93f10c5b12bd766a537b24a50fca5373467303 Mon Sep 17 00:00:00 2001 From: "Sayed, Karimuddin" Date: Fri, 2 Jun 2023 14:38:12 -0500 Subject: [PATCH 438/934] ALSA: hda/realtek: Add "Intel Reference board" and "NUC 13" SSID in the ALC256 Add "Intel Reference boad" and "Intel NUC 13" SSID in the alc256. Enable jack headset volume buttons Reviewed-by: Kai Vehmanen Signed-off-by: Sayed, Karimuddin Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230602193812.66768-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9c346fa21c75..57a2dd07efaf 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9588,6 +9588,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10ec, 0x124c, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1252, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC225_FIXUP_HEADSET_JACK), SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP), @@ -9807,6 +9808,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), + SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC225_FIXUP_HEADSET_JACK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 From 63b679a962acc49a9e412d5c511da8e25bda598f Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 2 Jun 2023 03:21:00 +0200 Subject: [PATCH 439/934] MAINTAINERS: Add myself as I2C host drivers maintainer I will help Wolfram out with the i2c controllers patches. Signed-off-by: Andi Shyti Acked-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0dab9737ec16..c2e40cc1d6be 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9687,8 +9687,9 @@ F: include/uapi/linux/i2c-*.h F: include/uapi/linux/i2c.h I2C SUBSYSTEM HOST DRIVERS +M: Andi Shyti L: linux-i2c@vger.kernel.org -S: Odd Fixes +S: Maintained W: https://i2c.wiki.kernel.org/ Q: https://patchwork.ozlabs.org/project/linux-i2c/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git From 8110bf24c82d08aa2ed1d09a2b996d06c41f2e67 Mon Sep 17 00:00:00 2001 From: Christian Heusel Date: Mon, 8 May 2023 16:18:01 +0200 Subject: [PATCH 440/934] i2c: img-scb: Fix spelling mistake "innacurate" -> "inaccurate" There is a spelling mistake in a comment. Fix it. Signed-off-by: Christian Heusel Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-img-scb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c index 8e987945ed45..39c479f96eb5 100644 --- a/drivers/i2c/busses/i2c-img-scb.c +++ b/drivers/i2c/busses/i2c-img-scb.c @@ -257,7 +257,7 @@ #define IMG_I2C_TIMEOUT (msecs_to_jiffies(1000)) /* - * Worst incs are 1 (innacurate) and 16*256 (irregular). + * Worst incs are 1 (inaccurate) and 16*256 (irregular). * So a sensible inc is the logarithmic mean: 64 (2^6), which is * in the middle of the valid range (0-127). */ From 7ebfd881abe9e0ea9557b29dab6aa28d294fabb4 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 10 May 2023 14:32:17 +0200 Subject: [PATCH 441/934] i2c: mchp-pci1xxxx: Avoid cast to incompatible function type Rather than casting pci1xxxx_i2c_shutdown to an incompatible function type, update the type to match that expected by __devm_add_action. Reported by clang-16 with W-1: .../i2c-mchp-pci1xxxx.c:1159:29: error: cast from 'void (*)(struct pci1xxxx_i2c *)' to 'void (*)(void *)' converts to incompatible function type [-Werror,-Wcast-function-type-strict] ret = devm_add_action(dev, (void (*)(void *))pci1xxxx_i2c_shutdown, i2c); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/device.h:251:29: note: expanded from macro 'devm_add_action' __devm_add_action(release, action, data, #action) ^~~~~~ No functional change intended. Compile tested only. Signed-off-by: Simon Horman Reviewed-by: Horatiu Vultur Reviewed-by: Andi Shyti Reviewed-by: Tharun Kumar P Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mchp-pci1xxxx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c index b21ffd6df927..5ef136c3ecb1 100644 --- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c +++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c @@ -1118,8 +1118,10 @@ static int pci1xxxx_i2c_resume(struct device *dev) static DEFINE_SIMPLE_DEV_PM_OPS(pci1xxxx_i2c_pm_ops, pci1xxxx_i2c_suspend, pci1xxxx_i2c_resume); -static void pci1xxxx_i2c_shutdown(struct pci1xxxx_i2c *i2c) +static void pci1xxxx_i2c_shutdown(void *data) { + struct pci1xxxx_i2c *i2c = data; + pci1xxxx_i2c_config_padctrl(i2c, false); pci1xxxx_i2c_configure_core_reg(i2c, false); } @@ -1156,7 +1158,7 @@ static int pci1xxxx_i2c_probe_pci(struct pci_dev *pdev, init_completion(&i2c->i2c_xfer_done); pci1xxxx_i2c_init(i2c); - ret = devm_add_action(dev, (void (*)(void *))pci1xxxx_i2c_shutdown, i2c); + ret = devm_add_action(dev, pci1xxxx_i2c_shutdown, i2c); if (ret) return ret; From cd2b8113c2e8b9f5a88a942e1eaca61eba401b85 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Jun 2023 12:37:47 +0000 Subject: [PATCH 442/934] net/sched: fq_pie: ensure reasonable TCA_FQ_PIE_QUANTUM values We got multiple syzbot reports, all duplicates of the following [1] syzbot managed to install fq_pie with a zero TCA_FQ_PIE_QUANTUM, thus triggering infinite loops. Use limits similar to sch_fq, with commits 3725a269815b ("pkt_sched: fq: avoid hang when quantum 0") and d9e15a273306 ("pkt_sched: fq: do not accept silly TCA_FQ_QUANTUM") [1] watchdog: BUG: soft lockup - CPU#0 stuck for 26s! [swapper/0:0] Modules linked in: irq event stamp: 172817 hardirqs last enabled at (172816): [] __el1_irq arch/arm64/kernel/entry-common.c:476 [inline] hardirqs last enabled at (172816): [] el1_interrupt+0x58/0x68 arch/arm64/kernel/entry-common.c:486 hardirqs last disabled at (172817): [] __el1_irq arch/arm64/kernel/entry-common.c:468 [inline] hardirqs last disabled at (172817): [] el1_interrupt+0x24/0x68 arch/arm64/kernel/entry-common.c:486 softirqs last enabled at (167634): [] softirq_handle_end kernel/softirq.c:414 [inline] softirqs last enabled at (167634): [] __do_softirq+0xac0/0xd54 kernel/softirq.c:600 softirqs last disabled at (167701): [] ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.4.0-rc3-syzkaller-geb0f1697d729 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/28/2023 pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : fq_pie_qdisc_dequeue+0x10c/0x8ac net/sched/sch_fq_pie.c:246 lr : fq_pie_qdisc_dequeue+0xe4/0x8ac net/sched/sch_fq_pie.c:240 sp : ffff800008007210 x29: ffff800008007280 x28: ffff0000c86f7890 x27: ffff0000cb20c2e8 x26: ffff0000cb20c2f0 x25: dfff800000000000 x24: ffff0000cb20c2e0 x23: ffff0000c86f7880 x22: 0000000000000040 x21: 1fffe000190def10 x20: ffff0000cb20c2e0 x19: ffff0000cb20c2e0 x18: ffff800008006e60 x17: 0000000000000000 x16: ffff80000850af6c x15: 0000000000000302 x14: 0000000000000100 x13: 0000000000000000 x12: 0000000000000001 x11: 0000000000000302 x10: 0000000000000100 x9 : 0000000000000000 x8 : 0000000000000000 x7 : ffff80000841c468 x6 : 0000000000000000 x5 : 0000000000000001 x4 : 0000000000000001 x3 : 0000000000000000 x2 : ffff0000cb20c2e0 x1 : ffff0000cb20c2e0 x0 : 0000000000000001 Call trace: fq_pie_qdisc_dequeue+0x10c/0x8ac net/sched/sch_fq_pie.c:246 dequeue_skb net/sched/sch_generic.c:292 [inline] qdisc_restart net/sched/sch_generic.c:397 [inline] __qdisc_run+0x1fc/0x231c net/sched/sch_generic.c:415 __dev_xmit_skb net/core/dev.c:3868 [inline] __dev_queue_xmit+0xc80/0x3318 net/core/dev.c:4210 dev_queue_xmit include/linux/netdevice.h:3085 [inline] neigh_connected_output+0x2f8/0x38c net/core/neighbour.c:1581 neigh_output include/net/neighbour.h:544 [inline] ip6_finish_output2+0xd60/0x1a1c net/ipv6/ip6_output.c:134 __ip6_finish_output net/ipv6/ip6_output.c:195 [inline] ip6_finish_output+0x538/0x8c8 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:292 [inline] ip6_output+0x270/0x594 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:458 [inline] NF_HOOK include/linux/netfilter.h:303 [inline] ndisc_send_skb+0xc30/0x1790 net/ipv6/ndisc.c:508 ndisc_send_rs+0x47c/0x5d4 net/ipv6/ndisc.c:718 addrconf_rs_timer+0x300/0x58c net/ipv6/addrconf.c:3936 call_timer_fn+0x19c/0x8cc kernel/time/timer.c:1700 expire_timers kernel/time/timer.c:1751 [inline] __run_timers+0x55c/0x734 kernel/time/timer.c:2022 run_timer_softirq+0x7c/0x114 kernel/time/timer.c:2035 __do_softirq+0x2d0/0xd54 kernel/softirq.c:571 ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80 call_on_irq_stack+0x24/0x4c arch/arm64/kernel/entry.S:882 do_softirq_own_stack+0x20/0x2c arch/arm64/kernel/irq.c:85 invoke_softirq kernel/softirq.c:452 [inline] __irq_exit_rcu+0x28c/0x534 kernel/softirq.c:650 irq_exit_rcu+0x14/0x84 kernel/softirq.c:662 __el1_irq arch/arm64/kernel/entry-common.c:472 [inline] el1_interrupt+0x38/0x68 arch/arm64/kernel/entry-common.c:486 el1h_64_irq_handler+0x18/0x24 arch/arm64/kernel/entry-common.c:491 el1h_64_irq+0x64/0x68 arch/arm64/kernel/entry.S:587 __daif_local_irq_enable arch/arm64/include/asm/irqflags.h:33 [inline] arch_local_irq_enable+0x8/0xc arch/arm64/include/asm/irqflags.h:55 cpuidle_idle_call kernel/sched/idle.c:170 [inline] do_idle+0x1f0/0x4e8 kernel/sched/idle.c:282 cpu_startup_entry+0x24/0x28 kernel/sched/idle.c:379 rest_init+0x2dc/0x2f4 init/main.c:735 start_kernel+0x0/0x55c init/main.c:834 start_kernel+0x3f0/0x55c init/main.c:1088 __primary_switched+0xb8/0xc0 arch/arm64/kernel/head.S:523 Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_fq_pie.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 6980796d435d..c699e5095607 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -201,6 +201,11 @@ out: return NET_XMIT_CN; } +static struct netlink_range_validation fq_pie_q_range = { + .min = 1, + .max = 1 << 20, +}; + static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { [TCA_FQ_PIE_LIMIT] = {.type = NLA_U32}, [TCA_FQ_PIE_FLOWS] = {.type = NLA_U32}, @@ -208,7 +213,8 @@ static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = { [TCA_FQ_PIE_TUPDATE] = {.type = NLA_U32}, [TCA_FQ_PIE_ALPHA] = {.type = NLA_U32}, [TCA_FQ_PIE_BETA] = {.type = NLA_U32}, - [TCA_FQ_PIE_QUANTUM] = {.type = NLA_U32}, + [TCA_FQ_PIE_QUANTUM] = + NLA_POLICY_FULL_RANGE(NLA_U32, &fq_pie_q_range), [TCA_FQ_PIE_MEMORY_LIMIT] = {.type = NLA_U32}, [TCA_FQ_PIE_ECN_PROB] = {.type = NLA_U32}, [TCA_FQ_PIE_ECN] = {.type = NLA_U32}, From 1acfc6e753ed978b36d722f54e57fe4d1e8a6ffa Mon Sep 17 00:00:00 2001 From: David Zheng Date: Wed, 24 May 2023 11:14:59 -0700 Subject: [PATCH 443/934] i2c: designware: fix idx_write_cnt in read loop With IC_INTR_RX_FULL slave interrupt handler reads data in a loop until RX FIFO is empty. When testing with the slave-eeprom, each transaction has 2 bytes for address/index and 1 byte for value, the address byte can be written as data byte due to dropping STOP condition. In the test below, the master continuously writes to the slave, first 2 bytes are index, 3rd byte is value and follow by a STOP condition. i2c_write: i2c-3 #0 a=04b f=0000 l=3 [00-D1-D1] i2c_write: i2c-3 #0 a=04b f=0000 l=3 [00-D2-D2] i2c_write: i2c-3 #0 a=04b f=0000 l=3 [00-D3-D3] Upon receiving STOP condition slave eeprom would reset `idx_write_cnt` so next 2 bytes can be treated as buffer index for upcoming transaction. Supposedly the slave eeprom buffer would be written as EEPROM[0x00D1] = 0xD1 EEPROM[0x00D2] = 0xD2 EEPROM[0x00D3] = 0xD3 When CPU load is high the slave irq handler may not read fast enough, the interrupt status can be seen as 0x204 with both DW_IC_INTR_STOP_DET (0x200) and DW_IC_INTR_RX_FULL (0x4) bits. The slave device may see the transactions below. 0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x1594 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x1594 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x1594 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x1794 : INTR_STAT=0x204 0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x1790 : INTR_STAT=0x200 0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x1594 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x1594 : INTR_STAT=0x4 0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x1594 : INTR_STAT=0x4 After `D1` is received, read loop continues to read `00` which is the first bype of next index. Since STOP condition is ignored by the loop, eeprom buffer index increased to `D2` and `00` is written as value. So the slave eeprom buffer becomes EEPROM[0x00D1] = 0xD1 EEPROM[0x00D2] = 0x00 EEPROM[0x00D3] = 0xD3 The fix is to use `FIRST_DATA_BYTE` (bit 11) in `IC_DATA_CMD` to split the transactions. The first index byte in this case would have bit 11 set. Check this indication to inject I2C_SLAVE_WRITE_REQUESTED event which will reset `idx_write_cnt` in slave eeprom. Signed-off-by: David Zheng Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.h | 1 + drivers/i2c/busses/i2c-designware-slave.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index c5d87aae39c6..bf23bfb51aea 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -40,6 +40,7 @@ #define DW_IC_CON_BUS_CLEAR_CTRL BIT(11) #define DW_IC_DATA_CMD_DAT GENMASK(7, 0) +#define DW_IC_DATA_CMD_FIRST_DATA_BYTE BIT(11) /* * Registers offset diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index cec25054bb24..2e079cf20bb5 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -176,6 +176,10 @@ static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id) do { regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); + if (tmp & DW_IC_DATA_CMD_FIRST_DATA_BYTE) + i2c_slave_event(dev->slave, + I2C_SLAVE_WRITE_REQUESTED, + &val); val = tmp; i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED, &val); From 5578d0a79b6430fa1543640dd6f2d397d0886ce7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Sun, 21 May 2023 14:19:40 +0200 Subject: [PATCH 444/934] i2c: mv64xxx: Fix reading invalid status value in atomic mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There seems to be a bug within the mv64xxx I2C controller, wherein the status register may not necessarily contain valid value immediately after the IFLG flag is set in the control register. My theory is that the controller: - first sets the IFLG in control register - then updates the status register - then raises an interrupt This may sometime cause weird bugs when in atomic mode, since in this mode we do not wait for an interrupt, but instead we poll the control register for IFLG and read status register immediately after. I encountered -ENXIO from mv64xxx_i2c_fsm() due to this issue when using this driver in atomic mode. Note that I've only seen this issue on Armada 385, I don't know whether other SOCs with this controller are also affected. Also note that this fix has been in U-Boot for over 4 years [1] without anybody complaining, so it should not cause regressions. [1] https://source.denx.de/u-boot/u-boot/-/commit/d50e29662f78 Fixes: 544a8d75f3d6 ("i2c: mv64xxx: Add atomic_xfer method to driver") Signed-off-by: Marek Behún Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mv64xxx.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 047dfef7a657..878c076ebdc6 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -520,6 +520,17 @@ mv64xxx_i2c_intr(int irq, void *dev_id) while (readl(drv_data->reg_base + drv_data->reg_offsets.control) & MV64XXX_I2C_REG_CONTROL_IFLG) { + /* + * It seems that sometime the controller updates the status + * register only after it asserts IFLG in control register. + * This may result in weird bugs when in atomic mode. A delay + * of 100 ns before reading the status register solves this + * issue. This bug does not seem to appear when using + * interrupts. + */ + if (drv_data->atomic) + ndelay(100); + status = readl(drv_data->reg_base + drv_data->reg_offsets.status); mv64xxx_i2c_fsm(drv_data, status); mv64xxx_i2c_do_action(drv_data); From 9bc009734774549f8bb8d7e526ba10e70d751a7c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 2 Jun 2023 21:04:55 +0200 Subject: [PATCH 445/934] net: stmmac: dwmac-qcom-ethqos: fix a regression on EMAC < 3 We must not assign plat_dat->dwmac4_addrs unconditionally as for structures which don't set them, this will result in the core driver using zeroes everywhere and breaking the driver for older HW. On EMAC < 2 the address should remain NULL. Fixes: b68376191c69 ("net: stmmac: dwmac-qcom-ethqos: Add EMAC3 support") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Reviewed-by: Siddharth Vadapalli Reviewed-by: Vinod Koul Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 16a8c361283b..f07905f00f98 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -644,7 +644,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->fix_mac_speed = ethqos_fix_mac_speed; plat_dat->dump_debug_regs = rgmii_dump; plat_dat->has_gmac4 = 1; - plat_dat->dwmac4_addrs = &data->dwmac4_addrs; + if (ethqos->has_emac3) + plat_dat->dwmac4_addrs = &data->dwmac4_addrs; plat_dat->pmt = 1; plat_dat->tso_en = of_property_read_bool(np, "snps,tso"); if (of_device_is_compatible(np, "qcom,qcs404-ethqos")) From 40023959dbab3c6ad56fa7213770e63d197b69fb Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 26 May 2023 14:41:38 +0200 Subject: [PATCH 446/934] drm/i915/gt: Use the correct error value when kernel_context() fails kernel_context() returns an error pointer. Use pointer-error conversion functions to evaluate its return value, rather than checking for a '0' return. Fixes: eb5c10cbbc2f ("drm/i915: Remove I915_USER_PRIORITY_SHIFT") Reported-by: Dan Carpenter Signed-off-by: Andi Shyti Cc: Chris Wilson Cc: # v5.13+ Reviewed-by: Andrzej Hajda Acked-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20230526124138.2006110-1-andi.shyti@linux.intel.com (cherry picked from commit edad9ee94f17adc75d3b13ab51bbe3d615ce1e7e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/selftest_execlists.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 736b89a8ecf5..4202df5b8c12 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -1530,8 +1530,8 @@ static int live_busywait_preempt(void *arg) struct drm_i915_gem_object *obj; struct i915_vma *vma; enum intel_engine_id id; - int err = -ENOMEM; u32 *map; + int err; /* * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can @@ -1539,13 +1539,17 @@ static int live_busywait_preempt(void *arg) */ ctx_hi = kernel_context(gt->i915, NULL); - if (!ctx_hi) - return -ENOMEM; + if (IS_ERR(ctx_hi)) + return PTR_ERR(ctx_hi); + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(gt->i915, NULL); - if (!ctx_lo) + if (IS_ERR(ctx_lo)) { + err = PTR_ERR(ctx_lo); goto err_ctx_hi; + } + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); From cb2e701305f4ffe3a107c1d97f8588b4ed48ccb3 Mon Sep 17 00:00:00 2001 From: Chaitanya Kumar Borah Date: Mon, 29 May 2023 11:37:47 +0530 Subject: [PATCH 447/934] drm/i915/display: Set correct voltage level for 480MHz CDCLK According to Bspec, the voltage level for 480MHz is to be set as 1 instead of 2. BSpec: 49208 Fixes: 06f1b06dc5b7 ("drm/i915/display: Add 480 MHz CDCLK steps for RPL-U") v2: rebase Signed-off-by: Chaitanya Kumar Borah Reviewed-by: Mika Kahola Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20230529060747.3972259-1-chaitanya.kumar.borah@intel.com (cherry picked from commit 5a3c46b809d09f8ef59e2fbf2463b1c102aecbaa) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_cdclk.c | 30 +++++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 084a483f9776..2aaaba090cc0 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1453,6 +1453,18 @@ static u8 tgl_calc_voltage_level(int cdclk) return 0; } +static u8 rplu_calc_voltage_level(int cdclk) +{ + if (cdclk > 556800) + return 3; + else if (cdclk > 480000) + return 2; + else if (cdclk > 312000) + return 1; + else + return 0; +} + static void icl_readout_refclk(struct drm_i915_private *dev_priv, struct intel_cdclk_config *cdclk_config) { @@ -3242,6 +3254,13 @@ static const struct intel_cdclk_funcs mtl_cdclk_funcs = { .calc_voltage_level = tgl_calc_voltage_level, }; +static const struct intel_cdclk_funcs rplu_cdclk_funcs = { + .get_cdclk = bxt_get_cdclk, + .set_cdclk = bxt_set_cdclk, + .modeset_calc_cdclk = bxt_modeset_calc_cdclk, + .calc_voltage_level = rplu_calc_voltage_level, +}; + static const struct intel_cdclk_funcs tgl_cdclk_funcs = { .get_cdclk = bxt_get_cdclk, .set_cdclk = bxt_set_cdclk, @@ -3384,14 +3403,17 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs; dev_priv->display.cdclk.table = dg2_cdclk_table; } else if (IS_ALDERLAKE_P(dev_priv)) { - dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs; /* Wa_22011320316:adl-p[a0] */ - if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) + if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0)) { dev_priv->display.cdclk.table = adlp_a_step_cdclk_table; - else if (IS_ADLP_RPLU(dev_priv)) + dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs; + } else if (IS_ADLP_RPLU(dev_priv)) { dev_priv->display.cdclk.table = rplu_cdclk_table; - else + dev_priv->display.funcs.cdclk = &rplu_cdclk_funcs; + } else { dev_priv->display.cdclk.table = adlp_cdclk_table; + dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs; + } } else if (IS_ROCKETLAKE(dev_priv)) { dev_priv->display.funcs.cdclk = &tgl_cdclk_funcs; dev_priv->display.cdclk.table = rkl_cdclk_table; From 2d6f2f79e06571d41eb1223abebe9097511c9544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Tue, 30 May 2023 13:16:49 +0300 Subject: [PATCH 448/934] drm/i915: Use 18 fast wake AUX sync len MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HW default for wake sync pulses is 18. 10 precharge and 8 preamble. There is no reason to change this especially as it is causing problems with certain eDP panels. v3: Change "Fixes:" commit v2: Remove "fast wake" repeat from subject Signed-off-by: Jouni Högander Fixes: e1c71f8f9180 ("drm/i915: Fix fast wake AUX sync len") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8475 Reviewed-by: Luca Coelho Link: https://patchwork.freedesktop.org/patch/msgid/20230530101649.2549949-1-jouni.hogander@intel.com (cherry picked from commit b29a20f7c4995a059ed764ce42389857426397c7) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_dp_aux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c index 705915d50565..524bd6da260c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_aux.c +++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c @@ -129,7 +129,7 @@ static int intel_dp_aux_sync_len(void) static int intel_dp_aux_fw_sync_len(void) { - int precharge = 16; /* 10-16 */ + int precharge = 10; /* 10-16 */ int preamble = 8; return precharge + preamble; From 8b1c94da1e481090f24127b2c420b0c0b0421ce3 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 4 Jun 2023 20:25:17 -0700 Subject: [PATCH 449/934] mptcp: only send RM_ADDR in nl_cmd_remove The specifications from [1] about the "REMOVE" command say: Announce that an address has been lost to the peer It was then only supposed to send a RM_ADDR and not trying to delete associated subflows. A new helper mptcp_pm_remove_addrs() is then introduced to do just that, compared to mptcp_pm_remove_addrs_and_subflows() also removing subflows. To delete a subflow, the userspace daemon can use the "SUB_DESTROY" command, see mptcp_nl_cmd_sf_destroy(). Fixes: d9a4594edabf ("mptcp: netlink: Add MPTCP_PM_CMD_REMOVE") Link: https://github.com/multipath-tcp/mptcp/blob/mptcp_v0.96/include/uapi/linux/mptcp.h [1] Cc: stable@vger.kernel.org Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 18 ++++++++++++++++++ net/mptcp/pm_userspace.c | 2 +- net/mptcp/protocol.h | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index bc343dab5e3f..59f8f3124855 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1558,6 +1558,24 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) return ret; } +void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) +{ + struct mptcp_rm_list alist = { .nr = 0 }; + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, rm_list, list) { + remove_anno_list_by_saddr(msk, &entry->addr); + if (alist.nr < MPTCP_RM_IDS_MAX) + alist.ids[alist.nr++] = entry->addr.id; + } + + if (alist.nr) { + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &alist); + spin_unlock_bh(&msk->pm.lock); + } +} + void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list) { diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 27a275805c06..6beadea8c67d 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -232,7 +232,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) list_move(&match->list, &free_list); - mptcp_pm_remove_addrs_and_subflows(msk, &free_list); + mptcp_pm_remove_addrs(msk, &free_list); release_sock((struct sock *)msk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c5255258bfb3..70c957bc56a8 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -832,6 +832,7 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); +void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); From 48d73f609dcceeb563b0d960e59bf0362581e39c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 4 Jun 2023 20:25:18 -0700 Subject: [PATCH 450/934] selftests: mptcp: update userspace pm addr tests This patch is linked to the previous commit ("mptcp: only send RM_ADDR in nl_cmd_remove"). To align with what is done by the in-kernel PM, update userspace pm addr selftests, by sending a remove_subflows command together after the remove_addrs command. Fixes: d9a4594edabf ("mptcp: netlink: Add MPTCP_PM_CMD_REMOVE") Fixes: 97040cf9806e ("selftests: mptcp: userspace pm address tests") Cc: stable@vger.kernel.org Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 96f63172b8fe..651740a656f0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -862,7 +862,15 @@ do_transfer() sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id sleep 1 + sp=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + da=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + dp=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q') ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id + ip netns exec ${listener_ns} ./pm_nl_ctl dsf lip "::ffff:$addr" \ + lport $sp rip $da rport $dp token $tk fi counter=$((counter + 1)) From 24430f8bf51655c5ab7ddc2fafe939dd3cd0dd47 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 4 Jun 2023 20:25:19 -0700 Subject: [PATCH 451/934] mptcp: add address into userspace pm list Add the address into userspace_pm_local_addr_list when the subflow is created. Make sure it can be found in mptcp_nl_cmd_remove(). And delete it in the new helper mptcp_userspace_pm_delete_local_addr(). By doing this, the "REMOVE" command also works with subflows that have been created via the "SUB_CREATE" command instead of restricting to the addresses that have been announced via the "ANNOUNCE" command. Fixes: d9a4594edabf ("mptcp: netlink: Add MPTCP_PM_CMD_REMOVE") Link: https://github.com/multipath-tcp/mptcp_net-next/issues/379 Cc: stable@vger.kernel.org Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_userspace.c | 41 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 6beadea8c67d..114548b09f47 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -79,6 +79,30 @@ append_err: return ret; } +/* If the subflow is closed from the other peer (not via a + * subflow destroy command then), we want to keep the entry + * not to assign the same ID to another address and to be + * able to send RM_ADDR after the removal of the subflow. + */ +static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *addr) +{ + struct mptcp_pm_addr_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) { + /* TODO: a refcount is needed because the entry can + * be used multiple times (e.g. fullmesh mode). + */ + list_del_rcu(&entry->list); + kfree(entry); + return 0; + } + } + + return -EINVAL; +} + int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) @@ -251,6 +275,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct mptcp_pm_addr_entry local = { 0 }; struct mptcp_addr_info addr_r; struct mptcp_addr_info addr_l; struct mptcp_sock *msk; @@ -302,12 +327,24 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) goto create_err; } + local.addr = addr_l; + err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + if (err < 0) { + GENL_SET_ERR_MSG(info, "did not match address and id"); + goto create_err; + } + lock_sock(sk); err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); release_sock(sk); + spin_lock_bh(&msk->pm.lock); + if (err) + mptcp_userspace_pm_delete_local_addr(msk, &local); + spin_unlock_bh(&msk->pm.lock); + create_err: sock_put((struct sock *)msk); return err; @@ -420,7 +457,11 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); if (ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_pm_addr_entry entry = { .addr = addr_l }; + spin_lock_bh(&msk->pm.lock); + mptcp_userspace_pm_delete_local_addr(msk, &entry); + spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); mptcp_close_ssk(sk, ssk, subflow); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); From 6c160b636c91e71e50c39134f78257cc35305ff0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 4 Jun 2023 20:25:20 -0700 Subject: [PATCH 452/934] selftests: mptcp: update userspace pm subflow tests To align with what is done by the in-kernel PM, update userspace pm subflow selftests, by sending the a remove_addrs command together before the remove_subflows command. This will get a RM_ADDR in chk_rm_nr(). Fixes: d9a4594edabf ("mptcp: netlink: Add MPTCP_PM_CMD_REMOVE") Fixes: 5e986ec46874 ("selftests: mptcp: userspace pm subflow tests") Link: https://github.com/multipath-tcp/mptcp_net-next/issues/379 Cc: stable@vger.kernel.org Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 651740a656f0..29f0c99d9a46 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -936,6 +936,7 @@ do_transfer() sleep 1 sp=$(grep "type:10" "$evts_ns2" | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec ${connector_ns} ./pm_nl_ctl rem token $tk id $id ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \ rip $da rport $dp token $tk fi @@ -3150,7 +3151,7 @@ userspace_tests() pm_nl_set_limits $ns1 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow chk_join_nr 1 1 1 - chk_rm_nr 0 1 + chk_rm_nr 1 1 kill_events_pids fi } From 77e4b94a3de692a09b79945ecac5b8e6b77f10c1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 4 Jun 2023 20:25:21 -0700 Subject: [PATCH 453/934] mptcp: update userspace pm infos Increase pm subflows counter on both server side and client side when userspace pm creates a new subflow, and decrease the counter when it closes a subflow. Increase add_addr_signaled counter in mptcp_nl_cmd_announce() when the address is announced by userspace PM. This modification is similar to how the in-kernel PM is updating the counter: when additional subflows are created/removed. Fixes: 9ab4807c84a4 ("mptcp: netlink: Add MPTCP_PM_CMD_ANNOUNCE") Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/329 Cc: stable@vger.kernel.org Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm.c | 23 +++++++++++++++++++---- net/mptcp/pm_userspace.c | 5 +++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 78c924506e83..76612bca275a 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -87,8 +87,15 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) unsigned int subflows_max; int ret = 0; - if (mptcp_pm_is_userspace(msk)) - return mptcp_userspace_pm_active(msk); + if (mptcp_pm_is_userspace(msk)) { + if (mptcp_userspace_pm_active(msk)) { + spin_lock_bh(&pm->lock); + pm->subflows++; + spin_unlock_bh(&pm->lock); + return true; + } + return false; + } subflows_max = mptcp_pm_get_subflows_max(msk); @@ -181,8 +188,16 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, struct mptcp_pm_data *pm = &msk->pm; bool update_subflows; - update_subflows = (subflow->request_join || subflow->mp_join) && - mptcp_pm_is_kernel(msk); + update_subflows = subflow->request_join || subflow->mp_join; + if (mptcp_pm_is_userspace(msk)) { + if (update_subflows) { + spin_lock_bh(&pm->lock); + pm->subflows--; + spin_unlock_bh(&pm->lock); + } + return; + } + if (!READ_ONCE(pm->work_pending) && !update_subflows) return; diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 114548b09f47..b06aa58dfcf2 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -69,6 +69,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, MPTCP_PM_MAX_ADDR_ID + 1, 1); list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list); + msk->pm.local_addr_used++; ret = e->addr.id; } else if (match) { ret = entry->addr.id; @@ -96,6 +97,7 @@ static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, */ list_del_rcu(&entry->list); kfree(entry); + msk->pm.local_addr_used--; return 0; } } @@ -195,6 +197,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&msk->pm.lock); if (mptcp_pm_alloc_anno_list(msk, &addr_val)) { + msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &addr_val.addr, false); mptcp_pm_nl_addr_send_ack(msk); } @@ -343,6 +346,8 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) spin_lock_bh(&msk->pm.lock); if (err) mptcp_userspace_pm_delete_local_addr(msk, &local); + else + msk->pm.subflows++; spin_unlock_bh(&msk->pm.lock); create_err: From ad24919540fb4df83981d469b5253cc1aecca939 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 5 Jun 2023 15:32:38 +0100 Subject: [PATCH 454/934] firmware: cs_dsp: Log correct region name in bin error messages In cs_dsp_load_coeff() region_name should be set in the XM/YM/ZM cases otherwise any errors will log the region as "Unknown". While doing this also change one error message that logged the region type ID to log the region_name instead. This makes it consistent with other messages in the same function. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20230605143238.4001982-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/firmware/cirrus/cs_dsp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index e4ccfb6a8fa5..ec056f6f40ce 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -2124,6 +2124,7 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware file, blocks, le32_to_cpu(blk->len), type, le32_to_cpu(blk->id)); + region_name = cs_dsp_mem_region_name(type); mem = cs_dsp_find_region(dsp, type); if (!mem) { cs_dsp_err(dsp, "No base for region %x\n", type); @@ -2147,8 +2148,8 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware reg = dsp->ops->region_to_reg(mem, reg); reg += offset; } else { - cs_dsp_err(dsp, "No %x for algorithm %x\n", - type, le32_to_cpu(blk->id)); + cs_dsp_err(dsp, "No %s for algorithm %x\n", + region_name, le32_to_cpu(blk->id)); } break; From 4a672d500bfd6bb87092c33d5a2572c3d0a1cf83 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 10 May 2023 12:51:56 +0200 Subject: [PATCH 455/934] ARM: dts: Fix erroneous ADS touchscreen polarities Several device tree files get the polarity of the pendown-gpios wrong: this signal is active low. Fix up all incorrect flags, so that operating systems can rely on the flag being correctly set. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20230510105156.1134320-1-linus.walleij@linaro.org Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/am57xx-cl-som-am57x.dts | 2 +- arch/arm/boot/dts/at91sam9261ek.dts | 2 +- arch/arm/boot/dts/imx7d-pico-hobbit.dts | 2 +- arch/arm/boot/dts/imx7d-sdb.dts | 2 +- arch/arm/boot/dts/omap3-cm-t3x.dtsi | 2 +- arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi | 2 +- arch/arm/boot/dts/omap3-lilly-a83x.dtsi | 2 +- arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi | 2 +- arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi | 2 +- arch/arm/boot/dts/omap3-pandora-common.dtsi | 2 +- arch/arm/boot/dts/omap5-cm-t54.dts | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts index 2fc9a5d5e0c0..625b9b311b49 100644 --- a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts +++ b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts @@ -527,7 +527,7 @@ interrupt-parent = <&gpio1>; interrupts = <31 0>; - pendown-gpio = <&gpio1 31 0>; + pendown-gpio = <&gpio1 31 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts index 88869ca874d1..045cb253f23a 100644 --- a/arch/arm/boot/dts/at91sam9261ek.dts +++ b/arch/arm/boot/dts/at91sam9261ek.dts @@ -156,7 +156,7 @@ compatible = "ti,ads7843"; interrupts-extended = <&pioC 2 IRQ_TYPE_EDGE_BOTH>; spi-max-frequency = <3000000>; - pendown-gpio = <&pioC 2 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&pioC 2 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <150>; ti,x-max = /bits/ 16 <3830>; diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts index d917dc4f2f22..6ad39dca7009 100644 --- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts +++ b/arch/arm/boot/dts/imx7d-pico-hobbit.dts @@ -64,7 +64,7 @@ interrupt-parent = <&gpio2>; interrupts = <7 0>; spi-max-frequency = <1000000>; - pendown-gpio = <&gpio2 7 0>; + pendown-gpio = <&gpio2 7 GPIO_ACTIVE_LOW>; vcc-supply = <®_3p3v>; ti,x-min = /bits/ 16 <0>; ti,x-max = /bits/ 16 <4095>; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index f483bc0afe5e..234e5fc647b2 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -205,7 +205,7 @@ pinctrl-0 = <&pinctrl_tsc2046_pendown>; interrupt-parent = <&gpio2>; interrupts = <29 0>; - pendown-gpio = <&gpio2 29 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio2 29 GPIO_ACTIVE_LOW>; touchscreen-max-pressure = <255>; wakeup-source; }; diff --git a/arch/arm/boot/dts/omap3-cm-t3x.dtsi b/arch/arm/boot/dts/omap3-cm-t3x.dtsi index e61b8a2bfb7d..51baedf1603b 100644 --- a/arch/arm/boot/dts/omap3-cm-t3x.dtsi +++ b/arch/arm/boot/dts/omap3-cm-t3x.dtsi @@ -227,7 +227,7 @@ interrupt-parent = <&gpio2>; interrupts = <25 0>; /* gpio_57 */ - pendown-gpio = <&gpio2 25 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio2 25 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; ti,x-max = /bits/ 16 <0x0fff>; diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi index 3decc2d78a6c..a7f99ae0c1fe 100644 --- a/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi +++ b/arch/arm/boot/dts/omap3-devkit8000-lcd-common.dtsi @@ -54,7 +54,7 @@ interrupt-parent = <&gpio1>; interrupts = <27 0>; /* gpio_27 */ - pendown-gpio = <&gpio1 27 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio1 27 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; ti,x-max = /bits/ 16 <0x0fff>; diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi index c595afe4181d..d310b5c7bac3 100644 --- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi +++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi @@ -311,7 +311,7 @@ interrupt-parent = <&gpio1>; interrupts = <8 0>; /* boot6 / gpio_8 */ spi-max-frequency = <1000000>; - pendown-gpio = <&gpio1 8 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio1 8 GPIO_ACTIVE_LOW>; vcc-supply = <®_vcc3>; pinctrl-names = "default"; pinctrl-0 = <&tsc2048_pins>; diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi index 1d6e88f99eb3..c3570acc35fa 100644 --- a/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi +++ b/arch/arm/boot/dts/omap3-overo-common-lcd35.dtsi @@ -149,7 +149,7 @@ interrupt-parent = <&gpio4>; interrupts = <18 0>; /* gpio_114 */ - pendown-gpio = <&gpio4 18 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio4 18 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; ti,x-max = /bits/ 16 <0x0fff>; diff --git a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi index 7e30f9d45790..d95a0e130058 100644 --- a/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi +++ b/arch/arm/boot/dts/omap3-overo-common-lcd43.dtsi @@ -160,7 +160,7 @@ interrupt-parent = <&gpio4>; interrupts = <18 0>; /* gpio_114 */ - pendown-gpio = <&gpio4 18 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio4 18 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; ti,x-max = /bits/ 16 <0x0fff>; diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi index 559853764487..4c3b6bab179c 100644 --- a/arch/arm/boot/dts/omap3-pandora-common.dtsi +++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi @@ -651,7 +651,7 @@ pinctrl-0 = <&penirq_pins>; interrupt-parent = <&gpio3>; interrupts = <30 IRQ_TYPE_NONE>; /* GPIO_94 */ - pendown-gpio = <&gpio3 30 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio3 30 GPIO_ACTIVE_LOW>; vcc-supply = <&vaux4>; ti,x-min = /bits/ 16 <0>; diff --git a/arch/arm/boot/dts/omap5-cm-t54.dts b/arch/arm/boot/dts/omap5-cm-t54.dts index 2d87b9fc230e..af288d63a26a 100644 --- a/arch/arm/boot/dts/omap5-cm-t54.dts +++ b/arch/arm/boot/dts/omap5-cm-t54.dts @@ -354,7 +354,7 @@ interrupt-parent = <&gpio1>; interrupts = <15 0>; /* gpio1_wk15 */ - pendown-gpio = <&gpio1 15 GPIO_ACTIVE_HIGH>; + pendown-gpio = <&gpio1 15 GPIO_ACTIVE_LOW>; ti,x-min = /bits/ 16 <0x0>; From 811dd426a9b16cf61a86fdb12d5f5b983cbfb130 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Mon, 5 Jun 2023 16:33:08 +0100 Subject: [PATCH 456/934] ALSA: hda/realtek: Add quirks for Asus ROG 2024 laptops using CS35L41 Add support for Asus ROG 2024 models using CS35L41 SPI with Internal Boost. Signed-off-by: Stefan Binding Cc: Link: https://lore.kernel.org/r/20230605153308.448550-1-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 57a2dd07efaf..f10790ace5c1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9547,6 +9547,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), @@ -9565,6 +9566,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1f12, "ASUS UM5302", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1f92, "ASUS ROG Flow X16", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), + SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2), + SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC), From da209f7a80dd633a32cbcbafe9e9f778933119c1 Mon Sep 17 00:00:00 2001 From: Tim Crawford Date: Mon, 5 Jun 2023 10:38:34 -0600 Subject: [PATCH 457/934] ALSA: hda/realtek: Add quirk for Clevo NS50AU Fixes headset detection on Clevo NS50AU. Signed-off-by: Tim Crawford Cc: Link: https://lore.kernel.org/r/20230605163834.24653-1-tcrawford@system76.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f10790ace5c1..699167b9175f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9643,6 +9643,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x51b1, "Clevo NS50AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), From 917ac77846b907dfdbd878688a9a61236ad6c51e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 26 May 2023 20:30:20 +0800 Subject: [PATCH 458/934] btrfs: subpage: fix a crash in metadata repair path [BUG] Test case btrfs/027 would crash with subpage (64K page size, 4K sectorsize) with the following dying messages: debug: map_length=16384 length=65536 type=metadata|raid6(0x104) assertion failed: map_length >= length, in fs/btrfs/volumes.c:8093 ------------[ cut here ]------------ kernel BUG at fs/btrfs/messages.c:259! Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 Call trace: btrfs_assertfail+0x28/0x2c [btrfs] btrfs_map_repair_block+0x150/0x2b8 [btrfs] btrfs_repair_io_failure+0xd4/0x31c [btrfs] btrfs_read_extent_buffer+0x150/0x16c [btrfs] read_tree_block+0x38/0xbc [btrfs] read_tree_root_path+0xfc/0x1bc [btrfs] btrfs_get_root_ref.part.0+0xd4/0x3a8 [btrfs] open_ctree+0xa30/0x172c [btrfs] btrfs_mount_root+0x3c4/0x4a4 [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xec vfs_kern_mount.part.0+0x90/0xd4 vfs_kern_mount+0x14/0x28 btrfs_mount+0x114/0x418 [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xec path_mount+0x3e0/0xb64 __arm64_sys_mount+0x200/0x2d8 invoke_syscall+0x48/0x114 el0_svc_common.constprop.0+0x60/0x11c do_el0_svc+0x38/0x98 el0_svc+0x40/0xa8 el0t_64_sync_handler+0xf4/0x120 el0t_64_sync+0x190/0x194 Code: aa0403e2 b0fff060 91010000 959c2024 (d4210000) [CAUSE] In btrfs/027 we test RAID6 with missing devices, in this particular case, we're repairing a metadata at the end of a data stripe. But at btrfs_repair_io_failure(), we always pass a full PAGE for repair, and for subpage case this can cross stripe boundary and lead to the above BUG_ON(). This metadata repair code is always there, since the introduction of subpage support, but this can trigger BUG_ON() after the bio split ability at btrfs_map_bio(). [FIX] Instead of passing the old PAGE_SIZE, we calculate the correct length based on the eb size and page size for both regular and subpage cases. CC: stable@vger.kernel.org # 6.3+ Reviewed-by: Christoph Hellwig Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b1b227505f3..88e6d1072a35 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -242,7 +242,6 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) { struct btrfs_fs_info *fs_info = eb->fs_info; - u64 start = eb->start; int i, num_pages = num_extent_pages(eb); int ret = 0; @@ -251,12 +250,14 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; + u64 start = max_t(u64, eb->start, page_offset(p)); + u64 end = min_t(u64, eb->start + eb->len, page_offset(p) + PAGE_SIZE); + u32 len = end - start; - ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE, - start, p, start - page_offset(p), mirror_num); + ret = btrfs_repair_io_failure(fs_info, 0, start, len, + start, p, offset_in_page(start), mirror_num); if (ret) break; - start += PAGE_SIZE; } return ret; From 7c28afd5512e371773dbb2bf95a31ed5625651d9 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 5 Jun 2023 18:56:36 +0200 Subject: [PATCH 459/934] HID: hidpp: terminate retry loop on success It seems we forgot the normal case to terminate the retry loop, making us asking 3 times each command, which is probably a little bit too much. And remove the ugly "goto exit" that can be replaced by a simpler "break" Fixes: 586e8fede795 ("HID: logitech-hidpp: Retry commands when device is busy") Suggested-by: Mark Lord Tested-by: Mark Lord Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 2246044b1639..5e1a412fd28f 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -286,7 +286,7 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp, struct hidpp_report *message, struct hidpp_report *response) { - int ret; + int ret = -1; int max_retries = 3; mutex_lock(&hidpp->send_mutex); @@ -300,13 +300,13 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp, */ *response = *message; - for (; max_retries != 0; max_retries--) { + for (; max_retries != 0 && ret; max_retries--) { ret = __hidpp_send_report(hidpp->hid_dev, message); if (ret) { dbg_hid("__hidpp_send_report returned err: %d\n", ret); memset(response, 0, sizeof(struct hidpp_report)); - goto exit; + break; } if (!wait_event_timeout(hidpp->wait, hidpp->answer_available, @@ -314,14 +314,14 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp, dbg_hid("%s:timeout waiting for response\n", __func__); memset(response, 0, sizeof(struct hidpp_report)); ret = -ETIMEDOUT; - goto exit; + break; } if (response->report_id == REPORT_ID_HIDPP_SHORT && response->rap.sub_id == HIDPP_ERROR) { ret = response->rap.params[1]; dbg_hid("%s:got hidpp error %02X\n", __func__, ret); - goto exit; + break; } if ((response->report_id == REPORT_ID_HIDPP_LONG || @@ -330,13 +330,12 @@ static int hidpp_send_message_sync(struct hidpp_device *hidpp, ret = response->fap.params[1]; if (ret != HIDPP20_ERROR_BUSY) { dbg_hid("%s:got hidpp 2.0 error %02X\n", __func__, ret); - goto exit; + break; } dbg_hid("%s:got busy hidpp 2.0 error %02X, retrying\n", __func__, ret); } } -exit: mutex_unlock(&hidpp->send_mutex); return ret; From 132328e8e85174ea788faf8f627c33258c88fbad Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 Jun 2023 15:14:45 +0200 Subject: [PATCH 460/934] bpf: netfilter: Add BPF_NETFILTER bpf_attach_type Andrii Nakryiko writes: And we currently don't have an attach type for NETLINK BPF link. Thankfully it's not too late to add it. I see that link_create() in kernel/bpf/syscall.c just bypasses attach_type check. We shouldn't have done that. Instead we need to add BPF_NETLINK attach type to enum bpf_attach_type. And wire all that properly throughout the kernel and libbpf itself. This adds BPF_NETFILTER and uses it. This breaks uabi but this wasn't in any non-rc release yet, so it should be fine. v2: check link_attack prog type in link_create too Fixes: 84601d6ee68a ("bpf: add bpf_link support for BPF_NETFILTER programs") Suggested-by: Andrii Nakryiko Signed-off-by: Florian Westphal Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/CAEf4BzZ69YgrQW7DHCJUT_X+GqMq_ZQQPBwopaJJVGFD5=d5Vg@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20230605131445.32016-1-fw@strlen.de --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 9 +++++++++ tools/include/uapi/linux/bpf.h | 1 + tools/lib/bpf/libbpf.c | 3 ++- tools/lib/bpf/libbpf_probes.c | 2 ++ 5 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1bb11a6ee667..c994ff5b157c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1035,6 +1035,7 @@ enum bpf_attach_type { BPF_TRACE_KPROBE_MULTI, BPF_LSM_CGROUP, BPF_STRUCT_OPS, + BPF_NETFILTER, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 14f39c1e573e..0c21d0d8efe4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2433,6 +2433,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, default: return -EINVAL; } + case BPF_PROG_TYPE_NETFILTER: + if (expected_attach_type == BPF_NETFILTER) + return 0; + return -EINVAL; case BPF_PROG_TYPE_SYSCALL: case BPF_PROG_TYPE_EXT: if (expected_attach_type) @@ -4590,7 +4594,12 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) switch (prog->type) { case BPF_PROG_TYPE_EXT: + break; case BPF_PROG_TYPE_NETFILTER: + if (attr->link_create.attach_type != BPF_NETFILTER) { + ret = -EINVAL; + goto out; + } break; case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_TRACEPOINT: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1bb11a6ee667..c994ff5b157c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1035,6 +1035,7 @@ enum bpf_attach_type { BPF_TRACE_KPROBE_MULTI, BPF_LSM_CGROUP, BPF_STRUCT_OPS, + BPF_NETFILTER, __MAX_BPF_ATTACH_TYPE }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ad1ec893b41b..a27f6e9ccce7 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -117,6 +117,7 @@ static const char * const attach_type_name[] = { [BPF_PERF_EVENT] = "perf_event", [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", [BPF_STRUCT_OPS] = "struct_ops", + [BPF_NETFILTER] = "netfilter", }; static const char * const link_type_name[] = { @@ -8712,7 +8713,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), - SEC_DEF("netfilter", NETFILTER, 0, SEC_NONE), + SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), }; static size_t custom_sec_def_cnt; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 6065f408a59c..b7d443129f1c 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -180,7 +180,9 @@ static int probe_prog_load(enum bpf_prog_type prog_type, case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: + break; case BPF_PROG_TYPE_NETFILTER: + opts.expected_attach_type = BPF_NETFILTER; break; default: return -EOPNOTSUPP; From 8cfb98196cceec35416041c6b91212d2b99392e4 Mon Sep 17 00:00:00 2001 From: Nitesh Shetty Date: Mon, 5 Jun 2023 11:53:53 +0530 Subject: [PATCH 461/934] null_blk: Fix: memory release when memory_backed=1 Memory/pages are not freed, when unloading nullblk driver. Steps to reproduce issue 1.free -h total used free shared buff/cache available Mem: 7.8Gi 260Mi 7.1Gi 3.0Mi 395Mi 7.3Gi Swap: 0B 0B 0B 2.modprobe null_blk memory_backed=1 3.dd if=/dev/urandom of=/dev/nullb0 oflag=direct bs=1M count=1000 4.modprobe -r null_blk 5.free -h total used free shared buff/cache available Mem: 7.8Gi 1.2Gi 6.1Gi 3.0Mi 398Mi 6.3Gi Swap: 0B 0B 0B Signed-off-by: Anuj Gupta Signed-off-by: Nitesh Shetty Link: https://lore.kernel.org/r/20230605062354.24785-1-nj.shetty@samsung.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index b3fedafe301e..864013019d6b 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2244,6 +2244,7 @@ static void null_destroy_dev(struct nullb *nullb) struct nullb_device *dev = nullb->dev; null_del_dev(nullb); + null_free_device_storage(dev, false); null_free_dev(dev); } From 5647e53f7856bb39dae781fe26aa65a699e2fc9f Mon Sep 17 00:00:00 2001 From: Dan Schatzberg Date: Thu, 1 Jun 2023 11:38:19 -0700 Subject: [PATCH 462/934] cgroup: Documentation: Clarify usage of memory limits The existing documentation refers to memory.high as the "main mechanism to control memory usage." This seems incorrect to me - memory.high can result in reclaim pressure which simply leads to stalls unless some external component observes and actions on it (e.g. systemd-oomd can be used for this purpose). While this is feasible, users are unaware of this interaction and are led to believe that memory.high alone is an effective mechanism for limiting memory. The documentation should recommend the use of memory.max as the effective way to enforce memory limits - it triggers reclaim and results in OOM kills by itself. Signed-off-by: Dan Schatzberg Acked-by: Johannes Weiner Acked-by: Chris Down Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v2.rst | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index f67c0829350b..e592a9364473 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1213,23 +1213,25 @@ PAGE_SIZE multiple when read back. A read-write single value file which exists on non-root cgroups. The default is "max". - Memory usage throttle limit. This is the main mechanism to - control memory usage of a cgroup. If a cgroup's usage goes + Memory usage throttle limit. If a cgroup's usage goes over the high boundary, the processes of the cgroup are throttled and put under heavy reclaim pressure. Going over the high limit never invokes the OOM killer and - under extreme conditions the limit may be breached. + under extreme conditions the limit may be breached. The high + limit should be used in scenarios where an external process + monitors the limited cgroup to alleviate heavy reclaim + pressure. memory.max A read-write single value file which exists on non-root cgroups. The default is "max". - Memory usage hard limit. This is the final protection - mechanism. If a cgroup's memory usage reaches this limit and - can't be reduced, the OOM killer is invoked in the cgroup. - Under certain circumstances, the usage may go over the limit - temporarily. + Memory usage hard limit. This is the main mechanism to limit + memory usage of a cgroup. If a cgroup's memory usage reaches + this limit and can't be reduced, the OOM killer is invoked in + the cgroup. Under certain circumstances, the usage may go + over the limit temporarily. In default configuration regular 0-order allocations always succeed unless OOM killer chooses current task as a victim. @@ -1238,10 +1240,6 @@ PAGE_SIZE multiple when read back. Caller could retry them differently, return into userspace as -ENOMEM or silently ignore in cases like disk readahead. - This is the ultimate protection mechanism. As long as the - high limit is used and monitored properly, this limit's - utility is limited to providing the final safety net. - memory.reclaim A write-only nested-keyed file which exists for all cgroups. From 31c5f9164949347c9cb34f041a7e04fdc08b1b85 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 21 May 2023 15:48:28 +0000 Subject: [PATCH 463/934] Bluetooth: ISO: consider right CIS when removing CIG at cleanup When looking for CIS blocking CIG removal, consider only the CIS with the right CIG ID. Don't try to remove CIG with unset CIG ID. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f75ef12f18f7..9194e31953f9 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -947,8 +947,8 @@ static void find_cis(struct hci_conn *conn, void *data) { struct iso_list_data *d = data; - /* Ignore broadcast */ - if (!bacmp(&conn->dst, BDADDR_ANY)) + /* Ignore broadcast or if CIG don't match */ + if (!bacmp(&conn->dst, BDADDR_ANY) || d->cig != conn->iso_qos.ucast.cig) return; d->count++; @@ -963,6 +963,9 @@ static void cis_cleanup(struct hci_conn *conn) struct hci_dev *hdev = conn->hdev; struct iso_list_data d; + if (conn->iso_qos.ucast.cig == BT_ISO_QOS_CIG_UNSET) + return; + memset(&d, 0, sizeof(d)); d.cig = conn->iso_qos.ucast.cig; From e6a7a46b8636efe95c75bed63a57fc05c13feba4 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 21 May 2023 15:48:29 +0000 Subject: [PATCH 464/934] Bluetooth: ISO: Fix CIG auto-allocation to select configurable CIG Make CIG auto-allocation to select the first CIG_ID that is still configurable. Also use correct CIG_ID range (see Core v5.3 Vol 4 Part E Sec 7.8.97 p.2553). Previously, it would always select CIG_ID 0 regardless of anything, because cis_list with data.cis == 0xff (BT_ISO_QOS_CIS_UNSET) would not count any CIS. Since we are not adding CIS here, use find_cis instead. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 9194e31953f9..1f906f8508bc 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1769,24 +1769,23 @@ static bool hci_le_set_cig_params(struct hci_conn *conn, struct bt_iso_qos *qos) memset(&data, 0, sizeof(data)); - /* Allocate a CIG if not set */ + /* Allocate first still reconfigurable CIG if not set */ if (qos->ucast.cig == BT_ISO_QOS_CIG_UNSET) { - for (data.cig = 0x00; data.cig < 0xff; data.cig++) { + for (data.cig = 0x00; data.cig < 0xf0; data.cig++) { data.count = 0; - data.cis = 0xff; - hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, - BT_BOUND, &data); + hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, + BT_CONNECT, &data); if (data.count) continue; - hci_conn_hash_list_state(hdev, cis_list, ISO_LINK, + hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &data); if (!data.count) break; } - if (data.cig == 0xff) + if (data.cig == 0xf0) return false; /* Update CIG */ From c5d2b6fa26b5b8386a9cc902cdece3a46bef2bd2 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 30 May 2023 13:48:44 -0700 Subject: [PATCH 465/934] Bluetooth: Fix use-after-free in hci_remove_ltk/hci_remove_irk Similar to commit 0f7d9b31ce7a ("netfilter: nf_tables: fix use-after-free in nft_set_catchall_destroy()"). We can not access k after kfree_rcu() call. Cc: stable@vger.kernel.org Signed-off-by: Min Li Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a856b1051d35..0164b56de12d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1416,10 +1416,10 @@ int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { - struct smp_ltk *k; + struct smp_ltk *k, *tmp; int removed = 0; - list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { + list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type) continue; @@ -1435,9 +1435,9 @@ int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { - struct smp_irk *k; + struct smp_irk *k, *tmp; - list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) { + list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type) continue; From 1857c19941c87eb36ad47f22a406be5dfe5eff9f Mon Sep 17 00:00:00 2001 From: Zhengping Jiang Date: Wed, 24 May 2023 17:11:58 -0700 Subject: [PATCH 466/934] Bluetooth: hci_sync: add lock to protect HCI_UNREGISTER When the HCI_UNREGISTER flag is set, no jobs should be scheduled. Fix potential race when HCI_UNREGISTER is set after the flag is tested in hci_cmd_sync_queue. Fixes: 0b94f2651f56 ("Bluetooth: hci_sync: Fix queuing commands when HCI_UNREGISTER is set") Signed-off-by: Zhengping Jiang Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 2 ++ net/bluetooth/hci_sync.c | 20 ++++++++++++++------ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 8baf34639939..fe3893855cd2 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -515,6 +515,7 @@ struct hci_dev { struct work_struct cmd_sync_work; struct list_head cmd_sync_work_list; struct mutex cmd_sync_work_lock; + struct mutex unregister_lock; struct work_struct cmd_sync_cancel_work; struct work_struct reenable_adv_work; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0164b56de12d..48917c68358d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2686,7 +2686,9 @@ void hci_unregister_dev(struct hci_dev *hdev) { BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); + mutex_lock(&hdev->unregister_lock); hci_dev_set_flag(hdev, HCI_UNREGISTER); + mutex_unlock(&hdev->unregister_lock); write_lock(&hci_dev_list_lock); list_del(&hdev->list); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 647a8ce54062..a59695f04c25 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -629,6 +629,7 @@ void hci_cmd_sync_init(struct hci_dev *hdev) INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work); INIT_LIST_HEAD(&hdev->cmd_sync_work_list); mutex_init(&hdev->cmd_sync_work_lock); + mutex_init(&hdev->unregister_lock); INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work); INIT_WORK(&hdev->reenable_adv_work, reenable_adv); @@ -692,14 +693,19 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { struct hci_cmd_sync_work_entry *entry; + int err = 0; - if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) - return -ENODEV; + mutex_lock(&hdev->unregister_lock); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + err = -ENODEV; + goto unlock; + } entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - + if (!entry) { + err = -ENOMEM; + goto unlock; + } entry->func = func; entry->data = data; entry->destroy = destroy; @@ -710,7 +716,9 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, queue_work(hdev->req_workqueue, &hdev->cmd_sync_work); - return 0; +unlock: + mutex_unlock(&hdev->unregister_lock); + return err; } EXPORT_SYMBOL(hci_cmd_sync_submit); From fe2ccc6c29d53e14d3c8b3ddf8ad965a92e074ee Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 31 May 2023 10:57:58 +0200 Subject: [PATCH 467/934] Bluetooth: fix debugfs registration Since commit ec6cef9cd98d ("Bluetooth: Fix SMP channel registration for unconfigured controllers") the debugfs interface for unconfigured controllers will be created when the controller is configured. There is however currently nothing preventing a controller from being configured multiple time (e.g. setting the device address using btmgmt) which results in failed attempts to register the already registered debugfs entries: debugfs: File 'features' in directory 'hci0' already present! debugfs: File 'manufacturer' in directory 'hci0' already present! debugfs: File 'hci_version' in directory 'hci0' already present! ... debugfs: File 'quirk_simultaneous_discovery' in directory 'hci0' already present! Add a controller flag to avoid trying to register the debugfs interface more than once. Fixes: ec6cef9cd98d ("Bluetooth: Fix SMP channel registration for unconfigured controllers") Cc: stable@vger.kernel.org # 4.0 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + net/bluetooth/hci_sync.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 07df96c47ef4..872dcb91a540 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -350,6 +350,7 @@ enum { enum { HCI_SETUP, HCI_CONFIG, + HCI_DEBUGFS_CREATED, HCI_AUTO_OFF, HCI_RFKILLED, HCI_MGMT, diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a59695f04c25..804cde43b4e0 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4551,6 +4551,9 @@ static int hci_init_sync(struct hci_dev *hdev) !hci_dev_test_flag(hdev, HCI_CONFIG)) return 0; + if (hci_dev_test_and_set_flag(hdev, HCI_DEBUGFS_CREATED)) + return 0; + hci_debugfs_create_common(hdev); if (lmp_bredr_capable(hdev)) From 47c5d829a3e326b7395352a10fc8a6effe7afa15 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 31 May 2023 10:57:59 +0200 Subject: [PATCH 468/934] Bluetooth: hci_qca: fix debugfs registration Since commit 3e4be65eb82c ("Bluetooth: hci_qca: Add poweroff support during hci down for wcn3990"), the setup callback which registers the debugfs interface can be called multiple times. This specifically leads to the following error when powering on the controller: debugfs: Directory 'ibs' with parent 'hci0' already present! Add a driver flag to avoid trying to register the debugfs interface more than once. Fixes: 3e4be65eb82c ("Bluetooth: hci_qca: Add poweroff support during hci down for wcn3990") Cc: stable@vger.kernel.org # 4.20 Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_qca.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 1b064504b388..e30c979535b1 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -78,7 +78,8 @@ enum qca_flags { QCA_HW_ERROR_EVENT, QCA_SSR_TRIGGERED, QCA_BT_OFF, - QCA_ROM_FW + QCA_ROM_FW, + QCA_DEBUGFS_CREATED, }; enum qca_capabilities { @@ -635,6 +636,9 @@ static void qca_debugfs_init(struct hci_dev *hdev) if (!hdev->debugfs) return; + if (test_and_set_bit(QCA_DEBUGFS_CREATED, &qca->flags)) + return; + ibs_dir = debugfs_create_dir("ibs", hdev->debugfs); /* read only */ From 02c5ea5246a44d6ffde0fddebfc1d56188052976 Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Wed, 31 May 2023 03:44:56 +0000 Subject: [PATCH 469/934] Bluetooth: Fix l2cap_disconnect_req deadlock L2CAP assumes that the locks conn->chan_lock and chan->lock are acquired in the order conn->chan_lock, chan->lock to avoid potential deadlock. For example, l2sock_shutdown acquires these locks in the order: mutex_lock(&conn->chan_lock) l2cap_chan_lock(chan) However, l2cap_disconnect_req acquires chan->lock in l2cap_get_chan_by_scid first and then acquires conn->chan_lock before calling l2cap_chan_del. This means that these locks are acquired in unexpected order, which leads to potential deadlock: l2cap_chan_lock(c) mutex_lock(&conn->chan_lock) This patch releases chan->lock before acquiring the conn_chan_lock to avoid the potential deadlock. Fixes: a2a9339e1c9d ("Bluetooth: L2CAP: Fix use-after-free in l2cap_disconnect_{req,rsp}") Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 376b523c7b26..d9c4d26b2518 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4663,7 +4663,9 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, chan->ops->set_shutdown(chan); + l2cap_chan_unlock(chan); mutex_lock(&conn->chan_lock); + l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); mutex_unlock(&conn->chan_lock); @@ -4702,7 +4704,9 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, return 0; } + l2cap_chan_unlock(chan); mutex_lock(&conn->chan_lock); + l2cap_chan_lock(chan); l2cap_chan_del(chan, 0); mutex_unlock(&conn->chan_lock); From 6c242c64a09e78349fb0a5f0a6f8076a3d7c0bb4 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 1 Jun 2023 09:34:44 +0300 Subject: [PATCH 470/934] Bluetooth: ISO: don't try to remove CIG if there are bound CIS left Consider existing BOUND & CONNECT state CIS to block CIG removal. Otherwise, under suitable timing conditions we may attempt to remove CIG while Create CIS is pending, which fails. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1f906f8508bc..1ef952bda97d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -972,6 +972,8 @@ static void cis_cleanup(struct hci_conn *conn) /* Check if ISO connection is a CIS and remove CIG if there are * no other connections using it. */ + hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_BOUND, &d); + hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECT, &d); hci_conn_hash_list_state(hdev, find_cis, ISO_LINK, BT_CONNECTED, &d); if (d.count) return; From 71e9588435c38112d6a8686d3d8e7cc1de8fe22c Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 1 Jun 2023 09:34:45 +0300 Subject: [PATCH 471/934] Bluetooth: ISO: use correct CIS order in Set CIG Parameters event The order of CIS handle array in Set CIG Parameters response shall match the order of the CIS_ID array in the command (Core v5.3 Vol 4 Part E Sec 7.8.97). We send CIS_IDs mainly in the order of increasing CIS_ID (but with "last" CIS first if it has fixed CIG_ID). In handling of the reply, we currently assume this is also the same as the order of hci_conn in hdev->conn_hash, but that is not true. Match the correct hci_conn to the correct handle by matching them based on the CIG+CIS combination. The CIG+CIS combination shall be unique for ISO_LINK hci_conn at state >= BT_BOUND, which we maintain in hci_le_set_cig_params. Fixes: 26afbd826ee3 ("Bluetooth: Add initial implementation of CIS connections") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 3 ++- net/bluetooth/hci_event.c | 46 +++++++++++++++++++------------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index fe3893855cd2..9654567cfae3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1202,7 +1202,8 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev, if (id != BT_ISO_QOS_CIS_UNSET && id != c->iso_qos.ucast.cis) continue; - if (ba_type == c->dst_type && !bacmp(&c->dst, ba)) { + /* Match destination address if set */ + if (!ba || (ba_type == c->dst_type && !bacmp(&c->dst, ba))) { rcu_read_unlock(); return c; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d00ef6e3fc45..09ba6d8987ee 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3804,48 +3804,56 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_rp_le_set_cig_params *rp = data; + struct hci_cp_le_set_cig_params *cp; struct hci_conn *conn; - int i = 0; + u8 status = rp->status; + int i; bt_dev_dbg(hdev, "status 0x%2.2x", rp->status); + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_CIG_PARAMS); + if (!cp || rp->num_handles != cp->num_cis || rp->cig_id != cp->cig_id) { + bt_dev_err(hdev, "unexpected Set CIG Parameters response data"); + status = HCI_ERROR_UNSPECIFIED; + } + hci_dev_lock(hdev); - if (rp->status) { + if (status) { while ((conn = hci_conn_hash_lookup_cig(hdev, rp->cig_id))) { conn->state = BT_CLOSED; - hci_connect_cfm(conn, rp->status); + hci_connect_cfm(conn, status); hci_conn_del(conn); } goto unlock; } - rcu_read_lock(); - - list_for_each_entry_rcu(conn, &hdev->conn_hash.list, list) { - if (conn->type != ISO_LINK || - conn->iso_qos.ucast.cig != rp->cig_id || - conn->state == BT_CONNECTED) + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2553 + * + * If the Status return parameter is zero, then the Controller shall + * set the Connection_Handle arrayed return parameter to the connection + * handle(s) corresponding to the CIS configurations specified in + * the CIS_IDs command parameter, in the same order. + */ + for (i = 0; i < rp->num_handles; ++i) { + conn = hci_conn_hash_lookup_cis(hdev, NULL, 0, rp->cig_id, + cp->cis[i].cis_id); + if (!conn || !bacmp(&conn->dst, BDADDR_ANY)) continue; - conn->handle = __le16_to_cpu(rp->handle[i++]); + if (conn->state != BT_BOUND && conn->state != BT_CONNECT) + continue; + + conn->handle = __le16_to_cpu(rp->handle[i]); bt_dev_dbg(hdev, "%p handle 0x%4.4x parent %p", conn, conn->handle, conn->parent); /* Create CIS if LE is already connected */ - if (conn->parent && conn->parent->state == BT_CONNECTED) { - rcu_read_unlock(); + if (conn->parent && conn->parent->state == BT_CONNECTED) hci_le_create_cis(conn); - rcu_read_lock(); - } - - if (i == rp->num_handles) - break; } - rcu_read_unlock(); - unlock: hci_dev_unlock(hdev); From 75767213f3d9b97f63694d02260b6a49a2271876 Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Sat, 3 Jun 2023 08:28:09 -0400 Subject: [PATCH 472/934] Bluetooth: L2CAP: Add missing checks for invalid DCID When receiving a connect response we should make sure that the DCID is within the valid range and that we don't already have another channel allocated for the same DCID. Missing checks may violate the specification (BLUETOOTH CORE SPECIFICATION Version 5.4 | Vol 3, Part A, Page 1046). Fixes: 40624183c202 ("Bluetooth: L2CAP: Add missing checks for invalid LE DCID") Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d9c4d26b2518..c5e8798e297c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4306,6 +4306,10 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, result = __le16_to_cpu(rsp->result); status = __le16_to_cpu(rsp->status); + if (result == L2CAP_CR_SUCCESS && (dcid < L2CAP_CID_DYN_START || + dcid > L2CAP_CID_DYN_END)) + return -EPROTO; + BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); @@ -4337,6 +4341,11 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, switch (result) { case L2CAP_CR_SUCCESS: + if (__l2cap_get_chan_by_dcid(conn, dcid)) { + err = -EBADSLT; + break; + } + l2cap_state_change(chan, BT_CONFIG); chan->ident = 0; chan->dcid = dcid; From b9a4efd61b6b9f62f83752959e75a5dae20624fa Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 6 Jun 2023 09:31:22 +0200 Subject: [PATCH 473/934] ALSA: ice1712,ice1724: fix the kcontrol->id initialization The new xarray lookup code requires to know complete kcontrol->id before snd_ctl_add() call. Reorder the code to make the initialization properly. Cc: stable@kernel.org # v5.19+ Reported-by: Martin Zidek Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230606073122.597491-1-perex@perex.cz Signed-off-by: Takashi Iwai --- sound/pci/ice1712/aureon.c | 7 ++++--- sound/pci/ice1712/ice1712.c | 14 +++++++++----- sound/pci/ice1712/ice1724.c | 16 ++++++++++------ 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/sound/pci/ice1712/aureon.c b/sound/pci/ice1712/aureon.c index 24b978234000..027849329c1b 100644 --- a/sound/pci/ice1712/aureon.c +++ b/sound/pci/ice1712/aureon.c @@ -1899,11 +1899,12 @@ static int aureon_add_controls(struct snd_ice1712 *ice) else { for (i = 0; i < ARRAY_SIZE(cs8415_controls); i++) { struct snd_kcontrol *kctl; - err = snd_ctl_add(ice->card, (kctl = snd_ctl_new1(&cs8415_controls[i], ice))); - if (err < 0) - return err; + kctl = snd_ctl_new1(&cs8415_controls[i], ice); if (i > 1) kctl->id.device = ice->pcm->device; + err = snd_ctl_add(ice->card, kctl); + if (err < 0) + return err; } } } diff --git a/sound/pci/ice1712/ice1712.c b/sound/pci/ice1712/ice1712.c index a5241a287851..3b0c3e70987b 100644 --- a/sound/pci/ice1712/ice1712.c +++ b/sound/pci/ice1712/ice1712.c @@ -2371,22 +2371,26 @@ int snd_ice1712_spdif_build_controls(struct snd_ice1712 *ice) if (snd_BUG_ON(!ice->pcm_pro)) return -EIO; - err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_ice1712_spdif_default, ice)); + kctl = snd_ctl_new1(&snd_ice1712_spdif_default, ice); + kctl->id.device = ice->pcm_pro->device; + err = snd_ctl_add(ice->card, kctl); if (err < 0) return err; + kctl = snd_ctl_new1(&snd_ice1712_spdif_maskc, ice); kctl->id.device = ice->pcm_pro->device; - err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_ice1712_spdif_maskc, ice)); + err = snd_ctl_add(ice->card, kctl); if (err < 0) return err; + kctl = snd_ctl_new1(&snd_ice1712_spdif_maskp, ice); kctl->id.device = ice->pcm_pro->device; - err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_ice1712_spdif_maskp, ice)); + err = snd_ctl_add(ice->card, kctl); if (err < 0) return err; + kctl = snd_ctl_new1(&snd_ice1712_spdif_stream, ice); kctl->id.device = ice->pcm_pro->device; - err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_ice1712_spdif_stream, ice)); + err = snd_ctl_add(ice->card, kctl); if (err < 0) return err; - kctl->id.device = ice->pcm_pro->device; ice->spdif.stream_ctl = kctl; return 0; } diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index 6fab2ad85bbe..1dc776acd637 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -2392,23 +2392,27 @@ static int snd_vt1724_spdif_build_controls(struct snd_ice1712 *ice) if (err < 0) return err; - err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_vt1724_spdif_default, ice)); + kctl = snd_ctl_new1(&snd_vt1724_spdif_default, ice); + kctl->id.device = ice->pcm->device; + err = snd_ctl_add(ice->card, kctl); if (err < 0) return err; + kctl = snd_ctl_new1(&snd_vt1724_spdif_maskc, ice); kctl->id.device = ice->pcm->device; - err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_vt1724_spdif_maskc, ice)); + err = snd_ctl_add(ice->card, kctl); if (err < 0) return err; + kctl = snd_ctl_new1(&snd_vt1724_spdif_maskp, ice); kctl->id.device = ice->pcm->device; - err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_vt1724_spdif_maskp, ice)); + err = snd_ctl_add(ice->card, kctl); if (err < 0) return err; - kctl->id.device = ice->pcm->device; #if 0 /* use default only */ - err = snd_ctl_add(ice->card, kctl = snd_ctl_new1(&snd_vt1724_spdif_stream, ice)); + kctl = snd_ctl_new1(&snd_vt1724_spdif_stream, ice); + kctl->id.device = ice->pcm->device; + err = snd_ctl_add(ice->card, kctl); if (err < 0) return err; - kctl->id.device = ice->pcm->device; ice->spdif.stream_ctl = kctl; #endif return 0; From 09fe05c57b5aaf23e2c35036c98ea9f282b19a77 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2023 16:33:35 +0200 Subject: [PATCH 474/934] rbd: move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting Move RBD_OBJ_FLAG_COPYUP_ENABLED flag setting into the object request state machine to allow for the snapshot context to be captured in the image request state machine rather than in rbd_queue_workfn(). Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 84ad3b17956f..6c847db6ee2c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1334,14 +1334,28 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req) /* * Must be called after rbd_obj_calc_img_extents(). */ -static bool rbd_obj_copyup_enabled(struct rbd_obj_request *obj_req) +static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req) { - if (!obj_req->num_img_extents || - (rbd_obj_is_entire(obj_req) && - !obj_req->img_request->snapc->num_snaps)) - return false; + if (obj_req->img_request->op_type == OBJ_OP_DISCARD) { + dout("%s %p objno %llu discard\n", __func__, obj_req, + obj_req->ex.oe_objno); + return; + } - return true; + if (!obj_req->num_img_extents) { + dout("%s %p objno %llu not overlapping\n", __func__, obj_req, + obj_req->ex.oe_objno); + return; + } + + if (rbd_obj_is_entire(obj_req) && + !obj_req->img_request->snapc->num_snaps) { + dout("%s %p objno %llu entire\n", __func__, obj_req, + obj_req->ex.oe_objno); + return; + } + + obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED; } static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req) @@ -2233,9 +2247,6 @@ static int rbd_obj_init_write(struct rbd_obj_request *obj_req) if (ret) return ret; - if (rbd_obj_copyup_enabled(obj_req)) - obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED; - obj_req->write_state = RBD_OBJ_WRITE_START; return 0; } @@ -2341,8 +2352,6 @@ static int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req) if (ret) return ret; - if (rbd_obj_copyup_enabled(obj_req)) - obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED; if (!obj_req->num_img_extents) { obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT; if (rbd_obj_is_entire(obj_req)) @@ -3286,6 +3295,7 @@ again: case RBD_OBJ_WRITE_START: rbd_assert(!*result); + rbd_obj_set_copyup_enabled(obj_req); if (rbd_obj_write_is_noop(obj_req)) return true; From 870611e4877eff1e8413c3fb92a585e45d5291f6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 5 Jun 2023 16:33:35 +0200 Subject: [PATCH 475/934] rbd: get snapshot context after exclusive lock is ensured to be held Move capturing the snapshot context into the image request state machine, after exclusive lock is ensured to be held for the duration of dealing with the image request. This is needed to ensure correctness of fast-diff states (OBJECT_EXISTS vs OBJECT_EXISTS_CLEAN) and object deltas computed based off of them. Otherwise the object map that is forked for the snapshot isn't guaranteed to accurately reflect the contents of the snapshot when the snapshot is taken under I/O. This breaks differential backup and snapshot-based mirroring use cases with fast-diff enabled: since some object deltas may be incomplete, the destination image may get corrupted. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61472 Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang --- drivers/block/rbd.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6c847db6ee2c..632751ddb287 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1336,6 +1336,8 @@ static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req) */ static void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req) { + rbd_assert(obj_req->img_request->snapc); + if (obj_req->img_request->op_type == OBJ_OP_DISCARD) { dout("%s %p objno %llu discard\n", __func__, obj_req, obj_req->ex.oe_objno); @@ -1456,6 +1458,7 @@ __rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, static struct ceph_osd_request * rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops) { + rbd_assert(obj_req->img_request->snapc); return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc, num_ops); } @@ -1592,15 +1595,18 @@ static void rbd_img_request_init(struct rbd_img_request *img_request, mutex_init(&img_request->state_mutex); } +/* + * Only snap_id is captured here, for reads. For writes, snapshot + * context is captured in rbd_img_object_requests() after exclusive + * lock is ensured to be held. + */ static void rbd_img_capture_header(struct rbd_img_request *img_req) { struct rbd_device *rbd_dev = img_req->rbd_dev; lockdep_assert_held(&rbd_dev->header_rwsem); - if (rbd_img_is_write(img_req)) - img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc); - else + if (!rbd_img_is_write(img_req)) img_req->snap_id = rbd_dev->spec->snap_id; if (rbd_dev_parent_get(rbd_dev)) @@ -3482,9 +3488,19 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req) static void rbd_img_object_requests(struct rbd_img_request *img_req) { + struct rbd_device *rbd_dev = img_req->rbd_dev; struct rbd_obj_request *obj_req; rbd_assert(!img_req->pending.result && !img_req->pending.num_pending); + rbd_assert(!need_exclusive_lock(img_req) || + __rbd_is_lock_owner(rbd_dev)); + + if (rbd_img_is_write(img_req)) { + rbd_assert(!img_req->snapc); + down_read(&rbd_dev->header_rwsem); + img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc); + up_read(&rbd_dev->header_rwsem); + } for_each_obj_request(img_req, obj_req) { int result = 0; @@ -3502,7 +3518,6 @@ static void rbd_img_object_requests(struct rbd_img_request *img_req) static bool rbd_img_advance(struct rbd_img_request *img_req, int *result) { - struct rbd_device *rbd_dev = img_req->rbd_dev; int ret; again: @@ -3523,9 +3538,6 @@ again: if (*result) return true; - rbd_assert(!need_exclusive_lock(img_req) || - __rbd_is_lock_owner(rbd_dev)); - rbd_img_object_requests(img_req); if (!img_req->pending.num_pending) { *result = img_req->pending.result; @@ -3987,6 +3999,10 @@ static int rbd_post_acquire_action(struct rbd_device *rbd_dev) { int ret; + ret = rbd_dev_refresh(rbd_dev); + if (ret) + return ret; + if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) { ret = rbd_object_map_open(rbd_dev); if (ret) From c37ab22bb1a43cdca8bf69cc0a22f1ccfc449e68 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 4 Jun 2023 12:11:23 +0300 Subject: [PATCH 476/934] wifi: mac80211: use correct iftype HE cap We already check that the right iftype capa exists, but then don't use it. Assign it to a variable so we can actually use it, and then do that. Fixes: bac2fd3d7534 ("mac80211: remove use of ieee80211_get_he_sta_cap()") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230604120651.0e908e5c5fdd.Iac142549a6144ac949ebd116b921a59ae5282735@changeid Signed-off-by: Johannes Berg --- net/mac80211/he.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 729f261520c7..0322abae0825 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -3,7 +3,7 @@ * HE handling * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright(c) 2019 - 2022 Intel Corporation + * Copyright(c) 2019 - 2023 Intel Corporation */ #include "ieee80211_i.h" @@ -114,6 +114,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct link_sta_info *link_sta) { struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap; + const struct ieee80211_sta_he_cap *own_he_cap_ptr; struct ieee80211_sta_he_cap own_he_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 he_ppe_size; @@ -123,12 +124,16 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, memset(he_cap, 0, sizeof(*he_cap)); - if (!he_cap_ie || - !ieee80211_get_he_iftype_cap(sband, - ieee80211_vif_type_p2p(&sdata->vif))) + if (!he_cap_ie) return; - own_he_cap = sband->iftype_data->he_cap; + own_he_cap_ptr = + ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); + if (!own_he_cap_ptr) + return; + + own_he_cap = *own_he_cap_ptr; /* Make sure size is OK */ mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem); From 727073ca5e55ab6a07df316250be8a12606e8677 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 4 Jun 2023 12:11:18 +0300 Subject: [PATCH 477/934] wifi: cfg80211: reject bad AP MLD address When trying to authenticate, if the AP MLD address isn't a valid address, mac80211 can throw a warning. Avoid that by rejecting such addresses. Fixes: d648c23024bd ("wifi: nl80211: support MLO in auth/assoc") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230604120651.89188912bd1d.I8dbc6c8ee0cb766138803eec59508ef4ce477709@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d95f8053020d..087d60c0f6e4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10723,6 +10723,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MLD_ADDR]) return -EINVAL; req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); + if (!is_valid_ether_addr(req.ap_mld_addr)) + return -EINVAL; } req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, From 68c228557d52616cf040651abefda9839de7086a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 4 Jun 2023 12:11:16 +0300 Subject: [PATCH 478/934] wifi: mac80211: mlme: fix non-inheritence element There were two bugs when creating the non-inheritence element: 1) 'at_extension' needs to be declared outside the loop, otherwise the value resets every iteration and we can never really switch properly 2) 'added' never got set to true, so we always cut off the extension element again at the end of the function This shows another issue that we might add a list but no extension list, but we need to make the extension list a zero-length one in that case. Fix all these issues. While at it, add a comment explaining the trim. Fixes: 81151ce462e5 ("wifi: mac80211: support MLO authentication/association with one link") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230604120651.3addaa5c4782.If3a78f9305997ad7ef4ba7ffc17a8234c956f613@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e13a0354c397..bd8d6f9545f5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1217,6 +1217,7 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb, const u16 *inner) { unsigned int skb_len = skb->len; + bool at_extension = false; bool added = false; int i, j; u8 *len, *list_len = NULL; @@ -1228,7 +1229,6 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb, for (i = 0; i < PRESENT_ELEMS_MAX && outer[i]; i++) { u16 elem = outer[i]; bool have_inner = false; - bool at_extension = false; /* should at least be sorted in the sense of normal -> ext */ WARN_ON(at_extension && elem < PRESENT_ELEM_EXT_OFFS); @@ -1257,8 +1257,14 @@ static void ieee80211_add_non_inheritance_elem(struct sk_buff *skb, } *list_len += 1; skb_put_u8(skb, (u8)elem); + added = true; } + /* if we added a list but no extension list, make a zero-len one */ + if (added && (!at_extension || !list_len)) + skb_put_u8(skb, 0); + + /* if nothing added remove extension element completely */ if (!added) skb_trim(skb, skb_len); else From 47c171a426e305f2225b92ed7b5e0a990c95f6d4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 4 Jun 2023 12:11:15 +0300 Subject: [PATCH 479/934] wifi: mac80211: don't translate beacon/presp addrs Don't do link address translation for beacons and probe responses, this leads to reporting multiple scan list entries for the same AP (one with the MLD address) which just breaks things. We might need to extend this in the future for some other (action) frames that aren't MLD addressed. Fixes: 42fb9148c078 ("wifi: mac80211: do link->MLD address translation on RX") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230604120651.62adead1b43a.Ifc25eed26ebf3b269f60b1ec10060156d0e7ec0d@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 58222c077898..d996aa2579df 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4965,7 +4965,9 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, } if (unlikely(rx->sta && rx->sta->sta.mlo) && - is_unicast_ether_addr(hdr->addr1)) { + is_unicast_ether_addr(hdr->addr1) && + !ieee80211_is_probe_resp(hdr->frame_control) && + !ieee80211_is_beacon(hdr->frame_control)) { /* translate to MLD addresses */ if (ether_addr_equal(link->conf->addr, hdr->addr1)) ether_addr_copy(hdr->addr1, rx->sdata->vif.addr); From 1afa18e9e72396d1e1aedd6dbb34681f2413316b Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 31 May 2023 11:50:12 +0530 Subject: [PATCH 480/934] wifi: mac80211: fix switch count in EMA beacons Currently, whenever an EMA beacon is formed, due to is_template argument being false from the caller, the switch count is always decremented once which is wrong. Also if switch count is equal to profile periodicity, this makes the switch count to reach till zero which triggers a WARN_ON_ONCE. [ 261.593915] CPU: 1 PID: 800 Comm: kworker/u8:3 Not tainted 5.4.213 #0 [ 261.616143] Hardware name: Qualcomm Technologies, Inc. IPQ9574 [ 261.622666] Workqueue: phy0 ath12k_get_link_bss_conf [ath12k] [ 261.629771] pstate: 60400005 (nZCv daif +PAN -UAO) [ 261.635595] pc : ieee80211_next_txq+0x1ac/0x1b8 [mac80211] [ 261.640282] lr : ieee80211_beacon_update_cntdwn+0x64/0xb4 [mac80211] [...] [ 261.729683] Call trace: [ 261.734986] ieee80211_next_txq+0x1ac/0x1b8 [mac80211] [ 261.737156] ieee80211_beacon_cntdwn_is_complete+0xa28/0x1194 [mac80211] [ 261.742365] ieee80211_beacon_cntdwn_is_complete+0xef4/0x1194 [mac80211] [ 261.749224] ieee80211_beacon_get_template_ema_list+0x38/0x5c [mac80211] [ 261.755908] ath12k_get_link_bss_conf+0xf8/0x33b4 [ath12k] [ 261.762590] ath12k_get_link_bss_conf+0x390/0x33b4 [ath12k] [ 261.767881] process_one_work+0x194/0x270 [ 261.773346] worker_thread+0x200/0x314 [ 261.777514] kthread+0x140/0x150 [ 261.781158] ret_from_fork+0x10/0x18 Fix this issue by making the is_template argument as true when fetching the EMA beacons. Fixes: bd54f3c29077 ("wifi: mac80211: generate EMA beacons in AP mode") Signed-off-by: Aditya Kumar Singh Link: https://lore.kernel.org/r/20230531062012.4537-1-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0d9fbc8458fd..7f1c7f67014b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5528,7 +5528,7 @@ ieee80211_beacon_get_template_ema_list(struct ieee80211_hw *hw, { struct ieee80211_ema_beacons *ema_beacons = NULL; - WARN_ON(__ieee80211_beacon_get(hw, vif, NULL, false, link_id, 0, + WARN_ON(__ieee80211_beacon_get(hw, vif, NULL, true, link_id, 0, &ema_beacons)); return ema_beacons; From 7a4615b9a9da5225b22b36a20508555dd133ac24 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 2 Jun 2023 13:42:47 -0600 Subject: [PATCH 481/934] wifi: iwlwifi: mvm: Fix -Warray-bounds bug in iwl_mvm_wait_d3_notif() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kmemdup() at line 2735 is not duplicating enough memory for notif->tid_tear_down and notif->station_id. As it only duplicates 612 bytes: up to offsetofend(struct iwl_wowlan_info_notif, received_beacons), this is the range of [0, 612) bytes. 2735 notif = kmemdup(notif_v1, 2736 offsetofend(struct iwl_wowlan_info_notif, 2737 received_beacons), 2738 GFP_ATOMIC); which evidently does not cover bytes 612 and 613 for members tid_tear_down and station_id in struct iwl_wowlan_info_notif. See below: $ pahole -C iwl_wowlan_info_notif drivers/net/wireless/intel/iwlwifi/mvm/d3.o struct iwl_wowlan_info_notif { struct iwl_wowlan_gtk_status_v3 gtk[2]; /* 0 488 */ /* --- cacheline 7 boundary (448 bytes) was 40 bytes ago --- */ struct iwl_wowlan_igtk_status igtk[2]; /* 488 80 */ /* --- cacheline 8 boundary (512 bytes) was 56 bytes ago --- */ __le64 replay_ctr; /* 568 8 */ /* --- cacheline 9 boundary (576 bytes) --- */ __le16 pattern_number; /* 576 2 */ __le16 reserved1; /* 578 2 */ __le16 qos_seq_ctr[8]; /* 580 16 */ __le32 wakeup_reasons; /* 596 4 */ __le32 num_of_gtk_rekeys; /* 600 4 */ __le32 transmitted_ndps; /* 604 4 */ __le32 received_beacons; /* 608 4 */ u8 tid_tear_down; /* 612 1 */ u8 station_id; /* 613 1 */ u8 reserved2[2]; /* 614 2 */ /* size: 616, cachelines: 10, members: 13 */ /* last cacheline: 40 bytes */ }; Therefore, when the following assignments take place, actually no memory has been allocated for those objects: 2743 notif->tid_tear_down = notif_v1->tid_tear_down; 2744 notif->station_id = notif_v1->station_id; Fix this by allocating space for the whole notif object and zero out the remaining space in memory after member station_id. This also fixes the following -Warray-bounds issues: CC drivers/net/wireless/intel/iwlwifi/mvm/d3.o drivers/net/wireless/intel/iwlwifi/mvm/d3.c: In function ‘iwl_mvm_wait_d3_notif’: drivers/net/wireless/intel/iwlwifi/mvm/d3.c:2743:30: warning: array subscript ‘struct iwl_wowlan_info_notif[0]’ is partly outside array bounds of ‘unsigned char[612]’ [-Warray-bounds=] 2743 | notif->tid_tear_down = notif_v1->tid_tear_down; | from drivers/net/wireless/intel/iwlwifi/mvm/d3.c:7: In function ‘kmemdup’, inlined from ‘iwl_mvm_wait_d3_notif’ at drivers/net/wireless/intel/iwlwifi/mvm/d3.c:2735:12: include/linux/fortify-string.h:765:16: note: object of size 612 allocated by ‘__real_kmemdup’ 765 | return __real_kmemdup(p, size, gfp); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/wireless/intel/iwlwifi/mvm/d3.c: In function ‘iwl_mvm_wait_d3_notif’: drivers/net/wireless/intel/iwlwifi/mvm/d3.c:2744:30: warning: array subscript ‘struct iwl_wowlan_info_notif[0]’ is partly outside array bounds of ‘unsigned char[612]’ [-Warray-bounds=] 2744 | notif->station_id = notif_v1->station_id; | ^~ In function ‘kmemdup’, inlined from ‘iwl_mvm_wait_d3_notif’ at drivers/net/wireless/intel/iwlwifi/mvm/d3.c:2735:12: include/linux/fortify-string.h:765:16: note: object of size 612 allocated by ‘__real_kmemdup’ 765 | return __real_kmemdup(p, size, gfp); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ Link: https://github.com/KSPP/linux/issues/306 Fixes: 905d50ddbc83 ("wifi: iwlwifi: mvm: support wowlan info notification version 2") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Acked-by: Gregory Greenman Link: https://lore.kernel.org/r/ZHpGN555FwAKGduH@work Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 37aa4676dc94..6d1007f24b4a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2732,17 +2732,13 @@ static bool iwl_mvm_wait_d3_notif(struct iwl_notif_wait_data *notif_wait, if (wowlan_info_ver < 2) { struct iwl_wowlan_info_notif_v1 *notif_v1 = (void *)pkt->data; - notif = kmemdup(notif_v1, - offsetofend(struct iwl_wowlan_info_notif, - received_beacons), - GFP_ATOMIC); - + notif = kmemdup(notif_v1, sizeof(*notif), GFP_ATOMIC); if (!notif) return false; notif->tid_tear_down = notif_v1->tid_tear_down; notif->station_id = notif_v1->station_id; - + memset_after(notif, 0, station_id); } else { notif = (void *)pkt->data; } From 42510dffd0e2c27046905f742172ed6662af5557 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 5 Jun 2023 16:56:00 +0530 Subject: [PATCH 482/934] qed/qede: Fix scheduling while atomic Statistics read through bond interface via sysfs causes below bug and traces as it triggers the bonding module to collect the slave device statistics while holding the spinlock, beneath that qede->qed driver statistics flow gets scheduled out due to usleep_range() used in PTT acquire logic [ 3673.988874] Hardware name: HPE ProLiant DL365 Gen10 Plus/ProLiant DL365 Gen10 Plus, BIOS A42 10/29/2021 [ 3673.988878] Call Trace: [ 3673.988891] dump_stack_lvl+0x34/0x44 [ 3673.988908] __schedule_bug.cold+0x47/0x53 [ 3673.988918] __schedule+0x3fb/0x560 [ 3673.988929] schedule+0x43/0xb0 [ 3673.988932] schedule_hrtimeout_range_clock+0xbf/0x1b0 [ 3673.988937] ? __hrtimer_init+0xc0/0xc0 [ 3673.988950] usleep_range+0x5e/0x80 [ 3673.988955] qed_ptt_acquire+0x2b/0xd0 [qed] [ 3673.988981] _qed_get_vport_stats+0x141/0x240 [qed] [ 3673.989001] qed_get_vport_stats+0x18/0x80 [qed] [ 3673.989016] qede_fill_by_demand_stats+0x37/0x400 [qede] [ 3673.989028] qede_get_stats64+0x19/0xe0 [qede] [ 3673.989034] dev_get_stats+0x5c/0xc0 [ 3673.989045] netstat_show.constprop.0+0x52/0xb0 [ 3673.989055] dev_attr_show+0x19/0x40 [ 3673.989065] sysfs_kf_seq_show+0x9b/0xf0 [ 3673.989076] seq_read_iter+0x120/0x4b0 [ 3673.989087] new_sync_read+0x118/0x1a0 [ 3673.989095] vfs_read+0xf3/0x180 [ 3673.989099] ksys_read+0x5f/0xe0 [ 3673.989102] do_syscall_64+0x3b/0x90 [ 3673.989109] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 3673.989115] RIP: 0033:0x7f8467d0b082 [ 3673.989119] Code: c0 e9 b2 fe ff ff 50 48 8d 3d ca 05 08 00 e8 35 e7 01 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 [ 3673.989121] RSP: 002b:00007ffffb21fd08 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 3673.989127] RAX: ffffffffffffffda RBX: 000000000100eca0 RCX: 00007f8467d0b082 [ 3673.989128] RDX: 00000000000003ff RSI: 00007ffffb21fdc0 RDI: 0000000000000003 [ 3673.989130] RBP: 00007f8467b96028 R08: 0000000000000010 R09: 00007ffffb21ec00 [ 3673.989132] R10: 00007ffffb27b170 R11: 0000000000000246 R12: 00000000000000f0 [ 3673.989134] R13: 0000000000000003 R14: 00007f8467b92000 R15: 0000000000045a05 [ 3673.989139] CPU: 30 PID: 285188 Comm: read_all Kdump: loaded Tainted: G W OE Fix this by collecting the statistics asynchronously from a periodic delayed work scheduled at default stats coalescing interval and return the recent copy of statisitcs from .ndo_get_stats64(), also add ability to configure/retrieve stats coalescing interval using below commands - ethtool -C ethx stats-block-usecs ethtool -c ethx Fixes: 133fac0eedc3 ("qede: Add basic ethtool support") Cc: Sudarsana Kalluru Cc: David Miller Signed-off-by: Manish Chopra Link: https://lore.kernel.org/r/20230605112600.48238-1-manishc@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 2 +- drivers/net/ethernet/qlogic/qede/qede.h | 4 +++ .../net/ethernet/qlogic/qede/qede_ethtool.c | 24 +++++++++++-- drivers/net/ethernet/qlogic/qede/qede_main.c | 34 ++++++++++++++++++- 4 files changed, 60 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 2edd6bf64a3c..7776d3bdd459 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1903,7 +1903,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats) { u32 i; - if (!cdev) { + if (!cdev || cdev->recov_in_prog) { memset(stats, 0, sizeof(*stats)); return; } diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index f9931ecb7baa..4d83ceebdc49 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -269,6 +269,10 @@ struct qede_dev { #define QEDE_ERR_WARN 3 struct qede_dump_info dump_info; + struct delayed_work periodic_task; + unsigned long stats_coal_ticks; + u32 stats_coal_usecs; + spinlock_t stats_lock; /* lock for vport stats access */ }; enum QEDE_STATE { diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 374a86b875a3..95820cf1cd6c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -429,6 +429,8 @@ static void qede_get_ethtool_stats(struct net_device *dev, } } + spin_lock(&edev->stats_lock); + for (i = 0; i < QEDE_NUM_STATS; i++) { if (qede_is_irrelevant_stat(edev, i)) continue; @@ -438,6 +440,8 @@ static void qede_get_ethtool_stats(struct net_device *dev, buf++; } + spin_unlock(&edev->stats_lock); + __qede_unlock(edev); } @@ -829,6 +833,7 @@ out: coal->rx_coalesce_usecs = rx_coal; coal->tx_coalesce_usecs = tx_coal; + coal->stats_block_coalesce_usecs = edev->stats_coal_usecs; return rc; } @@ -842,6 +847,19 @@ int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal, int i, rc = 0; u16 rxc, txc; + if (edev->stats_coal_usecs != coal->stats_block_coalesce_usecs) { + edev->stats_coal_usecs = coal->stats_block_coalesce_usecs; + if (edev->stats_coal_usecs) { + edev->stats_coal_ticks = usecs_to_jiffies(edev->stats_coal_usecs); + schedule_delayed_work(&edev->periodic_task, 0); + + DP_INFO(edev, "Configured stats coal ticks=%lu jiffies\n", + edev->stats_coal_ticks); + } else { + cancel_delayed_work_sync(&edev->periodic_task); + } + } + if (!netif_running(dev)) { DP_INFO(edev, "Interface is down\n"); return -EINVAL; @@ -2252,7 +2270,8 @@ out: } static const struct ethtool_ops qede_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_STATS_BLOCK_USECS, .get_link_ksettings = qede_get_link_ksettings, .set_link_ksettings = qede_set_link_ksettings, .get_drvinfo = qede_get_drvinfo, @@ -2303,7 +2322,8 @@ static const struct ethtool_ops qede_ethtool_ops = { }; static const struct ethtool_ops qede_vf_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_STATS_BLOCK_USECS, .get_link_ksettings = qede_get_link_ksettings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 4c6c685820e3..4b004a728190 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -307,6 +307,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->ops->get_vport_stats(edev->cdev, &stats); + spin_lock(&edev->stats_lock); + p_common->no_buff_discards = stats.common.no_buff_discards; p_common->packet_too_big_discard = stats.common.packet_too_big_discard; p_common->ttl0_discard = stats.common.ttl0_discard; @@ -404,6 +406,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) p_ah->tx_1519_to_max_byte_packets = stats.ah.tx_1519_to_max_byte_packets; } + + spin_unlock(&edev->stats_lock); } static void qede_get_stats64(struct net_device *dev, @@ -412,9 +416,10 @@ static void qede_get_stats64(struct net_device *dev, struct qede_dev *edev = netdev_priv(dev); struct qede_stats_common *p_common; - qede_fill_by_demand_stats(edev); p_common = &edev->stats.common; + spin_lock(&edev->stats_lock); + stats->rx_packets = p_common->rx_ucast_pkts + p_common->rx_mcast_pkts + p_common->rx_bcast_pkts; stats->tx_packets = p_common->tx_ucast_pkts + p_common->tx_mcast_pkts + @@ -434,6 +439,8 @@ static void qede_get_stats64(struct net_device *dev, stats->collisions = edev->stats.bb.tx_total_collisions; stats->rx_crc_errors = p_common->rx_crc_errors; stats->rx_frame_errors = p_common->rx_align_errors; + + spin_unlock(&edev->stats_lock); } #ifdef CONFIG_QED_SRIOV @@ -1063,6 +1070,23 @@ static void qede_unlock(struct qede_dev *edev) rtnl_unlock(); } +static void qede_periodic_task(struct work_struct *work) +{ + struct qede_dev *edev = container_of(work, struct qede_dev, + periodic_task.work); + + qede_fill_by_demand_stats(edev); + schedule_delayed_work(&edev->periodic_task, edev->stats_coal_ticks); +} + +static void qede_init_periodic_task(struct qede_dev *edev) +{ + INIT_DELAYED_WORK(&edev->periodic_task, qede_periodic_task); + spin_lock_init(&edev->stats_lock); + edev->stats_coal_usecs = USEC_PER_SEC; + edev->stats_coal_ticks = usecs_to_jiffies(USEC_PER_SEC); +} + static void qede_sp_task(struct work_struct *work) { struct qede_dev *edev = container_of(work, struct qede_dev, @@ -1082,6 +1106,7 @@ static void qede_sp_task(struct work_struct *work) */ if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) { + cancel_delayed_work_sync(&edev->periodic_task); #ifdef CONFIG_QED_SRIOV /* SRIOV must be disabled outside the lock to avoid a deadlock. * The recovery of the active VFs is currently not supported. @@ -1272,6 +1297,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, */ INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); mutex_init(&edev->qede_lock); + qede_init_periodic_task(edev); rc = register_netdev(edev->ndev); if (rc) { @@ -1296,6 +1322,11 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, edev->rx_copybreak = QEDE_RX_HDR_SIZE; qede_log_probe(edev); + + /* retain user config (for example - after recovery) */ + if (edev->stats_coal_usecs) + schedule_delayed_work(&edev->periodic_task, 0); + return 0; err4: @@ -1364,6 +1395,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); + cancel_delayed_work_sync(&edev->periodic_task); edev->ops->common->set_power_state(cdev, PCI_D0); From 50d30040eb856ff6b0382b4d9dc332dc15597729 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 25 May 2023 21:45:19 -0700 Subject: [PATCH 483/934] accel/ivpu: ivpu_ipc needs GENERIC_ALLOCATOR Drivers that use the gen_pool*() family of functions should select GENERIC_ALLOCATOR to prevent build errors like these: ld: drivers/accel/ivpu/ivpu_ipc.o: in function `gen_pool_free': include/linux/genalloc.h:172: undefined reference to `gen_pool_free_owner' ld: drivers/accel/ivpu/ivpu_ipc.o: in function `gen_pool_alloc_algo': include/linux/genalloc.h:138: undefined reference to `gen_pool_alloc_algo_owner' ld: drivers/accel/ivpu/ivpu_ipc.o: in function `gen_pool_free': include/linux/genalloc.h:172: undefined reference to `gen_pool_free_owner' ld: drivers/accel/ivpu/ivpu_ipc.o: in function `ivpu_ipc_init': drivers/accel/ivpu/ivpu_ipc.c:441: undefined reference to `devm_gen_pool_create' ld: drivers/accel/ivpu/ivpu_ipc.o: in function `gen_pool_add_virt': include/linux/genalloc.h:104: undefined reference to `gen_pool_add_owner' Fixes: 5d7422cfb498 ("accel/ivpu: Add IPC driver and JSM messages") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: https://lore.kernel.org/all/202305221206.1TaugDKP-lkp@intel.com/ Cc: Oded Gabbay Cc: dri-devel@lists.freedesktop.org Cc: Jacek Lawrynowicz Cc: Stanislaw Gruszka Cc: Andrzej Kacprowski Cc: Krystian Pradzynski Cc: Jeffrey Hugo Cc: Daniel Vetter Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230526044519.13441-1-rdunlap@infradead.org --- drivers/accel/ivpu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig index 9bdf168bf1d0..1a4c4ed9d113 100644 --- a/drivers/accel/ivpu/Kconfig +++ b/drivers/accel/ivpu/Kconfig @@ -7,6 +7,7 @@ config DRM_ACCEL_IVPU depends on PCI && PCI_MSI select FW_LOADER select SHMEM + select GENERIC_ALLOCATOR help Choose this option if you have a system that has an 14th generation Intel CPU or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated From c9b83ae4a1609b1914ba7fc70826a3f3a8b234db Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Jun 2023 11:38:52 +0200 Subject: [PATCH 484/934] ALSA: ymfpci: Fix kctl->id initialization ymfpci driver replaces the kctl->id.device after assigning the kctl via snd_ctl_add(). This doesn't work any longer with the new Xarray lookup change. It has to be set before snd_ctl_add() call instead. Fixes: c27e1efb61c5 ("ALSA: control: Use xarray for faster lookups") Cc: Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230606093855.14685-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/ymfpci/ymfpci_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c index 6971eec45a4d..6b8d8690b6b2 100644 --- a/sound/pci/ymfpci/ymfpci_main.c +++ b/sound/pci/ymfpci/ymfpci_main.c @@ -1822,20 +1822,20 @@ int snd_ymfpci_mixer(struct snd_ymfpci *chip, int rear_switch) if (snd_BUG_ON(!chip->pcm_spdif)) return -ENXIO; kctl = snd_ctl_new1(&snd_ymfpci_spdif_default, chip); + kctl->id.device = chip->pcm_spdif->device; err = snd_ctl_add(chip->card, kctl); if (err < 0) return err; - kctl->id.device = chip->pcm_spdif->device; kctl = snd_ctl_new1(&snd_ymfpci_spdif_mask, chip); + kctl->id.device = chip->pcm_spdif->device; err = snd_ctl_add(chip->card, kctl); if (err < 0) return err; - kctl->id.device = chip->pcm_spdif->device; kctl = snd_ctl_new1(&snd_ymfpci_spdif_stream, chip); + kctl->id.device = chip->pcm_spdif->device; err = snd_ctl_add(chip->card, kctl); if (err < 0) return err; - kctl->id.device = chip->pcm_spdif->device; chip->spdif_pcm_ctl = kctl; /* direct recording source */ From f2f312ad88c68a7f4a7789b9269ae33af3c7c7e9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Jun 2023 11:38:53 +0200 Subject: [PATCH 485/934] ALSA: cmipci: Fix kctl->id initialization cmipci driver replaces the kctl->id.device after assigning the kctl via snd_ctl_add(). This doesn't work any longer with the new Xarray lookup change. It has to be set before snd_ctl_add() call instead. Fixes: c27e1efb61c5 ("ALSA: control: Use xarray for faster lookups") Cc: Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230606093855.14685-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/cmipci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c index 727db6d43391..6d25c12d9ef0 100644 --- a/sound/pci/cmipci.c +++ b/sound/pci/cmipci.c @@ -2688,20 +2688,20 @@ static int snd_cmipci_mixer_new(struct cmipci *cm, int pcm_spdif_device) } if (cm->can_ac3_hw) { kctl = snd_ctl_new1(&snd_cmipci_spdif_default, cm); + kctl->id.device = pcm_spdif_device; err = snd_ctl_add(card, kctl); if (err < 0) return err; - kctl->id.device = pcm_spdif_device; kctl = snd_ctl_new1(&snd_cmipci_spdif_mask, cm); + kctl->id.device = pcm_spdif_device; err = snd_ctl_add(card, kctl); if (err < 0) return err; - kctl->id.device = pcm_spdif_device; kctl = snd_ctl_new1(&snd_cmipci_spdif_stream, cm); + kctl->id.device = pcm_spdif_device; err = snd_ctl_add(card, kctl); if (err < 0) return err; - kctl->id.device = pcm_spdif_device; } if (cm->chip_version <= 37) { sw = snd_cmipci_old_mixer_switches; From c5ae57b1bb99bd6f50b90428fabde397c2aeba0f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Jun 2023 11:38:54 +0200 Subject: [PATCH 486/934] ALSA: gus: Fix kctl->id initialization GUS driver replaces the kctl->id.index after assigning the kctl via snd_ctl_add(). This doesn't work any longer with the new Xarray lookup change. It has to be set before snd_ctl_add() call instead. Fixes: c27e1efb61c5 ("ALSA: control: Use xarray for faster lookups") Cc: Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230606093855.14685-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/isa/gus/gus_pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/isa/gus/gus_pcm.c b/sound/isa/gus/gus_pcm.c index 230f65a0e4b0..388db5fb65bd 100644 --- a/sound/isa/gus/gus_pcm.c +++ b/sound/isa/gus/gus_pcm.c @@ -892,10 +892,10 @@ int snd_gf1_pcm_new(struct snd_gus_card *gus, int pcm_dev, int control_index) kctl = snd_ctl_new1(&snd_gf1_pcm_volume_control1, gus); else kctl = snd_ctl_new1(&snd_gf1_pcm_volume_control, gus); + kctl->id.index = control_index; err = snd_ctl_add(card, kctl); if (err < 0) return err; - kctl->id.index = control_index; return 0; } From 5c219a340850233aecbb444af964653ecd3d1370 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Jun 2023 11:38:55 +0200 Subject: [PATCH 487/934] ALSA: hda: Fix kctl->id initialization HD-audio core code replaces the kctl->id.index of SPDIF-related controls after assigning via snd_ctl_add(). This doesn't work any longer with the new Xarray lookup change. The change of the kctl->id content has to be done via snd_ctl_rename_id() helper, instead. Fixes: c27e1efb61c5 ("ALSA: control: Use xarray for faster lookups") Cc: Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230606093855.14685-5-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 9f79c0ac2bda..bd19f92aeeec 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2458,10 +2458,14 @@ int snd_hda_create_dig_out_ctls(struct hda_codec *codec, type == HDA_PCM_TYPE_HDMI) { /* suppose a single SPDIF device */ for (dig_mix = dig_mixes; dig_mix->name; dig_mix++) { + struct snd_ctl_elem_id id; + kctl = find_mixer_ctl(codec, dig_mix->name, 0, 0); if (!kctl) break; - kctl->id.index = spdif_index; + id = kctl->id; + id.index = spdif_index; + snd_ctl_rename_id(codec->card, &kctl->id, &id); } bus->primary_dig_out_type = HDA_PCM_TYPE_HDMI; } From 3aa0519a4780f1b8e11966bd879d4a2934ba455f Mon Sep 17 00:00:00 2001 From: Balint Dobszay Date: Thu, 1 Jun 2023 16:07:49 +0200 Subject: [PATCH 488/934] firmware: arm_ffa: Set handle field to zero in memory descriptor As described in the commit 111a833dc5cb ("firmware: arm_ffa: Set reserved/MBZ fields to zero in the memory descriptors") some fields in the memory descriptor have to be zeroed explicitly. The handle field is one of these, but it was left out from that change, fix this now. Fixes: 111a833dc5cb ("firmware: arm_ffa: Set reserved/MBZ fields to zero in the memory descriptors") Reported-by: Imre Kis Signed-off-by: Balint Dobszay Link: https://lore.kernel.org/r/20230601140749.93812-1-balint.dobszay@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index e23409138667..2109cd178ff7 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -424,6 +424,7 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, ep_mem_access->flag = 0; ep_mem_access->reserved = 0; } + mem_region->handle = 0; mem_region->reserved_0 = 0; mem_region->reserved_1 = 0; mem_region->ep_count = args->nattrs; From 411360257c1f4fccaa20143098b6d3fcc9d4e4dc Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 13 Apr 2023 08:38:10 +0200 Subject: [PATCH 489/934] accel/ivpu: Reserve all non-command bo's using DMA_RESV_USAGE_BOOKKEEP Use DMA_RESV_USAGE_BOOKKEEP reservation for buffer objects, except for command buffers for which we use DMA_RESV_USAGE_WRITE (since VPU can write to command buffer context save area). Fixes: 0ec8671837a6 ("accel/ivpu: Fix S3 system suspend when not idle") Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230413063810.3167511-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 3c6f1e16cf2f..d45be0615b47 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -431,6 +431,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 struct ivpu_file_priv *file_priv = file->driver_priv; struct ivpu_device *vdev = file_priv->vdev; struct ww_acquire_ctx acquire_ctx; + enum dma_resv_usage usage; struct ivpu_bo *bo; int ret; u32 i; @@ -461,22 +462,28 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset; - ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx); + ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count, + &acquire_ctx); if (ret) { ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret); return ret; } - ret = dma_resv_reserve_fences(bo->base.resv, 1); - if (ret) { - ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); - goto unlock_reservations; + for (i = 0; i < buf_count; i++) { + ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); + if (ret) { + ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); + goto unlock_reservations; + } } - dma_resv_add_fence(bo->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); + for (i = 0; i < buf_count; i++) { + usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP; + dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, usage); + } unlock_reservations: - drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, 1, &acquire_ctx); + drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); wmb(); /* Flush write combining buffers */ From 306320034e8fbe7ee1cc4f5269c55658b4612048 Mon Sep 17 00:00:00 2001 From: Bernhard Seibold Date: Fri, 2 Jun 2023 15:30:29 +0200 Subject: [PATCH 490/934] serial: lantiq: add missing interrupt ack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the error interrupt is never acknowledged, so once active it will stay active indefinitely, causing the handler to be called in an infinite loop. Fixes: 2f0fc4159a6a ("SERIAL: Lantiq: Add driver for MIPS Lantiq SOCs.") Cc: Signed-off-by: Bernhard Seibold Reviewed-by: Ilpo Järvinen Message-ID: <20230602133029.546-1-mail@bernhard-seibold.de> Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/lantiq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c index a58e9277dfad..f1387f1024db 100644 --- a/drivers/tty/serial/lantiq.c +++ b/drivers/tty/serial/lantiq.c @@ -250,6 +250,7 @@ lqasc_err_int(int irq, void *_port) struct ltq_uart_port *ltq_port = to_ltq_uart_port(port); spin_lock_irqsave(<q_port->lock, flags); + __raw_writel(ASC_IRNCR_EIR, port->membase + LTQ_ASC_IRNCR); /* clear any pending interrupts */ asc_update_bits(0, ASCWHBSTATE_CLRPE | ASCWHBSTATE_CLRFE | ASCWHBSTATE_CLRROE, port->membase + LTQ_ASC_WHBSTATE); From 3e54ed8247c94c8bdf370bd872bd9dfe72b1b12b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Jun 2023 14:34:47 +0200 Subject: [PATCH 491/934] wifi: cfg80211: fix locking in sched scan stop work This should use wiphy_lock() now instead of acquiring the RTNL, since cfg80211_stop_sched_scan_req() now needs that. Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Signed-off-by: Johannes Berg --- net/wireless/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 5b0c4d5b80cf..b3ec9eaec36b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -368,12 +368,12 @@ static void cfg80211_sched_scan_stop_wk(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, sched_scan_stop_wk); - rtnl_lock(); + wiphy_lock(&rdev->wiphy); list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { if (req->nl_owner_dead) cfg80211_stop_sched_scan_req(rdev, req, false); } - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); } static void cfg80211_propagate_radar_detect_wk(struct work_struct *work) From f7e60032c6618dfd643c7210d5cba2789e2de2e2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Jun 2023 14:34:48 +0200 Subject: [PATCH 492/934] wifi: cfg80211: fix locking in regulatory disconnect This should use wiphy_lock() now instead of requiring the RTNL, since __cfg80211_leave() via cfg80211_leave() is now requiring that lock to be held. Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Signed-off-by: Johannes Berg --- net/wireless/reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0d40d6af7e10..949e1fb3bec6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2440,11 +2440,11 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy) struct wireless_dev *wdev; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - ASSERT_RTNL(); - + wiphy_lock(wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) if (!reg_wdev_chan_valid(wiphy, wdev)) cfg80211_leave(rdev, wdev); + wiphy_unlock(wiphy); } static void reg_check_chans_work(struct work_struct *work) From 947c70a213769f60e9d5aca2bc88b50a1cfaf5a6 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 6 Jun 2023 17:38:59 +0800 Subject: [PATCH 493/934] spi: cadence-quadspi: Add missing check for dma_set_mask Add check for dma_set_mask() and return the error if it fails. Fixes: 1a6f854f7daa ("spi: cadence-quadspi: Add Xilinx Versal external DMA support") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20230606093859.27818-1-jiasheng@iscas.ac.cn Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 6ddb2dfc0f00..32449bef4415 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1756,8 +1756,11 @@ static int cqspi_probe(struct platform_device *pdev) cqspi->slow_sram = true; if (of_device_is_compatible(pdev->dev.of_node, - "xlnx,versal-ospi-1.0")) - dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + "xlnx,versal-ospi-1.0")) { + ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + goto probe_reset_failed; + } } ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0, From 95ae9979bfe3174c2ee8d64409c44532f2881907 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Tue, 6 Jun 2023 20:00:34 +0800 Subject: [PATCH 494/934] gpio: sim: fix memory corruption when adding named lines and unnamed hogs When constructing the sim, gpio-sim constructs an array of named lines, sized based on the largest offset of any named line, and then initializes that array with the names of all lines, including unnamed hogs with higher offsets. In doing so it writes NULLs beyond the extent of the array. Add a check that only named lines are used to initialize the array. Fixes: cb8c474e79be ("gpio: sim: new testing module") Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sim.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index a1c8702f362c..fab67a5785d7 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -721,8 +721,10 @@ static char **gpio_sim_make_line_names(struct gpio_sim_bank *bank, if (!line_names) return ERR_PTR(-ENOMEM); - list_for_each_entry(line, &bank->line_list, siblings) - line_names[line->offset] = line->name; + list_for_each_entry(line, &bank->line_list, siblings) { + if (line->name && (line->offset <= max_offset)) + line_names[line->offset] = line->name; + } return line_names; } From b752a385b584d385683c65cb76a1298f1379a88c Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Tue, 6 Jun 2023 22:57:47 +0800 Subject: [PATCH 495/934] ALSA: hda/realtek: Enable 4 amplifiers instead of 2 on a HP platform In the commit 7bb62340951a ("ALSA: hda/realtek: fix speaker, mute/micmute LEDs not work on a HP platform"), speakers and LEDs are fixed but only 2 CS35L41 amplifiers on SPI bus connected to Realtek codec are enabled. Need the ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED to get all amplifiers working. Signed-off-by: Chris Chiu Fixes: 7bb62340951a ("ALSA: hda/realtek: fix speaker, mute/micmute LEDs not work on a HP platform") Cc: Link: https://lore.kernel.org/r/20230606145747.135966-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 699167b9175f..a5d55a7063d3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9500,7 +9500,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8b8a, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b8b, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b8d, "HP", ALC236_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8b8f, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8b8f, "HP", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b92, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), From c5c31fb71f16ba75bad4ade208abbae225305b65 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 30 May 2023 01:34:02 +0300 Subject: [PATCH 496/934] spi: fsl-dspi: avoid SCK glitches with continuous transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DSPI controller has configurable timing for (a) tCSC: the interval between the assertion of the chip select and the first clock edge (b) tASC: the interval between the last clock edge and the deassertion of the chip select What is a bit surprising, but is documented in the figure "Example of continuous transfer (CPHA=1, CONT=1)" in the datasheet, is that when the chip select stays asserted between multiple TX FIFO writes, the tCSC and tASC times still apply. With CONT=1, chip select remains asserted, but SCK takes a break and goes to the idle state for tASC + tCSC ns. In other words, the default values (of 0 and 0 ns) result in SCK glitches where the SCK transition to the idle state, as well as the SCK transition from the idle state, will have no delay in between, and it may appear that a SCK cycle has simply gone missing. The resulting timing violation might cause data corruption in many peripherals, as their chip select is asserted. The driver has device tree bindings for tCSC ("fsl,spi-cs-sck-delay") and tASC ("fsl,spi-sck-cs-delay"), but these are only specified to apply when the chip select toggles in the first place, and this timing characteristic depends on each peripheral. Many peripherals do not have explicit timing requirements, so many device trees do not have these properties present at all. Nonetheless, the lack of SCK glitches is a common sense requirement, and since the SCK stays in the idle state during transfers for tCSC+tASC ns, and that in itself should look like half a cycle, then let's ensure that tCSC and tASC are at least a quarter of a SCK period, such that their sum is at least half of one. Fixes: 95bf15f38641 ("spi: fsl-dspi: Add ~50ns delay between cs and sck") Reported-by: Lisa Chen (陈敏捷) Debugged-by: Lisa Chen (陈敏捷) Tested-by: Lisa Chen (陈敏捷) Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230529223402.1199503-1-vladimir.oltean@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 4339485d202c..674cfe05f411 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1002,7 +1002,9 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr, static int dspi_setup(struct spi_device *spi) { struct fsl_dspi *dspi = spi_controller_get_devdata(spi->controller); + u32 period_ns = DIV_ROUND_UP(NSEC_PER_SEC, spi->max_speed_hz); unsigned char br = 0, pbr = 0, pcssck = 0, cssck = 0; + u32 quarter_period_ns = DIV_ROUND_UP(period_ns, 4); u32 cs_sck_delay = 0, sck_cs_delay = 0; struct fsl_dspi_platform_data *pdata; unsigned char pasc = 0, asc = 0; @@ -1031,6 +1033,19 @@ static int dspi_setup(struct spi_device *spi) sck_cs_delay = pdata->sck_cs_delay; } + /* Since tCSC and tASC apply to continuous transfers too, avoid SCK + * glitches of half a cycle by never allowing tCSC + tASC to go below + * half a SCK period. + */ + if (cs_sck_delay < quarter_period_ns) + cs_sck_delay = quarter_period_ns; + if (sck_cs_delay < quarter_period_ns) + sck_cs_delay = quarter_period_ns; + + dev_dbg(&spi->dev, + "DSPI controller timing params: CS-to-SCK delay %u ns, SCK-to-CS delay %u ns\n", + cs_sck_delay, sck_cs_delay); + clkrate = clk_get_rate(dspi->clk); hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate); From 568a67e742dfa90b19a23305317164c5c350b71e Mon Sep 17 00:00:00 2001 From: Andrew Powers-Holmes Date: Thu, 1 Jun 2023 15:25:16 +0200 Subject: [PATCH 497/934] arm64: dts: rockchip: Fix rk356x PCIe register and range mappings The register and range mappings for the PCIe controller in Rockchip's RK356x SoCs are incorrect. Replace them with corrected values from the vendor BSP sources, updated to match current DT schema. These values are also used in u-boot. Fixes: 66b51ea7d70f ("arm64: dts: rockchip: Add rk3568 PCIe2x1 controller") Cc: stable@vger.kernel.org Signed-off-by: Andrew Powers-Holmes Signed-off-by: Jonas Karlman Signed-off-by: Nicolas Frattaroli Tested-by: Diederik de Haas Link: https://lore.kernel.org/r/20230601132516.153934-1-frattaroli.nicolas@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3568.dtsi | 14 ++++++++------ arch/arm64/boot/dts/rockchip/rk356x.dtsi | 7 ++++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3568.dtsi b/arch/arm64/boot/dts/rockchip/rk3568.dtsi index ba67b58f05b7..f1be76a54ceb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568.dtsi @@ -94,9 +94,10 @@ power-domains = <&power RK3568_PD_PIPE>; reg = <0x3 0xc0400000 0x0 0x00400000>, <0x0 0xfe270000 0x0 0x00010000>, - <0x3 0x7f000000 0x0 0x01000000>; - ranges = <0x01000000 0x0 0x3ef00000 0x3 0x7ef00000 0x0 0x00100000>, - <0x02000000 0x0 0x00000000 0x3 0x40000000 0x0 0x3ef00000>; + <0x0 0xf2000000 0x0 0x00100000>; + ranges = <0x01000000 0x0 0xf2100000 0x0 0xf2100000 0x0 0x00100000>, + <0x02000000 0x0 0xf2200000 0x0 0xf2200000 0x0 0x01e00000>, + <0x03000000 0x0 0x40000000 0x3 0x40000000 0x0 0x40000000>; reg-names = "dbi", "apb", "config"; resets = <&cru SRST_PCIE30X1_POWERUP>; reset-names = "pipe"; @@ -146,9 +147,10 @@ power-domains = <&power RK3568_PD_PIPE>; reg = <0x3 0xc0800000 0x0 0x00400000>, <0x0 0xfe280000 0x0 0x00010000>, - <0x3 0xbf000000 0x0 0x01000000>; - ranges = <0x01000000 0x0 0x3ef00000 0x3 0xbef00000 0x0 0x00100000>, - <0x02000000 0x0 0x00000000 0x3 0x80000000 0x0 0x3ef00000>; + <0x0 0xf0000000 0x0 0x00100000>; + ranges = <0x01000000 0x0 0xf0100000 0x0 0xf0100000 0x0 0x00100000>, + <0x02000000 0x0 0xf0200000 0x0 0xf0200000 0x0 0x01e00000>, + <0x03000000 0x0 0x40000000 0x3 0x80000000 0x0 0x40000000>; reg-names = "dbi", "apb", "config"; resets = <&cru SRST_PCIE30X2_POWERUP>; reset-names = "pipe"; diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index f62e0fd881a9..61680c7ac489 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -952,7 +952,7 @@ compatible = "rockchip,rk3568-pcie"; reg = <0x3 0xc0000000 0x0 0x00400000>, <0x0 0xfe260000 0x0 0x00010000>, - <0x3 0x3f000000 0x0 0x01000000>; + <0x0 0xf4000000 0x0 0x00100000>; reg-names = "dbi", "apb", "config"; interrupts = , , @@ -982,8 +982,9 @@ phys = <&combphy2 PHY_TYPE_PCIE>; phy-names = "pcie-phy"; power-domains = <&power RK3568_PD_PIPE>; - ranges = <0x01000000 0x0 0x3ef00000 0x3 0x3ef00000 0x0 0x00100000 - 0x02000000 0x0 0x00000000 0x3 0x00000000 0x0 0x3ef00000>; + ranges = <0x01000000 0x0 0xf4100000 0x0 0xf4100000 0x0 0x00100000>, + <0x02000000 0x0 0xf4200000 0x0 0xf4200000 0x0 0x01e00000>, + <0x03000000 0x0 0x40000000 0x3 0x00000000 0x0 0x40000000>; resets = <&cru SRST_PCIE20_POWERUP>; reset-names = "pipe"; #address-cells = <3>; From 981a37bab5e5f16137266d3f00cf2bd018af36ef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 5 Jun 2023 12:03:15 -0700 Subject: [PATCH 498/934] btrfs: properly enable async discard when switching from RO->RW The async discard uses the BTRFS_FS_DISCARD_RUNNING bit in the fs_info to force discards off when the filesystem has aborted or we're generally not able to run discards. This gets flipped on when we're mounted rw, and also when we go from ro->rw. Commit 63a7cb13071842 ("btrfs: auto enable discard=async when possible") enabled async discard by default, and this meant "mount -o ro /dev/xxx /yyy" had async discards turned on. Unfortunately, this meant our check in btrfs_remount_cleanup() would see that discards are already on: /* If we toggled discard async */ if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && btrfs_test_opt(fs_info, DISCARD_ASYNC)) btrfs_discard_resume(fs_info); So, we'd never call btrfs_discard_resume() when remounting the root filesystem from ro->rw. drgn shows this really nicely: import os import sys from drgn.helpers.linux.fs import path_lookup from drgn import NULL, Object, Type, cast def btrfs_sb(sb): return cast("struct btrfs_fs_info *", sb.s_fs_info) if len(sys.argv) == 1: path = "/" else: path = sys.argv[1] fs_info = cast("struct btrfs_fs_info *", path_lookup(prog, path).mnt.mnt_sb.s_fs_info) BTRFS_FS_DISCARD_RUNNING = 1 << prog['BTRFS_FS_DISCARD_RUNNING'] if fs_info.flags & BTRFS_FS_DISCARD_RUNNING: print("discard running flag is on") else: print("discard running flag is off") [root]# mount | grep nvme /dev/nvme0n1p3 on / type btrfs (rw,relatime,compress-force=zstd:3,ssd,discard=async,space_cache=v2,subvolid=5,subvol=/) [root]# ./discard_running.drgn discard running flag is off [root]# mount -o remount,discard=sync / [root]# mount -o remount,discard=async / [root]# ./discard_running.drgn discard running flag is on The fix is to call btrfs_discard_resume() when we're going from ro->rw. It already checks to make sure the async discard flag is on, so it'll do the right thing. Fixes: 63a7cb13071842 ("btrfs: auto enable discard=async when possible") CC: stable@vger.kernel.org # 6.3+ Reviewed-by: Boris Burkov Signed-off-by: Chris Mason Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ec18e2210602..efeb1a9d040a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1841,6 +1841,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_clear_sb_rdonly(sb); set_bit(BTRFS_FS_OPEN, &fs_info->flags); + + /* + * If we've gone from readonly -> read/write, we need to get + * our sync/async discard lists in the right state. + */ + btrfs_discard_resume(fs_info); } out: /* From 69844e335d8c22454746c7903776533d8b4ab8fa Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 6 Jun 2023 10:22:02 -0700 Subject: [PATCH 499/934] selftests/bpf: Fix sockopt_sk selftest Commit f4e4534850a9 ("net/netlink: fix NETLINK_LIST_MEMBERSHIPS length report") fixed NETLINK_LIST_MEMBERSHIPS length report which caused selftest sockopt_sk failure. The failure log looks like test_sockopt_sk:PASS:join_cgroup /sockopt_sk 0 nsec run_test:PASS:skel_load 0 nsec run_test:PASS:setsockopt_link 0 nsec run_test:PASS:getsockopt_link 0 nsec getsetsockopt:FAIL:Unexpected NETLINK_LIST_MEMBERSHIPS value unexpected Unexpected NETLINK_LIST_MEMBERSHIPS value: actual 8 != expected 4 run_test:PASS:getsetsockopt 0 nsec #201 sockopt_sk:FAIL In net/netlink/af_netlink.c, function netlink_getsockopt(), for NETLINK_LIST_MEMBERSHIPS, nlk->ngroups equals to 36. Before Commit f4e4534850a9, the optlen is calculated as ALIGN(nlk->ngroups / 8, sizeof(u32)) = 4 After that commit, the optlen is ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)) = 8 Fix the test by setting the expected optlen to be 8. Fixes: f4e4534850a9 ("net/netlink: fix NETLINK_LIST_MEMBERSHIPS length report") Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230606172202.1606249-1-yhs@fb.com --- tools/testing/selftests/bpf/prog_tests/sockopt_sk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 4512dd808c33..05d0e07da394 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -209,7 +209,7 @@ static int getsetsockopt(void) err, errno); goto err; } - ASSERT_EQ(optlen, 4, "Unexpected NETLINK_LIST_MEMBERSHIPS value"); + ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value"); free(big_buf); close(fd); From bd058763a624a1fb5c20f3c46e632d623c043676 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Wed, 24 May 2023 12:25:27 +0000 Subject: [PATCH 500/934] netfilter: nf_tables: Add null check for nla_nest_start_noflag() in nft_dump_basechain_hook() The nla_nest_start_noflag() function may fail and return NULL; the return value needs to be checked. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d54725cd11a5 ("netfilter: nf_tables: support for multiple devices per netdev hook") Signed-off-by: Gavrilov Ilia Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dc5675962de4..3445b8e1f462 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1600,6 +1600,8 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family, if (nft_base_chain_netdev(family, ops->hooknum)) { nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS); + if (!nest_devs) + goto nla_put_failure; if (!hook_list) hook_list = &basechain->hook_list; From 14e8b293903785590a0ef168745ac84250cb1f4c Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Thu, 25 May 2023 15:07:24 +0100 Subject: [PATCH 501/934] netfilter: nft_bitwise: fix register tracking At the end of `nft_bitwise_reduce`, there is a loop which is intended to update the bitwise expression associated with each tracked destination register. However, currently, it just updates the first register repeatedly. Fix it. Fixes: 34cc9e52884a ("netfilter: nf_tables: cancel tracking for clobbered destination registers") Signed-off-by: Jeremy Sowden Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_bitwise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 84eae7cabc67..2527a01486ef 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -323,7 +323,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track, dreg = priv->dreg; regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE); for (i = 0; i < regcount; i++, dreg++) - track->regs[priv->dreg].bitwise = expr; + track->regs[dreg].bitwise = expr; return false; } From e1f543dc660b44618a1bd72ddb4ca0828a95f7ad Mon Sep 17 00:00:00 2001 From: Tijs Van Buggenhout Date: Thu, 25 May 2023 12:25:26 +0200 Subject: [PATCH 502/934] netfilter: conntrack: fix NULL pointer dereference in nf_confirm_cthelper An nf_conntrack_helper from nf_conn_help may become NULL after DNAT. Observed when TCP port 1720 (Q931_PORT), associated with h323 conntrack helper, is DNAT'ed to another destination port (e.g. 1730), while nfqueue is being used for final acceptance (e.g. snort). This happenned after transition from kernel 4.14 to 5.10.161. Workarounds: * keep the same port (1720) in DNAT * disable nfqueue * disable/unload h323 NAT helper $ linux-5.10/scripts/decode_stacktrace.sh vmlinux < /tmp/kernel.log BUG: kernel NULL pointer dereference, address: 0000000000000084 [..] RIP: 0010:nf_conntrack_update (net/netfilter/nf_conntrack_core.c:2080 net/netfilter/nf_conntrack_core.c:2134) nf_conntrack [..] nfqnl_reinject (net/netfilter/nfnetlink_queue.c:237) nfnetlink_queue nfqnl_recv_verdict (net/netfilter/nfnetlink_queue.c:1230) nfnetlink_queue nfnetlink_rcv_msg (net/netfilter/nfnetlink.c:241) nfnetlink [..] Fixes: ee04805ff54a ("netfilter: conntrack: make conntrack userspace helpers work again") Signed-off-by: Tijs Van Buggenhout Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c4ccfec6cb98..d119f1d4c2fc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2260,6 +2260,9 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, return 0; helper = rcu_dereference(help->helper); + if (!helper) + return 0; + if (!(helper->flags & NF_CT_HELPER_F_USERSPACE)) return 0; From 24e227896bbf003165e006732dccb3516f87f88e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 18 May 2023 10:33:00 -0700 Subject: [PATCH 503/934] netfilter: ipset: Add schedule point in call_ad(). syzkaller found a repro that causes Hung Task [0] with ipset. The repro first creates an ipset and then tries to delete a large number of IPs from the ipset concurrently: IPSET_ATTR_IPADDR_IPV4 : 172.20.20.187 IPSET_ATTR_CIDR : 2 The first deleting thread hogs a CPU with nfnl_lock(NFNL_SUBSYS_IPSET) held, and other threads wait for it to be released. Previously, the same issue existed in set->variant->uadt() that could run so long under ip_set_lock(set). Commit 5e29dc36bd5e ("netfilter: ipset: Rework long task execution when adding/deleting entries") tried to fix it, but the issue still exists in the caller with another mutex. While adding/deleting many IPs, we should release the CPU periodically to prevent someone from abusing ipset to hang the system. Note we need to increment the ipset's refcnt to prevent the ipset from being destroyed while rescheduling. [0]: INFO: task syz-executor174:268 blocked for more than 143 seconds. Not tainted 6.4.0-rc1-00145-gba79e9a73284 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor174 state:D stack:0 pid:268 ppid:260 flags:0x0000000d Call trace: __switch_to+0x308/0x714 arch/arm64/kernel/process.c:556 context_switch kernel/sched/core.c:5343 [inline] __schedule+0xd84/0x1648 kernel/sched/core.c:6669 schedule+0xf0/0x214 kernel/sched/core.c:6745 schedule_preempt_disabled+0x58/0xf0 kernel/sched/core.c:6804 __mutex_lock_common kernel/locking/mutex.c:679 [inline] __mutex_lock+0x6fc/0xdb0 kernel/locking/mutex.c:747 __mutex_lock_slowpath+0x14/0x20 kernel/locking/mutex.c:1035 mutex_lock+0x98/0xf0 kernel/locking/mutex.c:286 nfnl_lock net/netfilter/nfnetlink.c:98 [inline] nfnetlink_rcv_msg+0x480/0x70c net/netfilter/nfnetlink.c:295 netlink_rcv_skb+0x1c0/0x350 net/netlink/af_netlink.c:2546 nfnetlink_rcv+0x18c/0x199c net/netfilter/nfnetlink.c:658 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x664/0x8cc net/netlink/af_netlink.c:1365 netlink_sendmsg+0x6d0/0xa4c net/netlink/af_netlink.c:1913 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg net/socket.c:747 [inline] ____sys_sendmsg+0x4b8/0x810 net/socket.c:2503 ___sys_sendmsg net/socket.c:2557 [inline] __sys_sendmsg+0x1f8/0x2a4 net/socket.c:2586 __do_sys_sendmsg net/socket.c:2595 [inline] __se_sys_sendmsg net/socket.c:2593 [inline] __arm64_sys_sendmsg+0x80/0x94 net/socket.c:2593 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall+0x84/0x270 arch/arm64/kernel/syscall.c:52 el0_svc_common+0x134/0x24c arch/arm64/kernel/syscall.c:142 do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:193 el0_svc+0x2c/0x7c arch/arm64/kernel/entry-common.c:637 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 Reported-by: syzkaller Fixes: a7b4f989a629 ("netfilter: ipset: IP set core support") Signed-off-by: Kuniyuki Iwashima Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 46ebee9400da..9a6b64779e64 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1694,6 +1694,14 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { + if (retried) { + __ip_set_get(set); + nfnl_unlock(NFNL_SUBSYS_IPSET); + cond_resched(); + nfnl_lock(NFNL_SUBSYS_IPSET); + __ip_set_put(set); + } + ip_set_lock(set); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); ip_set_unlock(set); From 08e42a0d3ad30f276f9597b591f975971a1b0fcf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 6 Jun 2023 16:32:44 +0200 Subject: [PATCH 504/934] netfilter: nf_tables: out-of-bound check in chain blob Add current size of rule expressions to the boundary check. Fixes: 2c865a8a28a1 ("netfilter: nf_tables: add rule blob layout") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3445b8e1f462..0519d45ede6b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9007,7 +9007,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha continue; } - if (WARN_ON_ONCE(data + expr->ops->size > data_boundary)) + if (WARN_ON_ONCE(data + size + expr->ops->size > data_boundary)) return -ENOMEM; memcpy(data + size, expr, expr->ops->size); From f6ca5baf2a86d0eaff3859844da9e4e29ff750a7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 5 Jun 2023 16:32:57 -0700 Subject: [PATCH 505/934] netlink: specs: ethtool: fix random typos Working on the code gen for C reveals typos in the ethtool spec as the compiler tries to find the names in the existing uAPI header. Fix the mistakes. Fixes: a353318ebf24 ("tools: ynl: populate most of the ethtool spec") Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230605233257.843977-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 32 ++++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 3abc576ff797..4846345bade4 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -223,7 +223,7 @@ attribute-sets: name: tx-min-frag-size type: u32 - - name: tx-min-frag-size + name: rx-min-frag-size type: u32 - name: verify-enabled @@ -294,7 +294,7 @@ attribute-sets: name: master-slave-state type: u8 - - name: master-slave-lanes + name: lanes type: u32 - name: rate-matching @@ -322,7 +322,7 @@ attribute-sets: name: ext-substate type: u8 - - name: down-cnt + name: ext-down-cnt type: u32 - name: debug @@ -577,7 +577,7 @@ attribute-sets: name: phc-index type: u32 - - name: cable-test-nft-nest-result + name: cable-test-ntf-nest-result attributes: - name: pair @@ -586,7 +586,7 @@ attribute-sets: name: code type: u8 - - name: cable-test-nft-nest-fault-length + name: cable-test-ntf-nest-fault-length attributes: - name: pair @@ -595,16 +595,16 @@ attribute-sets: name: cm type: u32 - - name: cable-test-nft-nest + name: cable-test-ntf-nest attributes: - name: result type: nest - nested-attributes: cable-test-nft-nest-result + nested-attributes: cable-test-ntf-nest-result - name: fault-length type: nest - nested-attributes: cable-test-nft-nest-fault-length + nested-attributes: cable-test-ntf-nest-fault-length - name: cable-test attributes: @@ -618,7 +618,7 @@ attribute-sets: - name: nest type: nest - nested-attributes: cable-test-nft-nest + nested-attributes: cable-test-ntf-nest - name: cable-test-tdr-cfg attributes: @@ -776,7 +776,7 @@ attribute-sets: name: hist-bkt-hi type: u32 - - name: hist-bkt-val + name: hist-val type: u64 - name: stats @@ -965,7 +965,7 @@ operations: - duplex - master-slave-cfg - master-slave-state - - master-slave-lanes + - lanes - rate-matching dump: *linkmodes-get-op - @@ -999,7 +999,7 @@ operations: - sqi-max - ext-state - ext-substate - - down-cnt + - ext-down-cnt dump: *linkstate-get-op - name: debug-get @@ -1351,7 +1351,7 @@ operations: reply: attributes: - header - - cable-test-nft-nest + - cable-test-ntf-nest - name: cable-test-tdr-act doc: Cable test TDR. @@ -1539,7 +1539,7 @@ operations: - hkey dump: *rss-get-op - - name: plca-get + name: plca-get-cfg doc: Get PLCA params. attribute-set: plca @@ -1561,7 +1561,7 @@ operations: - burst-tmr dump: *plca-get-op - - name: plca-set + name: plca-set-cfg doc: Set PLCA params. attribute-set: plca @@ -1585,7 +1585,7 @@ operations: - name: plca-ntf doc: Notification for change in PLCA params. - notify: plca-get + notify: plca-get-cfg - name: mm-get doc: Get MAC Merge configuration and state From 6eea63c7090b20ee41032d3e478e617b219d69aa Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Tue, 6 Jun 2023 16:32:47 +0200 Subject: [PATCH 506/934] drm/lima: fix sched context destroy The drm sched entity must be flushed before finishing, to account for jobs potentially still in flight at that time. Lima did not do this flush until now, so switch the destroy call to the drm_sched_entity_destroy() wrapper which will take care of that. This fixes a regression on lima which started since the rework in commit 2fdb8a8f07c2 ("drm/scheduler: rework entity flush, kill and fini") where some specific types of applications may hang indefinitely. Fixes: 2fdb8a8f07c2 ("drm/scheduler: rework entity flush, kill and fini") Reviewed-by: Vasily Khoruzhick Signed-off-by: Erico Nunes Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20230606143247.433018-1-nunes.erico@gmail.com --- drivers/gpu/drm/lima/lima_sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index ff003403fbbc..ffd91a5ee299 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -165,7 +165,7 @@ int lima_sched_context_init(struct lima_sched_pipe *pipe, void lima_sched_context_fini(struct lima_sched_pipe *pipe, struct lima_sched_context *context) { - drm_sched_entity_fini(&context->base); + drm_sched_entity_destroy(&context->base); } struct dma_fence *lima_sched_context_queue_task(struct lima_sched_task *task) From a2f4c143d76b1a47c91ef9bc46907116b111da0b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 5 Jun 2023 11:06:17 -0700 Subject: [PATCH 507/934] ipv6: rpl: Fix Route of Death. A remote DoS vulnerability of RPL Source Routing is assigned CVE-2023-2156. The Source Routing Header (SRH) has the following format: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Next Header | Hdr Ext Len | Routing Type | Segments Left | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | CmprI | CmprE | Pad | Reserved | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | . . . Addresses[1..n] . . . | | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ The originator of an SRH places the first hop's IPv6 address in the IPv6 header's IPv6 Destination Address and the second hop's IPv6 address as the first address in Addresses[1..n]. The CmprI and CmprE fields indicate the number of prefix octets that are shared with the IPv6 Destination Address. When CmprI or CmprE is not 0, Addresses[1..n] are compressed as follows: 1..n-1 : (16 - CmprI) bytes n : (16 - CmprE) bytes Segments Left indicates the number of route segments remaining. When the value is not zero, the SRH is forwarded to the next hop. Its address is extracted from Addresses[n - Segment Left + 1] and swapped with IPv6 Destination Address. When Segment Left is greater than or equal to 2, the size of SRH is not changed because Addresses[1..n-1] are decompressed and recompressed with CmprI. OTOH, when Segment Left changes from 1 to 0, the new SRH could have a different size because Addresses[1..n-1] are decompressed with CmprI and recompressed with CmprE. Let's say CmprI is 15 and CmprE is 0. When we receive SRH with Segment Left >= 2, Addresses[1..n-1] have 1 byte for each, and Addresses[n] has 16 bytes. When Segment Left is 1, Addresses[1..n-1] is decompressed to 16 bytes and not recompressed. Finally, the new SRH will need more room in the header, and the size is (16 - 1) * (n - 1) bytes. Here the max value of n is 255 as Segment Left is u8, so in the worst case, we have to allocate 3825 bytes in the skb headroom. However, now we only allocate a small fixed buffer that is IPV6_RPL_SRH_WORST_SWAP_SIZE (16 + 7 bytes). If the decompressed size overflows the room, skb_push() hits BUG() below [0]. Instead of allocating the fixed buffer for every packet, let's allocate enough headroom only when we receive SRH with Segment Left 1. [0]: skbuff: skb_under_panic: text:ffffffff81c9f6e2 len:576 put:576 head:ffff8880070b5180 data:ffff8880070b4fb0 tail:0x70 end:0x140 dev:lo kernel BUG at net/core/skbuff.c:200! invalid opcode: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 154 Comm: python3 Not tainted 6.4.0-rc4-00190-gc308e9ec0047 #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:skb_panic (net/core/skbuff.c:200) Code: 4f 70 50 8b 87 bc 00 00 00 50 8b 87 b8 00 00 00 50 ff b7 c8 00 00 00 4c 8b 8f c0 00 00 00 48 c7 c7 80 6e 77 82 e8 ad 8b 60 ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffc90000003da0 EFLAGS: 00000246 RAX: 0000000000000085 RBX: ffff8880058a6600 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff88807dc1c540 RDI: ffff88807dc1c540 RBP: ffffc90000003e48 R08: ffffffff82b392c8 R09: 00000000ffffdfff R10: ffffffff82a592e0 R11: ffffffff82b092e0 R12: ffff888005b1c800 R13: ffff8880070b51b8 R14: ffff888005b1ca18 R15: ffff8880070b5190 FS: 00007f4539f0b740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055670baf3000 CR3: 0000000005b0e000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: skb_push (net/core/skbuff.c:210) ipv6_rthdr_rcv (./include/linux/skbuff.h:2880 net/ipv6/exthdrs.c:634 net/ipv6/exthdrs.c:718) ip6_protocol_deliver_rcu (net/ipv6/ip6_input.c:437 (discriminator 5)) ip6_input_finish (./include/linux/rcupdate.h:805 net/ipv6/ip6_input.c:483) __netif_receive_skb_one_core (net/core/dev.c:5494) process_backlog (./include/linux/rcupdate.h:805 net/core/dev.c:5934) __napi_poll (net/core/dev.c:6496) net_rx_action (net/core/dev.c:6565 net/core/dev.c:6696) __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:572) do_softirq (kernel/softirq.c:472 kernel/softirq.c:459) __local_bh_enable_ip (kernel/softirq.c:396) __dev_queue_xmit (net/core/dev.c:4272) ip6_finish_output2 (./include/net/neighbour.h:544 net/ipv6/ip6_output.c:134) rawv6_sendmsg (./include/net/dst.h:458 ./include/linux/netfilter.h:303 net/ipv6/raw.c:656 net/ipv6/raw.c:914) sock_sendmsg (net/socket.c:724 net/socket.c:747) __sys_sendto (net/socket.c:2144) __x64_sys_sendto (net/socket.c:2156 net/socket.c:2152 net/socket.c:2152) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) RIP: 0033:0x7f453a138aea Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffcc212a1c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007ffcc212a288 RCX: 00007f453a138aea RDX: 0000000000000060 RSI: 00007f4539084c20 RDI: 0000000000000003 RBP: 00007f4538308e80 R08: 00007ffcc212a300 R09: 000000000000001c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007f4539712d1b Modules linked in: Fixes: 8610c7c6e3bd ("net: ipv6: add support for rpl sr exthdr") Reported-by: Max VA Closes: https://www.interruptlabs.co.uk/articles/linux-ipv6-route-of-death Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230605180617.67284-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/rpl.h | 3 --- net/ipv6/exthdrs.c | 29 +++++++++++------------------ 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/include/net/rpl.h b/include/net/rpl.h index 308ef0a05cae..30fe780d1e7c 100644 --- a/include/net/rpl.h +++ b/include/net/rpl.h @@ -23,9 +23,6 @@ static inline int rpl_init(void) static inline void rpl_exit(void) {} #endif -/* Worst decompression memory usage ipv6 address (16) + pad 7 */ -#define IPV6_RPL_SRH_WORST_SWAP_SIZE (sizeof(struct in6_addr) + 7) - size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri, unsigned char cmpre); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a8d961d3a477..5fa0e37305d9 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -569,24 +569,6 @@ looped_back: return -1; } - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE, 0, - GFP_ATOMIC)) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); - return -1; - } - } else { - err = skb_cow_head(skb, IPV6_RPL_SRH_WORST_SWAP_SIZE); - if (unlikely(err)) { - kfree_skb(skb); - return -1; - } - } - - hdr = (struct ipv6_rpl_sr_hdr *)skb_transport_header(skb); - if (!pskb_may_pull(skb, ipv6_rpl_srh_size(n, hdr->cmpri, hdr->cmpre))) { kfree_skb(skb); @@ -630,6 +612,17 @@ looped_back: skb_pull(skb, ((hdr->hdrlen + 1) << 3)); skb_postpull_rcsum(skb, oldhdr, sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) << 3)); + if (unlikely(!hdr->segments_left)) { + if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0, + GFP_ATOMIC)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + kfree(buf); + return -1; + } + + oldhdr = ipv6_hdr(skb); + } skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr)); skb_reset_network_header(skb); skb_mac_header_rebuild(skb); From 4a059559809fd1ddbf16f847c4d2237309c08edf Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Fri, 19 May 2023 08:55:05 +0900 Subject: [PATCH 508/934] drm/exynos: vidi: fix a wrong error return Fix a wrong error return by dropping an error return. When vidi driver is remvoed, if ctx->raw_edid isn't same as fake_edid_info then only what we have to is to free ctx->raw_edid so that driver removing can work correctly - it's not an error case. Signed-off-by: Inki Dae Reviewed-by: Andi Shyti --- drivers/gpu/drm/exynos/exynos_drm_vidi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 4d56c8c799c5..f5e1adfcaa51 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -469,8 +469,6 @@ static int vidi_remove(struct platform_device *pdev) if (ctx->raw_edid != (struct edid *)fake_edid_info) { kfree(ctx->raw_edid); ctx->raw_edid = NULL; - - return -EINVAL; } component_del(&pdev->dev, &vidi_component_ops); From 48bfd02569f5db49cc033f259e66d57aa6efc9a3 Mon Sep 17 00:00:00 2001 From: Min Li Date: Fri, 26 May 2023 21:01:31 +0800 Subject: [PATCH 509/934] drm/exynos: fix race condition UAF in exynos_g2d_exec_ioctl If it is async, runqueue_node is freed in g2d_runqueue_worker on another worker thread. So in extreme cases, if g2d_runqueue_worker runs first, and then executes the following if statement, there will be use-after-free. Signed-off-by: Min Li Reviewed-by: Andi Shyti Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index ec784e58da5c..414e585ec7dd 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -1335,7 +1335,7 @@ int exynos_g2d_exec_ioctl(struct drm_device *drm_dev, void *data, /* Let the runqueue know that there is work to do. */ queue_work(g2d->g2d_workq, &g2d->runqueue_work); - if (runqueue_node->async) + if (req->async) goto out; wait_for_completion(&runqueue_node->complete); From 82a01ab35bd02ba4b0b4e12bc95c5b69240eb7b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Jun 2023 16:16:47 +0000 Subject: [PATCH 510/934] tcp: gso: really support BIG TCP We missed that tcp_gso_segment() was assuming skb->len was smaller than 65535 : oldlen = (u16)~skb->len; This part came with commit 0718bcc09b35 ("[NET]: Fix CHECKSUM_HW GSO problems.") This leads to wrong TCP checksum. Adapt the code to accept arbitrary packet length. v2: - use two csum_add() instead of csum_fold() (Alexander Duyck) - Change delta type to __wsum to reduce casts (Alexander Duyck) Fixes: 09f3d1a3a52c ("ipv6/gso: remove temporary HBH/jumbo header") Signed-off-by: Eric Dumazet Reviewed-by: Alexander Duyck Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230605161647.3624428-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_offload.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 45dda7889387..4851211aa60d 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -60,12 +60,12 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, struct tcphdr *th; unsigned int thlen; unsigned int seq; - __be32 delta; unsigned int oldlen; unsigned int mss; struct sk_buff *gso_skb = skb; __sum16 newcheck; bool ooo_okay, copy_destructor; + __wsum delta; th = tcp_hdr(skb); thlen = th->doff * 4; @@ -75,7 +75,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, thlen)) goto out; - oldlen = (u16)~skb->len; + oldlen = ~skb->len; __skb_pull(skb, thlen); mss = skb_shinfo(skb)->gso_size; @@ -110,7 +110,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (skb_is_gso(segs)) mss *= skb_shinfo(segs)->gso_segs; - delta = htonl(oldlen + (thlen + mss)); + delta = (__force __wsum)htonl(oldlen + thlen + mss); skb = segs; th = tcp_hdr(skb); @@ -119,8 +119,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (unlikely(skb_shinfo(gso_skb)->tx_flags & SKBTX_SW_TSTAMP)) tcp_gso_tstamp(segs, skb_shinfo(gso_skb)->tskey, seq, mss); - newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); + newcheck = ~csum_fold(csum_add(csum_unfold(th->check), delta)); while (skb->next) { th->fin = th->psh = 0; @@ -165,11 +164,11 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc)); } - delta = htonl(oldlen + (skb_tail_pointer(skb) - - skb_transport_header(skb)) + - skb->data_len); - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); + delta = (__force __wsum)htonl(oldlen + + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + + skb->data_len); + th->check = ~csum_fold(csum_add(csum_unfold(th->check), delta)); if (skb->ip_summed == CHECKSUM_PARTIAL) gso_reset_checksum(skb, ~th->check); else From 4f48c30312b7af5365878ab191bb41e7b899e09b Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 5 Jun 2023 12:51:16 -0700 Subject: [PATCH 511/934] pds_core: Fix FW recovery detection Commit 523847df1b37 ("pds_core: add devcmd device interfaces") included initial support for FW recovery detection. Unfortunately, the ordering in pdsc_is_fw_good() was incorrect, which was causing FW recovery to be undetected by the driver. Fix this by making sure to update the cached fw_status by calling pdsc_is_fw_running() before setting the local FW gen. Fixes: 523847df1b37 ("pds_core: add devcmd device interfaces") Signed-off-by: Shannon Nelson Signed-off-by: Brett Creeley Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230605195116.49653-1-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index f7c597ea5daf..debe5216fe29 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -68,9 +68,15 @@ bool pdsc_is_fw_running(struct pdsc *pdsc) bool pdsc_is_fw_good(struct pdsc *pdsc) { - u8 gen = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION; + bool fw_running = pdsc_is_fw_running(pdsc); + u8 gen; - return pdsc_is_fw_running(pdsc) && gen == pdsc->fw_generation; + /* Make sure to update the cached fw_status by calling + * pdsc_is_fw_running() before getting the generation + */ + gen = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION; + + return fw_running && gen == pdsc->fw_generation; } static u8 pdsc_devcmd_status(struct pdsc *pdsc) From accc1bf23068c1cdc4c2b015320ba856e210dd98 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 5 Jun 2023 12:59:25 -0700 Subject: [PATCH 512/934] virtio_net: use control_buf for coalesce params Commit 699b045a8e43 ("net: virtio_net: notifications coalescing support") added coalescing command support for virtio_net. However, the coalesce commands are using buffers on the stack, which is causing the device to see DMA errors. There should also be a complaint from check_for_stack() in debug_dma_map_xyz(). Fix this by adding and using coalesce params from the control_buf struct, which aligns with other commands. Cc: stable@vger.kernel.org Fixes: 699b045a8e43 ("net: virtio_net: notifications coalescing support") Reviewed-by: Shannon Nelson Signed-off-by: Allen Hubbe Signed-off-by: Brett Creeley Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20230605195925.51625-1-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 56ca1d270304..486b5849033d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -205,6 +205,8 @@ struct control_buf { __virtio16 vid; __virtio64 offloads; struct virtio_net_ctrl_rss rss; + struct virtio_net_ctrl_coal_tx coal_tx; + struct virtio_net_ctrl_coal_rx coal_rx; }; struct virtnet_info { @@ -2934,12 +2936,10 @@ static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, struct ethtool_coalesce *ec) { struct scatterlist sgs_tx, sgs_rx; - struct virtio_net_ctrl_coal_tx coal_tx; - struct virtio_net_ctrl_coal_rx coal_rx; - coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); - coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); - sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); + vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); + vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); + sg_init_one(&sgs_tx, &vi->ctrl->coal_tx, sizeof(vi->ctrl->coal_tx)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, @@ -2950,9 +2950,9 @@ static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, vi->tx_usecs = ec->tx_coalesce_usecs; vi->tx_max_packets = ec->tx_max_coalesced_frames; - coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); - coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); - sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); + vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); + vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); + sg_init_one(&sgs_rx, &vi->ctrl->coal_rx, sizeof(vi->ctrl->coal_rx)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, From bf06fcf4be0feefebd27deb8b60ad262f4230489 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Jun 2023 10:36:15 +0300 Subject: [PATCH 513/934] xfrm: add missed call to delete offloaded policies Offloaded policies are deleted through two flows: netdev is going down and policy flush. In both cases, the code lacks relevant call to delete offloaded policy. Fixes: 919e43fad516 ("xfrm: add an interface to offload policy") Signed-off-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ff58ce6c030c..e7617c9959c3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1831,6 +1831,7 @@ again: __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + xfrm_dev_policy_delete(pol); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); @@ -1869,6 +1870,7 @@ again: __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + xfrm_dev_policy_delete(pol); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); From 1e5c647c3f6d4f8497dedcd226204e1880e0ffb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Jun 2023 07:41:14 +0000 Subject: [PATCH 514/934] rfs: annotate lockless accesses to sk->sk_rxhash Add READ_ONCE()/WRITE_ONCE() on accesses to sk->sk_rxhash. This also prevents a (smart ?) compiler to remove the condition in: if (sk->sk_rxhash != newval) sk->sk_rxhash = newval; We need the condition to avoid dirtying a shared cache line. Fixes: fec5e652e58f ("rfs: Receive Flow Steering") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/sock.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index b418425d7230..6f428a7f3567 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1152,8 +1152,12 @@ static inline void sock_rps_record_flow(const struct sock *sk) * OR an additional socket flag * [1] : sk_state and sk_prot are in the same cache line. */ - if (sk->sk_state == TCP_ESTABLISHED) - sock_rps_record_flow_hash(sk->sk_rxhash); + if (sk->sk_state == TCP_ESTABLISHED) { + /* This READ_ONCE() is paired with the WRITE_ONCE() + * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). + */ + sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); + } } #endif } @@ -1162,15 +1166,19 @@ static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_RPS - if (unlikely(sk->sk_rxhash != skb->hash)) - sk->sk_rxhash = skb->hash; + /* The following WRITE_ONCE() is paired with the READ_ONCE() + * here, and another one in sock_rps_record_flow(). + */ + if (unlikely(READ_ONCE(sk->sk_rxhash) != skb->hash)) + WRITE_ONCE(sk->sk_rxhash, skb->hash); #endif } static inline void sock_rps_reset_rxhash(struct sock *sk) { #ifdef CONFIG_RPS - sk->sk_rxhash = 0; + /* Paired with READ_ONCE() in sock_rps_record_flow() */ + WRITE_ONCE(sk->sk_rxhash, 0); #endif } From 5c3b74a92aa285a3df722bf6329ba7ccf70346d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Jun 2023 07:41:15 +0000 Subject: [PATCH 515/934] rfs: annotate lockless accesses to RFS sock flow table Add READ_ONCE()/WRITE_ONCE() on accesses to the sock flow table. This also prevents a (smart ?) compiler to remove the condition in: if (table->ents[index] != newval) table->ents[index] = newval; We need the condition to avoid dirtying a shared cache line. Fixes: fec5e652e58f ("rfs: Receive Flow Steering") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++-- net/core/dev.c | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 08fbd4622ccf..e6f22b7403d0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -768,8 +768,11 @@ static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); - if (table->ents[index] != val) - table->ents[index] = val; + /* The following WRITE_ONCE() is paired with the READ_ONCE() + * here, and another one in get_rps_cpu(). + */ + if (READ_ONCE(table->ents[index]) != val) + WRITE_ONCE(table->ents[index], val); } } diff --git a/net/core/dev.c b/net/core/dev.c index b3c13e041935..1495f8aff288 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4471,8 +4471,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, u32 next_cpu; u32 ident; - /* First check into global flow table if there is a match */ - ident = sock_flow_table->ents[hash & sock_flow_table->mask]; + /* First check into global flow table if there is a match. + * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow(). + */ + ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]); if ((ident ^ hash) & ~rps_cpu_mask) goto try_rps; From d636fc5dd692c8f4e00ae6e0359c0eceeb5d9bdb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Jun 2023 11:19:29 +0000 Subject: [PATCH 516/934] net: sched: add rcu annotations around qdisc->qdisc_sleeping syzbot reported a race around qdisc->qdisc_sleeping [1] It is time we add proper annotations to reads and writes to/from qdisc->qdisc_sleeping. [1] BUG: KCSAN: data-race in dev_graft_qdisc / qdisc_lookup_rcu read to 0xffff8881286fc618 of 8 bytes by task 6928 on cpu 1: qdisc_lookup_rcu+0x192/0x2c0 net/sched/sch_api.c:331 __tcf_qdisc_find+0x74/0x3c0 net/sched/cls_api.c:1174 tc_get_tfilter+0x18f/0x990 net/sched/cls_api.c:2547 rtnetlink_rcv_msg+0x7af/0x8c0 net/core/rtnetlink.c:6386 netlink_rcv_skb+0x126/0x220 net/netlink/af_netlink.c:2546 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:6413 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x56f/0x640 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x665/0x770 net/netlink/af_netlink.c:1913 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg net/socket.c:747 [inline] ____sys_sendmsg+0x375/0x4c0 net/socket.c:2503 ___sys_sendmsg net/socket.c:2557 [inline] __sys_sendmsg+0x1e3/0x270 net/socket.c:2586 __do_sys_sendmsg net/socket.c:2595 [inline] __se_sys_sendmsg net/socket.c:2593 [inline] __x64_sys_sendmsg+0x46/0x50 net/socket.c:2593 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd write to 0xffff8881286fc618 of 8 bytes by task 6912 on cpu 0: dev_graft_qdisc+0x4f/0x80 net/sched/sch_generic.c:1115 qdisc_graft+0x7d0/0xb60 net/sched/sch_api.c:1103 tc_modify_qdisc+0x712/0xf10 net/sched/sch_api.c:1693 rtnetlink_rcv_msg+0x807/0x8c0 net/core/rtnetlink.c:6395 netlink_rcv_skb+0x126/0x220 net/netlink/af_netlink.c:2546 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:6413 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x56f/0x640 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x665/0x770 net/netlink/af_netlink.c:1913 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg net/socket.c:747 [inline] ____sys_sendmsg+0x375/0x4c0 net/socket.c:2503 ___sys_sendmsg net/socket.c:2557 [inline] __sys_sendmsg+0x1e3/0x270 net/socket.c:2586 __do_sys_sendmsg net/socket.c:2595 [inline] __se_sys_sendmsg net/socket.c:2593 [inline] __x64_sys_sendmsg+0x46/0x50 net/socket.c:2593 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 6912 Comm: syz-executor.5 Not tainted 6.4.0-rc3-syzkaller-00190-g0d85b27b0cc6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/16/2023 Fixes: 3a7d0d07a386 ("net: sched: extend Qdisc with rcu") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Vlad Buslov Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/net/sch_generic.h | 6 ++++-- net/core/dev.c | 2 +- net/sched/sch_api.c | 26 ++++++++++++++++---------- net/sched/sch_fq_pie.c | 2 ++ net/sched/sch_generic.c | 30 +++++++++++++++--------------- net/sched/sch_mq.c | 8 ++++---- net/sched/sch_mqprio.c | 8 ++++---- net/sched/sch_pie.c | 5 ++++- net/sched/sch_red.c | 5 ++++- net/sched/sch_sfq.c | 5 ++++- net/sched/sch_taprio.c | 6 +++--- net/sched/sch_teql.c | 2 +- 13 files changed, 63 insertions(+), 44 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e6f22b7403d0..c2f0c6002a84 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -620,7 +620,7 @@ struct netdev_queue { netdevice_tracker dev_tracker; struct Qdisc __rcu *qdisc; - struct Qdisc *qdisc_sleeping; + struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index fab5ba3e61b7..27271f2b37cb 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -545,7 +545,7 @@ static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc) static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) { - return qdisc->dev_queue->qdisc_sleeping; + return rcu_dereference_rtnl(qdisc->dev_queue->qdisc_sleeping); } static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) @@ -754,7 +754,9 @@ static inline bool qdisc_tx_changing(const struct net_device *dev) for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping) + + if (rcu_access_pointer(txq->qdisc) != + rcu_access_pointer(txq->qdisc_sleeping)) return true; } return false; diff --git a/net/core/dev.c b/net/core/dev.c index 1495f8aff288..c29f3e1db3ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10543,7 +10543,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) return NULL; netdev_init_one_queue(dev, queue, NULL); RCU_INIT_POINTER(queue->qdisc, &noop_qdisc); - queue->qdisc_sleeping = &noop_qdisc; + RCU_INIT_POINTER(queue->qdisc_sleeping, &noop_qdisc); rcu_assign_pointer(dev->ingress_queue, queue); #endif return queue; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 9ea51812b9cf..e4b6452318c0 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -309,7 +309,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) if (dev_ingress_queue(dev)) q = qdisc_match_from_root( - dev_ingress_queue(dev)->qdisc_sleeping, + rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping), handle); out: return q; @@ -328,7 +328,8 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) nq = dev_ingress_queue_rcu(dev); if (nq) - q = qdisc_match_from_root(nq->qdisc_sleeping, handle); + q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping), + handle); out: return q; } @@ -634,8 +635,13 @@ EXPORT_SYMBOL(qdisc_watchdog_init); void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, u64 delta_ns) { - if (test_bit(__QDISC_STATE_DEACTIVATED, - &qdisc_root_sleeping(wd->qdisc)->state)) + bool deactivated; + + rcu_read_lock(); + deactivated = test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(wd->qdisc)->state); + rcu_read_unlock(); + if (deactivated) return; if (hrtimer_is_queued(&wd->timer)) { @@ -1478,7 +1484,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue(dev)) { - q = dev_ingress_queue(dev)->qdisc_sleeping; + q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); @@ -1564,7 +1570,7 @@ replay: } q = qdisc_leaf(p, clid); } else if (dev_ingress_queue_create(dev)) { - q = dev_ingress_queue(dev)->qdisc_sleeping; + q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); @@ -1805,8 +1811,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) dev_queue = dev_ingress_queue(dev); if (dev_queue && - tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, - &q_idx, s_q_idx, false, + tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping), + skb, cb, &q_idx, s_q_idx, false, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; @@ -2249,8 +2255,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) dev_queue = dev_ingress_queue(dev); if (dev_queue && - tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, - &t, s_t, false) < 0) + tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping), + skb, tcm, cb, &t, s_t, false) < 0) goto done; done: diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index c699e5095607..591d87d5e5c0 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -379,6 +379,7 @@ static void fq_pie_timer(struct timer_list *t) spinlock_t *root_lock; /* to lock qdisc for probability calculations */ u32 idx; + rcu_read_lock(); root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -391,6 +392,7 @@ static void fq_pie_timer(struct timer_list *t) mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate); spin_unlock(root_lock); + rcu_read_unlock(); } static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 37e41f972f69..3248259eba32 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -648,7 +648,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { static struct netdev_queue noop_netdev_queue = { RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc), - .qdisc_sleeping = &noop_qdisc, + RCU_POINTER_INITIALIZER(qdisc_sleeping, &noop_qdisc), }; struct Qdisc noop_qdisc = { @@ -1103,7 +1103,7 @@ EXPORT_SYMBOL(qdisc_put_unlocked); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc) { - struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; + struct Qdisc *oqdisc = rtnl_dereference(dev_queue->qdisc_sleeping); spinlock_t *root_lock; root_lock = qdisc_lock(oqdisc); @@ -1112,7 +1112,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, /* ... and graft new one */ if (qdisc == NULL) qdisc = &noop_qdisc; - dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); spin_unlock_bh(root_lock); @@ -1125,12 +1125,12 @@ static void shutdown_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc_default) { - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - dev_queue->qdisc_sleeping = qdisc_default; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc_default); qdisc_put(qdisc); } @@ -1154,7 +1154,7 @@ static void attach_one_default_qdisc(struct net_device *dev, if (!netif_is_multiqueue(dev)) qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); } static void attach_default_qdiscs(struct net_device *dev) @@ -1167,7 +1167,7 @@ static void attach_default_qdiscs(struct net_device *dev) if (!netif_is_multiqueue(dev) || dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - qdisc = txq->qdisc_sleeping; + qdisc = rtnl_dereference(txq->qdisc_sleeping); rcu_assign_pointer(dev->qdisc, qdisc); qdisc_refcount_inc(qdisc); } else { @@ -1186,7 +1186,7 @@ static void attach_default_qdiscs(struct net_device *dev) netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); dev->priv_flags |= IFF_NO_QUEUE; netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - qdisc = txq->qdisc_sleeping; + qdisc = rtnl_dereference(txq->qdisc_sleeping); rcu_assign_pointer(dev->qdisc, qdisc); qdisc_refcount_inc(qdisc); dev->priv_flags ^= IFF_NO_QUEUE; @@ -1202,7 +1202,7 @@ static void transition_one_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_need_watchdog) { - struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *new_qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); int *need_watchdog_p = _need_watchdog; if (!(new_qdisc->flags & TCQ_F_BUILTIN)) @@ -1272,7 +1272,7 @@ static void dev_reset_queue(struct net_device *dev, struct Qdisc *qdisc; bool nolock; - qdisc = dev_queue->qdisc_sleeping; + qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); if (!qdisc) return; @@ -1303,7 +1303,7 @@ static bool some_qdisc_is_busy(struct net_device *dev) int val; dev_queue = netdev_get_tx_queue(dev, i); - q = dev_queue->qdisc_sleeping; + q = rtnl_dereference(dev_queue->qdisc_sleeping); root_lock = qdisc_lock(q); spin_lock_bh(root_lock); @@ -1379,7 +1379,7 @@ EXPORT_SYMBOL(dev_deactivate); static int qdisc_change_tx_queue_len(struct net_device *dev, struct netdev_queue *dev_queue) { - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); const struct Qdisc_ops *ops = qdisc->ops; if (ops->change_tx_queue_len) @@ -1404,7 +1404,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) unsigned int i; for (i = new_real_tx; i < dev->real_num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping); /* Only update the default qdiscs we created, * qdiscs with handles are always hashed. */ @@ -1412,7 +1412,7 @@ void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) qdisc_hash_del(qdisc); } for (i = dev->real_num_tx_queues; i < new_real_tx; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping); if (qdisc != &noop_qdisc && !qdisc->handle) qdisc_hash_add(qdisc, false); } @@ -1449,7 +1449,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, struct Qdisc *qdisc = _qdisc; rcu_assign_pointer(dev_queue->qdisc, qdisc); - dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc); } void dev_init_scheduler(struct net_device *dev) diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index d0bc660d7401..c860119a8f09 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -141,7 +141,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) * qdisc totals are added at end. */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, @@ -202,7 +202,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl) { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long mq_find(struct Qdisc *sch, u32 classid) @@ -221,7 +221,7 @@ static int mq_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; return 0; } @@ -230,7 +230,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, { struct netdev_queue *dev_queue = mq_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; + sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index dc5a0ff50b14..ab69ff7577fc 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -557,7 +557,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) * qdisc totals are added at end. */ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { - qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, ntx)->qdisc_sleeping); spin_lock_bh(qdisc_lock(qdisc)); gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats, @@ -604,7 +604,7 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) if (!dev_queue) return NULL; - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long mqprio_find(struct Qdisc *sch, u32 classid) @@ -637,7 +637,7 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = (tc < 0) ? 0 : TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(tc + TC_H_MIN_PRIORITY)); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; } else { tcm->tcm_parent = TC_H_ROOT; tcm->tcm_info = 0; @@ -693,7 +693,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, } else { struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; + sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 2152a56d73f8..2da6250ec346 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -421,8 +421,10 @@ static void pie_timer(struct timer_list *t) { struct pie_sched_data *q = from_timer(q, t, adapt_timer); struct Qdisc *sch = q->sch; - spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spinlock_t *root_lock; + rcu_read_lock(); + root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); pie_calculate_probability(&q->params, &q->vars, sch->qstats.backlog); @@ -430,6 +432,7 @@ static void pie_timer(struct timer_list *t) if (q->params.tupdate) mod_timer(&q->adapt_timer, jiffies + q->params.tupdate); spin_unlock(root_lock); + rcu_read_unlock(); } static int pie_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 98129324e157..16277b6a0238 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -321,12 +321,15 @@ static inline void red_adaptative_timer(struct timer_list *t) { struct red_sched_data *q = from_timer(q, t, adapt_timer); struct Qdisc *sch = q->sch; - spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spinlock_t *root_lock; + rcu_read_lock(); + root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); red_adaptative_algo(&q->parms, &q->vars); mod_timer(&q->adapt_timer, jiffies + HZ/2); spin_unlock(root_lock); + rcu_read_unlock(); } static int red_init(struct Qdisc *sch, struct nlattr *opt, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index abd436307d6a..66dcb18638fe 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -606,10 +606,12 @@ static void sfq_perturbation(struct timer_list *t) { struct sfq_sched_data *q = from_timer(q, t, perturb_timer); struct Qdisc *sch = q->sch; - spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + spinlock_t *root_lock; siphash_key_t nkey; get_random_bytes(&nkey, sizeof(nkey)); + rcu_read_lock(); + root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); q->perturbation = nkey; if (!q->filter_list && q->tail) @@ -618,6 +620,7 @@ static void sfq_perturbation(struct timer_list *t) if (q->perturb_period) mod_timer(&q->perturb_timer, jiffies + q->perturb_period); + rcu_read_unlock(); } static int sfq_change(struct Qdisc *sch, struct nlattr *opt) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 76db9a10ef50..dd7dea2f6e83 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2358,7 +2358,7 @@ static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) if (!dev_queue) return NULL; - return dev_queue->qdisc_sleeping; + return rtnl_dereference(dev_queue->qdisc_sleeping); } static unsigned long taprio_find(struct Qdisc *sch, u32 classid) @@ -2377,7 +2377,7 @@ static int taprio_dump_class(struct Qdisc *sch, unsigned long cl, tcm->tcm_parent = TC_H_ROOT; tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + tcm->tcm_info = rtnl_dereference(dev_queue->qdisc_sleeping)->handle; return 0; } @@ -2389,7 +2389,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, { struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); - sch = dev_queue->qdisc_sleeping; + sch = rtnl_dereference(dev_queue->qdisc_sleeping); if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 || qdisc_qstats_copy(d, sch) < 0) return -1; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 16f9238aa51d..7721239c185f 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -297,7 +297,7 @@ restart: struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); - if (slave_txq->qdisc_sleeping != q) + if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q) continue; if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) || !netif_running(slave)) { From 79d0150d2d983a4f6efee676cea06027f586fcd0 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 5 Jun 2023 14:11:35 +0100 Subject: [PATCH 517/934] drm/i915/selftests: Add some missing error propagation Add some missing error propagation in live_parallel_switch. To avoid needlessly burdening the various backport processes, note I am not marking it as a fix against any patches and not copying stable since it is debug/selftests only code. Signed-off-by: Tvrtko Ursulin Reported-by: Dan Carpenter Cc: Andi Shyti Reviewed-by: Andi Shyti Fixes: 50d16d44cce4 ("drm/i915/selftests: Exercise context switching in parallel") Fixes: 6407cf533217 ("drm/i915/selftests: Stop using kthread_stop()") Link: https://patchwork.freedesktop.org/patch/msgid/20230605131135.396854-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 412fa1f097f48c8c1321806dd25e46618e0da147) Signed-off-by: Joonas Lahtinen --- .../gpu/drm/i915/gem/selftests/i915_gem_context.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index a81fa6a20f5a..7b516b1a4915 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -346,8 +346,10 @@ static int live_parallel_switch(void *arg) continue; ce = intel_context_create(data[m].ce[0]->engine); - if (IS_ERR(ce)) + if (IS_ERR(ce)) { + err = PTR_ERR(ce); goto out; + } err = intel_context_pin(ce); if (err) { @@ -367,8 +369,10 @@ static int live_parallel_switch(void *arg) worker = kthread_create_worker(0, "igt/parallel:%s", data[n].ce[0]->engine->name); - if (IS_ERR(worker)) + if (IS_ERR(worker)) { + err = PTR_ERR(worker); goto out; + } data[n].worker = worker; } @@ -397,8 +401,10 @@ static int live_parallel_switch(void *arg) } } - if (igt_live_test_end(&t)) - err = -EIO; + if (igt_live_test_end(&t)) { + err = err ?: -EIO; + break; + } } out: From bf15bb38ec7f4ff522da5c20e1673dbda7159938 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 6 Jun 2023 10:12:53 -0700 Subject: [PATCH 518/934] ice: make writes to /dev/gnssX synchronous The current ice driver's GNSS write implementation buffers writes and works through them asynchronously in a kthread. That's bad because: - The GNSS write_raw operation is supposed to be synchronous[1][2]. - There is no upper bound on the number of pending writes. Userspace can submit writes much faster than the driver can process, consuming unlimited amounts of kernel memory. A patch that's currently on review[3] ("[v3,net] ice: Write all GNSS buffers instead of first one") would add one more problem: - The possibility of waiting for a very long time to flush the write work when doing rmmod, softlockups. To fix these issues, simplify the implementation: Drop the buffering, the write_work, and make the writes synchronous. I tested this with gpsd and ubxtool. [1] https://events19.linuxfoundation.org/wp-content/uploads/2017/12/The-GNSS-Subsystem-Johan-Hovold-Hovold-Consulting-AB.pdf "User interface" slide. [2] A comment in drivers/gnss/core.c:gnss_write(): /* Ignoring O_NONBLOCK, write_raw() is synchronous. */ [3] https://patchwork.ozlabs.org/project/intel-wired-lan/patch/20230217120541.16745-1-karol.kolacinski@intel.com/ Fixes: d6b98c8d242a ("ice: add write functionality for GNSS TTY") Signed-off-by: Michal Schmidt Reviewed-by: Simon Horman Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_common.c | 2 +- drivers/net/ethernet/intel/ice/ice_common.h | 2 +- drivers/net/ethernet/intel/ice/ice_gnss.c | 64 ++------------------- drivers/net/ethernet/intel/ice/ice_gnss.h | 10 ---- 4 files changed, 6 insertions(+), 72 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 0157f6e98d3e..eb2dc0983776 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -5160,7 +5160,7 @@ ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, */ int ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, - u16 bus_addr, __le16 addr, u8 params, u8 *data, + u16 bus_addr, __le16 addr, u8 params, const u8 *data, struct ice_sq_cd *cd) { struct ice_aq_desc desc = { 0 }; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 8ba5f935a092..81961a7d6598 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -229,7 +229,7 @@ ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, struct ice_sq_cd *cd); int ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, - u16 bus_addr, __le16 addr, u8 params, u8 *data, + u16 bus_addr, __le16 addr, u8 params, const u8 *data, struct ice_sq_cd *cd); bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index 2ea8a2b11bcd..bd0ed155e11b 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -16,8 +16,8 @@ * * number of bytes written - success * * negative - error code */ -static unsigned int -ice_gnss_do_write(struct ice_pf *pf, unsigned char *buf, unsigned int size) +static int +ice_gnss_do_write(struct ice_pf *pf, const unsigned char *buf, unsigned int size) { struct ice_aqc_link_topo_addr link_topo; struct ice_hw *hw = &pf->hw; @@ -72,39 +72,7 @@ err_out: dev_err(ice_pf_to_dev(pf), "GNSS failed to write, offset=%u, size=%u, err=%d\n", offset, size, err); - return offset; -} - -/** - * ice_gnss_write_pending - Write all pending data to internal GNSS - * @work: GNSS write work structure - */ -static void ice_gnss_write_pending(struct kthread_work *work) -{ - struct gnss_serial *gnss = container_of(work, struct gnss_serial, - write_work); - struct ice_pf *pf = gnss->back; - - if (!pf) - return; - - if (!test_bit(ICE_FLAG_GNSS, pf->flags)) - return; - - if (!list_empty(&gnss->queue)) { - struct gnss_write_buf *write_buf = NULL; - unsigned int bytes; - - write_buf = list_first_entry(&gnss->queue, - struct gnss_write_buf, queue); - - bytes = ice_gnss_do_write(pf, write_buf->buf, write_buf->size); - dev_dbg(ice_pf_to_dev(pf), "%u bytes written to GNSS\n", bytes); - - list_del(&write_buf->queue); - kfree(write_buf->buf); - kfree(write_buf); - } + return err; } /** @@ -220,8 +188,6 @@ static struct gnss_serial *ice_gnss_struct_init(struct ice_pf *pf) pf->gnss_serial = gnss; kthread_init_delayed_work(&gnss->read_work, ice_gnss_read); - INIT_LIST_HEAD(&gnss->queue); - kthread_init_work(&gnss->write_work, ice_gnss_write_pending); kworker = kthread_create_worker(0, "ice-gnss-%s", dev_name(dev)); if (IS_ERR(kworker)) { kfree(gnss); @@ -281,7 +247,6 @@ static void ice_gnss_close(struct gnss_device *gdev) if (!gnss) return; - kthread_cancel_work_sync(&gnss->write_work); kthread_cancel_delayed_work_sync(&gnss->read_work); } @@ -300,10 +265,7 @@ ice_gnss_write(struct gnss_device *gdev, const unsigned char *buf, size_t count) { struct ice_pf *pf = gnss_get_drvdata(gdev); - struct gnss_write_buf *write_buf; struct gnss_serial *gnss; - unsigned char *cmd_buf; - int err = count; /* We cannot write a single byte using our I2C implementation. */ if (count <= 1 || count > ICE_GNSS_TTY_WRITE_BUF) @@ -319,24 +281,7 @@ ice_gnss_write(struct gnss_device *gdev, const unsigned char *buf, if (!gnss) return -ENODEV; - cmd_buf = kcalloc(count, sizeof(*buf), GFP_KERNEL); - if (!cmd_buf) - return -ENOMEM; - - memcpy(cmd_buf, buf, count); - write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL); - if (!write_buf) { - kfree(cmd_buf); - return -ENOMEM; - } - - write_buf->buf = cmd_buf; - write_buf->size = count; - INIT_LIST_HEAD(&write_buf->queue); - list_add_tail(&write_buf->queue, &gnss->queue); - kthread_queue_work(gnss->kworker, &gnss->write_work); - - return err; + return ice_gnss_do_write(pf, buf, count); } static const struct gnss_operations ice_gnss_ops = { @@ -432,7 +377,6 @@ void ice_gnss_exit(struct ice_pf *pf) if (pf->gnss_serial) { struct gnss_serial *gnss = pf->gnss_serial; - kthread_cancel_work_sync(&gnss->write_work); kthread_cancel_delayed_work_sync(&gnss->read_work); kthread_destroy_worker(gnss->kworker); gnss->kworker = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h index b8bb8b63d081..75e567ad7059 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.h +++ b/drivers/net/ethernet/intel/ice/ice_gnss.h @@ -22,26 +22,16 @@ */ #define ICE_GNSS_UBX_WRITE_BYTES (ICE_MAX_I2C_WRITE_BYTES + 1) -struct gnss_write_buf { - struct list_head queue; - unsigned int size; - unsigned char *buf; -}; - /** * struct gnss_serial - data used to initialize GNSS TTY port * @back: back pointer to PF * @kworker: kwork thread for handling periodic work * @read_work: read_work function for handling GNSS reads - * @write_work: write_work function for handling GNSS writes - * @queue: write buffers queue */ struct gnss_serial { struct ice_pf *back; struct kthread_worker *kworker; struct kthread_delayed_work read_work; - struct kthread_work write_work; - struct list_head queue; }; #if IS_ENABLED(CONFIG_GNSS) From ca0aa17f2db3468fd017038d23a78e17388e2f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 9 Mar 2023 10:58:19 +0100 Subject: [PATCH 519/934] i2c: sprd: Delete i2c adapter in .remove's error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If pm runtime resume fails the .remove callback used to exit early. This resulted in an error message by the driver core but the device gets removed anyhow. This lets the registered i2c adapter stay around with an unbound parent device. So only skip clk disabling if resume failed, but do delete the adapter. Fixes: 8b9ec0719834 ("i2c: Add Spreadtrum I2C controller driver") Signed-off-by: Uwe Kleine-König Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sprd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 4fe15cd78907..ffc54fbf814d 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -576,12 +576,14 @@ static int sprd_i2c_remove(struct platform_device *pdev) struct sprd_i2c *i2c_dev = platform_get_drvdata(pdev); int ret; - ret = pm_runtime_resume_and_get(i2c_dev->dev); + ret = pm_runtime_get_sync(i2c_dev->dev); if (ret < 0) - return ret; + dev_err(&pdev->dev, "Failed to resume device (%pe)\n", ERR_PTR(ret)); i2c_del_adapter(&i2c_dev->adap); - clk_disable_unprepare(i2c_dev->clk); + + if (ret >= 0) + clk_disable_unprepare(i2c_dev->clk); pm_runtime_put_noidle(i2c_dev->dev); pm_runtime_disable(i2c_dev->dev); From 886bc7d6ed3357975c5f1d3c784da96000d4bbb4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Jun 2023 11:42:33 +0000 Subject: [PATCH 520/934] net: sched: move rtm_tca_policy declaration to include file rtm_tca_policy is used from net/sched/sch_api.c and net/sched/cls_api.c, thus should be declared in an include file. This fixes the following sparse warning: net/sched/sch_api.c:1434:25: warning: symbol 'rtm_tca_policy' was not declared. Should it be static? Fixes: e331473fee3d ("net/sched: cls_api: add missing validation of netlink attributes") Signed-off-by: Eric Dumazet Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 2 ++ net/sched/cls_api.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f436688b6efc..5722931d83d4 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -127,6 +127,8 @@ static inline void qdisc_run(struct Qdisc *q) } } +extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; + /* Calculate maximal size of packet seen by hard_start_xmit routine of this device. */ diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2621550bfddc..b2432ee04f31 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -43,8 +43,6 @@ #include #include -extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; - /* The list of all installed classifier types */ static LIST_HEAD(tcf_proto_base); From de9df6c6b27e22d7bdd20107947ef3a20e687de5 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 6 Jun 2023 13:56:35 +0200 Subject: [PATCH 521/934] net: openvswitch: fix upcall counter access before allocation Currently, the per cpu upcall counters are allocated after the vport is created and inserted into the system. This could lead to the datapath accessing the counters before they are allocated resulting in a kernel Oops. Here is an example: PID: 59693 TASK: ffff0005f4f51500 CPU: 0 COMMAND: "ovs-vswitchd" #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4 #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60 #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58 #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388 #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68 #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch] ... PID: 58682 TASK: ffff0005b2f0bf00 CPU: 0 COMMAND: "kworker/0:3" #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758 #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994 #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8 #3 [ffff80000a5d3120] die at ffffb70f0628234c #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8 #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4 #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4 #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710 #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74 #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24 #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch] #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc [openvswitch] #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch] #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 [openvswitch] #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch] #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at ffffb70f06079f90 We moved the per cpu upcall counter allocation to the existing vport alloc and free functions to solve this. Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure") Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets") Signed-off-by: Eelco Chaudron Reviewed-by: Simon Horman Acked-by: Aaron Conole Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 19 ------------------- net/openvswitch/vport.c | 18 ++++++++++++++++-- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index fcee6012293b..58f530f60172 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p) /* First drop references to device. */ hlist_del_rcu(&p->dp_hash_node); - /* Free percpu memory */ - free_percpu(p->upcall_stats); - /* Then destroy it. */ ovs_vport_del(p); } @@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_portids; } - vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); - if (!vport->upcall_stats) { - err = -ENOMEM; - goto err_destroy_vport; - } - err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_NEW); BUG_ON(err < 0); @@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_vport: - ovs_dp_detach_port(vport); err_destroy_portids: kfree(rcu_dereference_raw(dp->upcall_portids)); err_unlock_and_destroy_meters: @@ -2322,12 +2311,6 @@ restart: goto exit_unlock_free; } - vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); - if (!vport->upcall_stats) { - err = -ENOMEM; - goto exit_unlock_free_vport; - } - err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_NEW, GFP_KERNEL); @@ -2345,8 +2328,6 @@ restart: ovs_notify(&dp_vport_genl_family, reply, info); return 0; -exit_unlock_free_vport: - ovs_dp_detach_port(vport); exit_unlock_free: ovs_unlock(); kfree_skb(reply); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 7e0f5c45b512..972ae01a70f7 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -124,6 +124,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, { struct vport *vport; size_t alloc_size; + int err; alloc_size = sizeof(struct vport); if (priv_size) { @@ -135,17 +136,29 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, if (!vport) return ERR_PTR(-ENOMEM); + vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); + if (!vport->upcall_stats) { + err = -ENOMEM; + goto err_kfree_vport; + } + vport->dp = parms->dp; vport->port_no = parms->port_no; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) { - kfree(vport); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto err_free_percpu; } return vport; + +err_free_percpu: + free_percpu(vport->upcall_stats); +err_kfree_vport: + kfree(vport); + return ERR_PTR(err); } EXPORT_SYMBOL_GPL(ovs_vport_alloc); @@ -165,6 +178,7 @@ void ovs_vport_free(struct vport *vport) * it is safe to use raw dereference. */ kfree(rcu_dereference_raw(vport->upcall_portids)); + free_percpu(vport->upcall_stats); kfree(vport); } EXPORT_SYMBOL_GPL(ovs_vport_free); From 682881ee45c81daa883dcd4fe613b0b0d988bb22 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Jun 2023 13:13:04 +0000 Subject: [PATCH 522/934] net: sched: act_police: fix sparse errors in tcf_police_dump() Fixes following sparse errors: net/sched/act_police.c:360:28: warning: dereference of noderef expression net/sched/act_police.c:362:45: warning: dereference of noderef expression net/sched/act_police.c:362:45: warning: dereference of noderef expression net/sched/act_police.c:368:28: warning: dereference of noderef expression net/sched/act_police.c:370:45: warning: dereference of noderef expression net/sched/act_police.c:370:45: warning: dereference of noderef expression net/sched/act_police.c:376:45: warning: dereference of noderef expression net/sched/act_police.c:376:45: warning: dereference of noderef expression Fixes: d1967e495a8d ("net_sched: act_police: add 2 new attributes to support police 64bit rate and peakrate") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_police.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 227cba58ce9f..2e9dce03d1ec 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -357,23 +357,23 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, opt.burst = PSCHED_NS2TICKS(p->tcfp_burst); if (p->rate_present) { psched_ratecfg_getrate(&opt.rate, &p->rate); - if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) && + if ((p->rate.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_POLICE_RATE64, - police->params->rate.rate_bytes_ps, + p->rate.rate_bytes_ps, TCA_POLICE_PAD)) goto nla_put_failure; } if (p->peak_present) { psched_ratecfg_getrate(&opt.peakrate, &p->peak); - if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) && + if ((p->peak.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64, - police->params->peak.rate_bytes_ps, + p->peak.rate_bytes_ps, TCA_POLICE_PAD)) goto nla_put_failure; } if (p->pps_present) { if (nla_put_u64_64bit(skb, TCA_POLICE_PKTRATE64, - police->params->ppsrate.rate_pkts_ps, + p->ppsrate.rate_pkts_ps, TCA_POLICE_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(skb, TCA_POLICE_PKTBURST64, From 44f8baaf230c655c249467ca415b570deca8df77 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 7 Jun 2023 10:23:01 +0800 Subject: [PATCH 523/934] net: sched: fix possible refcount leak in tc_chain_tmplt_add() try_module_get will be called in tcf_proto_lookup_ops. So module_put needs to be called to drop the refcount if ops don't implement the required function. Fixes: 9f407f1768d3 ("net: sched: introduce chain templates") Signed-off-by: Hangyu Hua Reviewed-by: Larysa Zaremba Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b2432ee04f31..c877a6343fd4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2950,6 +2950,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier"); + module_put(ops->owner); return -EOPNOTSUPP; } From d7459efc9276a715fe9def401bc30045aeb9668a Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Wed, 7 Jun 2023 14:50:04 +0800 Subject: [PATCH 524/934] gpio: sim: quietly ignore configured lines outside the bank The user-space policy of the gpio-sim is that configuration for lines with offsets outside the bounds of the corresponding bank is ignored, but gpio-sim is still using that configuration when constructing the sim. In the case of named lines this results in temporarily allocating space for names that are not used, and for hogs results in errors being logged when the gpio-sim attempts to register the out of range hog with gpiolib: gpiochip_machine_hog: unable to get GPIO desc: -22 Add checks to filter out any line configuration outside the bounds of the bank when constructing the sim. Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sim.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index fab67a5785d7..8b49b0abacd5 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -696,6 +696,9 @@ static char **gpio_sim_make_line_names(struct gpio_sim_bank *bank, char **line_names; list_for_each_entry(line, &bank->line_list, siblings) { + if (line->offset >= bank->num_lines) + continue; + if (line->name) { if (line->offset > max_offset) max_offset = line->offset; @@ -722,6 +725,9 @@ static char **gpio_sim_make_line_names(struct gpio_sim_bank *bank, return ERR_PTR(-ENOMEM); list_for_each_entry(line, &bank->line_list, siblings) { + if (line->offset >= bank->num_lines) + continue; + if (line->name && (line->offset <= max_offset)) line_names[line->offset] = line->name; } @@ -756,6 +762,9 @@ static int gpio_sim_add_hogs(struct gpio_sim_device *dev) list_for_each_entry(bank, &dev->bank_list, siblings) { list_for_each_entry(line, &bank->line_list, siblings) { + if (line->offset >= bank->num_lines) + continue; + if (line->hog) num_hogs++; } @@ -771,6 +780,9 @@ static int gpio_sim_add_hogs(struct gpio_sim_device *dev) list_for_each_entry(bank, &dev->bank_list, siblings) { list_for_each_entry(line, &bank->line_list, siblings) { + if (line->offset >= bank->num_lines) + continue; + if (!line->hog) continue; From d1f11f41eb746a33816695f1b6b6719826cc532c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 5 Jun 2023 16:53:08 +0200 Subject: [PATCH 525/934] MAINTAINERS: add Andy Shevchenko as reviewer for the GPIO subsystem Andy has been a de-facto reviewer for all things GPIO for a long time so let's make it official. Signed-off-by: Bartosz Golaszewski Acked-by: Andy Shevchenko Reviewed-by: Linus Walleij --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0dab9737ec16..98f8256b08a7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8791,6 +8791,7 @@ F: include/linux/gpio/regmap.h GPIO SUBSYSTEM M: Linus Walleij M: Bartosz Golaszewski +R: Andy Shevchenko L: linux-gpio@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git From f46fab0e36e611a2389d3843f34658c849b6bd60 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 6 Jun 2023 11:17:14 -0700 Subject: [PATCH 526/934] bpf: Add extra path pointer check to d_path helper Anastasios reported crash on stable 5.15 kernel with following BPF attached to lsm hook: SEC("lsm.s/bprm_creds_for_exec") int BPF_PROG(bprm_creds_for_exec, struct linux_binprm *bprm) { struct path *path = &bprm->executable->f_path; char p[128] = { 0 }; bpf_d_path(path, p, 128); return 0; } But bprm->executable can be NULL, so bpf_d_path call will crash: BUG: kernel NULL pointer dereference, address: 0000000000000018 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC NOPTI ... RIP: 0010:d_path+0x22/0x280 ... Call Trace: bpf_d_path+0x21/0x60 bpf_prog_db9cf176e84498d9_bprm_creds_for_exec+0x94/0x99 bpf_trampoline_6442506293_0+0x55/0x1000 bpf_lsm_bprm_creds_for_exec+0x5/0x10 security_bprm_creds_for_exec+0x29/0x40 bprm_execve+0x1c1/0x900 do_execveat_common.isra.0+0x1af/0x260 __x64_sys_execve+0x32/0x40 It's problem for all stable trees with bpf_d_path helper, which was added in 5.9. This issue is fixed in current bpf code, where we identify and mark trusted pointers, so the above code would fail even to load. For the sake of the stable trees and to workaround potentially broken verifier in the future, adding the code that reads the path object from the passed pointer and verifies it's valid in kernel space. Fixes: 6e22ab9da793 ("bpf: Add d_path helper") Reported-by: Anastasios Papagiannis Suggested-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Stanislav Fomichev Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20230606181714.532998-1-jolsa@kernel.org --- kernel/trace/bpf_trace.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9a050e36dc6c..1f4b07da327a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -900,13 +900,23 @@ static const struct bpf_func_proto bpf_send_signal_thread_proto = { BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz) { + struct path copy; long len; char *p; if (!sz) return 0; - p = d_path(path, buf, sz); + /* + * The path pointer is verified as trusted and safe to use, + * but let's double check it's valid anyway to workaround + * potentially broken verifier. + */ + len = copy_from_kernel_nofault(©, path, sizeof(*path)); + if (len < 0) + return len; + + p = d_path(©, buf, sz); if (IS_ERR(p)) { len = PTR_ERR(p); } else { From b00de0000a69579f4d730077fe3ea8ca31404255 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Mon, 5 Jun 2023 14:56:07 +0300 Subject: [PATCH 527/934] regulator: qcom-rpmh: Fix regulators for PM8550 The PM8550 uses only NLDOs 515 and the LDO 6 through 8 are low voltage type, so fix accordingly. Fixes: e6e3776d682d ("regulator: qcom-rpmh: Add support for PM8550 regulators") Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20230605115607.921308-1-abel.vesa@linaro.org Signed-off-by: Mark Brown --- drivers/regulator/qcom-rpmh-regulator.c | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index b0a58c62b1e2..f3b280af0773 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -1057,21 +1057,21 @@ static const struct rpmh_vreg_init_data pm8450_vreg_data[] = { }; static const struct rpmh_vreg_init_data pm8550_vreg_data[] = { - RPMH_VREG("ldo1", "ldo%s1", &pmic5_pldo, "vdd-l1-l4-l10"), + RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo515, "vdd-l1-l4-l10"), RPMH_VREG("ldo2", "ldo%s2", &pmic5_pldo, "vdd-l2-l13-l14"), - RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"), - RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo, "vdd-l1-l4-l10"), + RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo515, "vdd-l3"), + RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo515, "vdd-l1-l4-l10"), RPMH_VREG("ldo5", "ldo%s5", &pmic5_pldo, "vdd-l5-l16"), - RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo_lv, "vdd-l6-l7"), - RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo_lv, "vdd-l6-l7"), - RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo_lv, "vdd-l8-l9"), + RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo, "vdd-l6-l7"), + RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo, "vdd-l6-l7"), + RPMH_VREG("ldo8", "ldo%s8", &pmic5_pldo, "vdd-l8-l9"), RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l8-l9"), - RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo, "vdd-l1-l4-l10"), - RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo, "vdd-l11"), + RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo515, "vdd-l1-l4-l10"), + RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo515, "vdd-l11"), RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo, "vdd-l12"), RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l2-l13-l14"), RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo, "vdd-l2-l13-l14"), - RPMH_VREG("ldo15", "ldo%s15", &pmic5_pldo, "vdd-l15"), + RPMH_VREG("ldo15", "ldo%s15", &pmic5_nldo515, "vdd-l15"), RPMH_VREG("ldo16", "ldo%s16", &pmic5_pldo, "vdd-l5-l16"), RPMH_VREG("ldo17", "ldo%s17", &pmic5_pldo, "vdd-l17"), RPMH_VREG("bob1", "bob%s1", &pmic5_bob, "vdd-bob1"), @@ -1086,9 +1086,9 @@ static const struct rpmh_vreg_init_data pm8550vs_vreg_data[] = { RPMH_VREG("smps4", "smp%s4", &pmic5_ftsmps525_lv, "vdd-s4"), RPMH_VREG("smps5", "smp%s5", &pmic5_ftsmps525_lv, "vdd-s5"), RPMH_VREG("smps6", "smp%s6", &pmic5_ftsmps525_mv, "vdd-s6"), - RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1"), - RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l2"), - RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"), + RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo515, "vdd-l1"), + RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo515, "vdd-l2"), + RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo515, "vdd-l3"), {} }; @@ -1101,9 +1101,9 @@ static const struct rpmh_vreg_init_data pm8550ve_vreg_data[] = { RPMH_VREG("smps6", "smp%s6", &pmic5_ftsmps525_lv, "vdd-s6"), RPMH_VREG("smps7", "smp%s7", &pmic5_ftsmps525_lv, "vdd-s7"), RPMH_VREG("smps8", "smp%s8", &pmic5_ftsmps525_lv, "vdd-s8"), - RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1"), - RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l2"), - RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"), + RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo515, "vdd-l1"), + RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo515, "vdd-l2"), + RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo515, "vdd-l3"), {} }; From 6569fc12e442ea973d96db39e542aa19a7bc3a79 Mon Sep 17 00:00:00 2001 From: Hsieh-Tseng Shen Date: Tue, 25 Apr 2023 18:28:28 +0800 Subject: [PATCH 528/934] riscv: mm: Ensure prot of VM_WRITE and VM_EXEC must be readable Commit 8aeb7b17f04e ("RISC-V: Make mmap() with PROT_WRITE imply PROT_READ") allows riscv to use mmap with PROT_WRITE only, and meanwhile mmap with w+x is also permitted. However, when userspace tries to access this page with PROT_WRITE|PROT_EXEC, which causes infinite loop at load page fault as well as it triggers soft lockup. According to riscv privileged spec, "Writable pages must also be marked readable". The fix to drop the `PAGE_COPY_READ_EXEC` and then `PAGE_COPY_EXEC` would be just used instead. This aligns the other arches (i.e arm64) for protection_map. Fixes: 8aeb7b17f04e ("RISC-V: Make mmap() with PROT_WRITE imply PROT_READ") Signed-off-by: Hsieh-Tseng Shen Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230425102828.1616812-1-woodrow.shen@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 3 +-- arch/riscv/mm/init.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 2258b27173b0..75970ee2bda2 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -165,8 +165,7 @@ extern struct pt_alloc_ops pt_ops __initdata; _PAGE_EXEC | _PAGE_WRITE) #define PAGE_COPY PAGE_READ -#define PAGE_COPY_EXEC PAGE_EXEC -#define PAGE_COPY_READ_EXEC PAGE_READ_EXEC +#define PAGE_COPY_EXEC PAGE_READ_EXEC #define PAGE_SHARED PAGE_WRITE #define PAGE_SHARED_EXEC PAGE_WRITE_EXEC diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index c6bb966e4123..0fe75c9713f8 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -293,7 +293,7 @@ static const pgprot_t protection_map[16] = { [VM_EXEC] = PAGE_EXEC, [VM_EXEC | VM_READ] = PAGE_READ_EXEC, [VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC, - [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_READ_EXEC, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_EXEC, [VM_SHARED] = PAGE_NONE, [VM_SHARED | VM_READ] = PAGE_READ, [VM_SHARED | VM_WRITE] = PAGE_SHARED, From 25abe0db92437fde463204c3e4eb5a71b62d6e5d Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 6 Jun 2023 15:04:44 +0200 Subject: [PATCH 529/934] riscv: Fix kfence now that the linear mapping can be backed by PUD/P4D/PGD RISC-V Kfence implementation used to rely on the fact the linear mapping was backed by at most PMD hugepages, which is not true anymore since commit 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping"). Instead of splitting PUD/P4D/PGD mappings afterwards, directly map the kfence pool region using PTE mappings by allocating this region before setup_vm_final(). Reported-by: syzbot+a74d57bddabbedd75135@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a74d57bddabbedd75135 Fixes: 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping") Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230606130444.25090-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kfence.h | 33 ------------------------------- arch/riscv/mm/init.c | 35 ++++++++++++++++++++++++++++----- 2 files changed, 30 insertions(+), 38 deletions(-) diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h index d887a54042aa..0bbffd528096 100644 --- a/arch/riscv/include/asm/kfence.h +++ b/arch/riscv/include/asm/kfence.h @@ -8,41 +8,8 @@ #include #include -static inline int split_pmd_page(unsigned long addr) -{ - int i; - unsigned long pfn = PFN_DOWN(__pa((addr & PMD_MASK))); - pmd_t *pmd = pmd_off_k(addr); - pte_t *pte = pte_alloc_one_kernel(&init_mm); - - if (!pte) - return -ENOMEM; - - for (i = 0; i < PTRS_PER_PTE; i++) - set_pte(pte + i, pfn_pte(pfn + i, PAGE_KERNEL)); - set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(pte)), PAGE_TABLE)); - - flush_tlb_kernel_range(addr, addr + PMD_SIZE); - return 0; -} - static inline bool arch_kfence_init_pool(void) { - int ret; - unsigned long addr; - pmd_t *pmd; - - for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr); - addr += PAGE_SIZE) { - pmd = pmd_off_k(addr); - - if (pmd_leaf(*pmd)) { - ret = split_pmd_page(addr); - if (ret) - return false; - } - } - return true; } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0fe75c9713f8..38c4b4d6b64f 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -23,6 +23,7 @@ #ifdef CONFIG_RELOCATABLE #include #endif +#include #include #include @@ -1167,14 +1168,16 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) } static void __init create_linear_mapping_range(phys_addr_t start, - phys_addr_t end) + phys_addr_t end, + uintptr_t fixed_map_size) { phys_addr_t pa; uintptr_t va, map_size; for (pa = start; pa < end; pa += map_size) { va = (uintptr_t)__va(pa); - map_size = best_map_size(pa, end - pa); + map_size = fixed_map_size ? fixed_map_size : + best_map_size(pa, end - pa); create_pgd_mapping(swapper_pg_dir, va, pa, map_size, pgprot_from_va(va)); @@ -1184,6 +1187,7 @@ static void __init create_linear_mapping_range(phys_addr_t start, static void __init create_linear_mapping_page_table(void) { phys_addr_t start, end; + phys_addr_t kfence_pool __maybe_unused; u64 i; #ifdef CONFIG_STRICT_KERNEL_RWX @@ -1197,6 +1201,19 @@ static void __init create_linear_mapping_page_table(void) memblock_mark_nomap(krodata_start, krodata_size); #endif +#ifdef CONFIG_KFENCE + /* + * kfence pool must be backed by PAGE_SIZE mappings, so allocate it + * before we setup the linear mapping so that we avoid using hugepages + * for this region. + */ + kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); + BUG_ON(!kfence_pool); + + memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE); + __kfence_pool = __va(kfence_pool); +#endif + /* Map all memory banks in the linear mapping */ for_each_mem_range(i, &start, &end) { if (start >= end) @@ -1207,17 +1224,25 @@ static void __init create_linear_mapping_page_table(void) if (end >= __pa(PAGE_OFFSET) + memory_limit) end = __pa(PAGE_OFFSET) + memory_limit; - create_linear_mapping_range(start, end); + create_linear_mapping_range(start, end, 0); } #ifdef CONFIG_STRICT_KERNEL_RWX - create_linear_mapping_range(ktext_start, ktext_start + ktext_size); + create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0); create_linear_mapping_range(krodata_start, - krodata_start + krodata_size); + krodata_start + krodata_size, 0); memblock_clear_nomap(ktext_start, ktext_size); memblock_clear_nomap(krodata_start, krodata_size); #endif + +#ifdef CONFIG_KFENCE + create_linear_mapping_range(kfence_pool, + kfence_pool + KFENCE_POOL_SIZE, + PAGE_SIZE); + + memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); +#endif } static void __init setup_vm_final(void) From 49a0a3731596fc004db6eec3fc674d92a09ef383 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 7 Jun 2023 14:58:51 +0200 Subject: [PATCH 530/934] riscv: Check the virtual alignment before choosing a map size We used to only check the alignment of the physical address to decide which mapping would fit for a certain region of the linear mapping, but it is not enough since the virtual address must also be aligned, so check that too. Fixes: 3335068f8721 ("riscv: Use PUD/P4D/PGD pages for the linear mapping") Reported-by: Song Shuai Link: https://lore.kernel.org/linux-riscv/tencent_7C3B580B47C1B17C16488EC1@qq.com/ Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230607125851.63370-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 38c4b4d6b64f..4fa420faa780 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -660,18 +660,19 @@ void __init create_pgd_mapping(pgd_t *pgdp, create_pgd_next_mapping(nextp, va, pa, sz, prot); } -static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) +static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va, + phys_addr_t size) { - if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) + if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) return PGDIR_SIZE; - if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE) + if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE) return P4D_SIZE; - if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE) + if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE) return PUD_SIZE; - if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE) + if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE) return PMD_SIZE; return PAGE_SIZE; @@ -1177,7 +1178,7 @@ static void __init create_linear_mapping_range(phys_addr_t start, for (pa = start; pa < end; pa += map_size) { va = (uintptr_t)__va(pa); map_size = fixed_map_size ? fixed_map_size : - best_map_size(pa, end - pa); + best_map_size(pa, va, end - pa); create_pgd_mapping(swapper_pg_dir, va, pa, map_size, pgprot_from_va(va)); From 1caa71a7a600f7781ce05ef1e84701c459653663 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 7 Jun 2023 15:38:44 +0100 Subject: [PATCH 531/934] KVM: arm64: Restore GICv2-on-GICv3 functionality When reworking the vgic locking, the vgic distributor registration got simplified, which was a very good cleanup. But just a tad too radical, as we now register the *native* vgic only, ignoring the GICv2-on-GICv3 that allows pre-historic VMs (or so I thought) to run. As it turns out, QEMU still defaults to GICv2 in some cases, and this breaks Nathan's setup! Fix it by propagating the *requested* vgic type rather than the host's version. Fixes: 59112e9c390b ("KVM: arm64: vgic: Fix a circular locking issue") Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Marc Zyngier link: https://lore.kernel.org/r/20230606221525.GA2269598@dev-arch.thelio-3990X --- arch/arm64/kvm/vgic/vgic-init.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 6eafc2c45cfc..c8c3cb812783 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -446,6 +446,7 @@ int vgic_lazy_init(struct kvm *kvm) int kvm_vgic_map_resources(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + enum vgic_type type; gpa_t dist_base; int ret = 0; @@ -460,10 +461,13 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (!irqchip_in_kernel(kvm)) goto out; - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { ret = vgic_v2_map_resources(kvm); - else + type = VGIC_V2; + } else { ret = vgic_v3_map_resources(kvm); + type = VGIC_V3; + } if (ret) { __kvm_vgic_destroy(kvm); @@ -473,8 +477,7 @@ int kvm_vgic_map_resources(struct kvm *kvm) dist_base = dist->vgic_dist_base; mutex_unlock(&kvm->arch.config_lock); - ret = vgic_register_dist_iodev(kvm, dist_base, - kvm_vgic_global_state.type); + ret = vgic_register_dist_iodev(kvm, dist_base, type); if (ret) { kvm_err("Unable to register VGIC dist MMIO regions\n"); kvm_vgic_destroy(kvm); From 30c60dda219ddda0bc6ff6ac55d493d9db8be4fa Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 6 Jun 2023 18:48:14 +0000 Subject: [PATCH 532/934] KVM: arm64: Use raw_smp_processor_id() in kvm_pmu_probe_armpmu() Sebastian reports that commit 1c913a1c35aa ("KVM: arm64: Iterate arm_pmus list to probe for default PMU") introduced the following splat with CONFIG_DEBUG_PREEMPT enabled: [70506.110187] BUG: using smp_processor_id() in preemptible [00000000] code: qemu-system-aar/3078242 [70506.119077] caller is debug_smp_processor_id+0x20/0x30 [70506.124229] CPU: 129 PID: 3078242 Comm: qemu-system-aar Tainted: G W 6.4.0-rc5 #25 [70506.133176] Hardware name: GIGABYTE R181-T92-00/MT91-FS4-00, BIOS F34 08/13/2020 [70506.140559] Call trace: [70506.142993] dump_backtrace+0xa4/0x130 [70506.146737] show_stack+0x20/0x38 [70506.150040] dump_stack_lvl+0x48/0x60 [70506.153704] dump_stack+0x18/0x28 [70506.157007] check_preemption_disabled+0xe4/0x108 [70506.161701] debug_smp_processor_id+0x20/0x30 [70506.166046] kvm_arm_pmu_v3_set_attr+0x460/0x628 [70506.170662] kvm_arm_vcpu_arch_set_attr+0x88/0xd8 [70506.175363] kvm_arch_vcpu_ioctl+0x258/0x4a8 [70506.179632] kvm_vcpu_ioctl+0x32c/0x6b8 [70506.183465] __arm64_sys_ioctl+0xb4/0x100 [70506.187467] invoke_syscall+0x78/0x108 [70506.191205] el0_svc_common.constprop.0+0x4c/0x100 [70506.195984] do_el0_svc+0x34/0x50 [70506.199287] el0_svc+0x34/0x108 [70506.202416] el0t_64_sync_handler+0xf4/0x120 [70506.206674] el0t_64_sync+0x194/0x198 Fix the issue by using the raw variant that bypasses the debug assertion. While at it, stick all of the nuance and UAPI baggage into a comment for posterity. Fixes: 1c913a1c35aa ("KVM: arm64: Iterate arm_pmus list to probe for default PMU") Reported-by: Sebastian Ott Signed-off-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230606184814.456743-1-oliver.upton@linux.dev --- arch/arm64/kvm/pmu-emul.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 491ca7eb2a4c..560650972478 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -700,7 +700,25 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void) mutex_lock(&arm_pmus_lock); - cpu = smp_processor_id(); + /* + * It is safe to use a stale cpu to iterate the list of PMUs so long as + * the same value is used for the entirety of the loop. Given this, and + * the fact that no percpu data is used for the lookup there is no need + * to disable preemption. + * + * It is still necessary to get a valid cpu, though, to probe for the + * default PMU instance as userspace is not required to specify a PMU + * type. In order to uphold the preexisting behavior KVM selects the + * PMU instance for the core where the first call to the + * KVM_ARM_VCPU_PMU_V3_CTRL attribute group occurs. A dependent use case + * would be a user with disdain of all things big.LITTLE that affines + * the VMM to a particular cluster of cores. + * + * In any case, userspace should just do the sane thing and use the UAPI + * to select a PMU type directly. But, be wary of the baggage being + * carried here. + */ + cpu = raw_smp_processor_id(); list_for_each_entry(entry, &arm_pmus, entry) { tmp = entry->arm_pmu; From a27648c742104a833a01c54becc24429898d85bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Jun 2023 09:47:13 +0100 Subject: [PATCH 533/934] afs: Fix setting of mtime when creating a file/dir/symlink kafs incorrectly passes a zero mtime (ie. 1st Jan 1970) to the server when creating a file, dir or symlink because the mtime recorded in the afs_operation struct gets passed to the server by the marshalling routines, but the afs_mkdir(), afs_create() and afs_symlink() functions don't set it. This gets masked if a file or directory is subsequently modified. Fix this by filling in op->mtime before calling the create op. Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept") Signed-off-by: David Howells Reviewed-by: Jeffrey Altman Reviewed-by: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/afs/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 4dd97afa536c..5219182e52e1 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1358,6 +1358,7 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir, op->dentry = dentry; op->create.mode = S_IFDIR | mode; op->create.reason = afs_edit_dir_for_mkdir; + op->mtime = current_time(dir); op->ops = &afs_mkdir_operation; return afs_do_sync_operation(op); } @@ -1661,6 +1662,7 @@ static int afs_create(struct mnt_idmap *idmap, struct inode *dir, op->dentry = dentry; op->create.mode = S_IFREG | mode; op->create.reason = afs_edit_dir_for_create; + op->mtime = current_time(dir); op->ops = &afs_create_operation; return afs_do_sync_operation(op); @@ -1796,6 +1798,7 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir, op->ops = &afs_symlink_operation; op->create.reason = afs_edit_dir_for_symlink; op->create.symlink = content; + op->mtime = current_time(dir); return afs_do_sync_operation(op); error: From 44b902e935be111fc98eb2a2d8e3d08196fd52ac Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Mon, 8 May 2023 11:08:10 -0500 Subject: [PATCH 534/934] dt-bindings: net: realtek-bluetooth: Fix RTL8821CS binding Update the fallback string for the RTL8821CS from realtek,rtl8822cs-bt to realtek,rtl8723bs-bt. The difference between these two strings is that the 8822cs enables power saving features that the 8723bs does not, and in testing the 8821cs seems to have issues with these power saving modes enabled. Fixes: 95ee3a93239e ("dt-bindings: net: realtek-bluetooth: Add RTL8821CS") Signed-off-by: Chris Morgan Acked-by: Krzysztof Kozlowski Acked-by: Heiko Stuebner Link: https://lore.kernel.org/r/20230508160811.3568213-2-macroalpha82@gmail.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/realtek-bluetooth.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml index 8cc2b9924680..506ea9b17668 100644 --- a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml +++ b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml @@ -27,7 +27,7 @@ properties: - items: - enum: - realtek,rtl8821cs-bt - - const: realtek,rtl8822cs-bt + - const: realtek,rtl8723bs-bt device-wake-gpios: maxItems: 1 From 2a567c1e29301495affa220543f156dd6dad50a9 Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Tue, 9 May 2023 16:15:01 +0200 Subject: [PATCH 535/934] dt-bindings: net: realtek-bluetooth: Fix double RTL8723CS in desc The description says 'RTL8723CS/RTL8723CS/...' whereas the title and other places reference 'RTL8723BS/RTL8723CS/...'. Signed-off-by: Diederik de Haas Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230509141500.275887-1-didi.debian@cknow.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/realtek-bluetooth.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml index 506ea9b17668..043e118c605c 100644 --- a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml +++ b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml @@ -11,7 +11,7 @@ maintainers: - Alistair Francis description: - RTL8723CS/RTL8723CS/RTL8821CS/RTL8822CS is a WiFi + BT chip. WiFi part + RTL8723BS/RTL8723CS/RTL8821CS/RTL8822CS is a WiFi + BT chip. WiFi part is connected over SDIO, while BT is connected over serial. It speaks H5 protocol with few extra commands to upload firmware and change module speed. From 25bda386c3d526b48dd22186324570fc8e191a52 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 7 Jun 2023 01:14:05 +0200 Subject: [PATCH 536/934] MAINTAINERS: Add entry for debug objects This is overdue and an oversight. Add myself to this file deespite the fact that I'm trying to reduce the number of entries in this file which have my name attached, but in the hope that patches wont get picked up elsewhere completely unreviewed and unnoticed. Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0dab9737ec16..a2f1d14031b8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5728,6 +5728,14 @@ F: include/linux/tfrc.h F: include/uapi/linux/dccp.h F: net/dccp/ +DEBUGOBJECTS: +M: Thomas Gleixner +L: linux-kernel@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/debugobjects +F: lib/debugobjects.c +F: include/linux/debugobjects.h + DECSTATION PLATFORM SUPPORT M: "Maciej W. Rozycki" L: linux-mips@vger.kernel.org From 4d8df0f5f79f747d75a7d356d9b9ea40a4e4c8a9 Mon Sep 17 00:00:00 2001 From: Prathu Baronia Date: Mon, 22 May 2023 14:20:19 +0530 Subject: [PATCH 537/934] vhost: use kzalloc() instead of kmalloc() followed by memset() Use kzalloc() to allocate new zeroed out msg node instead of memsetting a node allocated with kmalloc(). Signed-off-by: Prathu Baronia Message-Id: <20230522085019.42914-1-prathubaronia2011@gmail.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vhost/vhost.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 074273020849..ecb3b397bb38 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2563,12 +2563,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { - struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + /* Make sure all padding within the structure is initialized. */ + struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; - /* Make sure all padding within the structure is initialized. */ - memset(&node->msg, 0, sizeof node->msg); node->vq = vq; node->msg.type = type; return node; From de29a96acceae732c68a4094d08dc49079eefa02 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 26 May 2023 15:35:37 +0800 Subject: [PATCH 538/934] notifier: Initialize new struct srcu_usage field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 95433f726301 ("srcu: Begin offloading srcu_struct fields to srcu_update"), a new struct srcu_usage field was added, but was not properly initialized. This led to a "spinlock bad magic" BUG when the SRCU notifier was ever used. This was observed in the MediaTek CCI devfreq driver on next-20230525. The trimmed stack trace is as follows: BUG: spinlock bad magic on CPU#4, swapper/0/1 lock: 0xffffff80ff529ac0, .magic: 00000000, .owner: /-1, .owner_cpu: 0 Call trace: spin_bug+0xa4/0xe8 do_raw_spin_lock+0xec/0x120 _raw_spin_lock_irqsave+0x78/0xb8 synchronize_srcu+0x3c/0x168 srcu_notifier_chain_unregister+0x5c/0xa0 cpufreq_unregister_notifier+0x94/0xe0 devfreq_passive_event_handler+0x7c/0x3e0 devfreq_remove_device+0x48/0xe8 Add __SRCU_USAGE_INIT() to SRCU_NOTIFIER_INIT() so that srcu_usage gets initialized properly. Reported-by: Jon Hunter Fixes: 95433f726301 ("srcu: Begin offloading srcu_struct fields to srcu_update") Signed-off-by: Chen-Yu Tsai Tested-by: AngeloGioacchino Del Regno Cc: Matthias Brugger Cc: "Rafael J. Wysocki" Cc: "Michał Mirosław" Cc: Dmitry Osipenko Cc: Sachin Sant Cc: Joel Fernandes (Google) Acked-by: Zqiang Signed-off-by: Paul E. McKenney --- include/linux/notifier.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 2aba75145144..86544707236a 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -106,12 +106,22 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); #define RAW_NOTIFIER_INIT(name) { \ .head = NULL } +#ifdef CONFIG_TREE_SRCU +#define SRCU_NOTIFIER_INIT(name, pcpu) \ + { \ + .mutex = __MUTEX_INITIALIZER(name.mutex), \ + .head = NULL, \ + .srcuu = __SRCU_USAGE_INIT(name.srcuu), \ + .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ + } +#else #define SRCU_NOTIFIER_INIT(name, pcpu) \ { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ .head = NULL, \ .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \ } +#endif #define ATOMIC_NOTIFIER_HEAD(name) \ struct atomic_notifier_head name = \ From 30c3d3b70aba2464ee8c91025e91428f92464077 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 30 May 2023 11:57:59 -0500 Subject: [PATCH 539/934] drm/amd: Disallow s0ix without BIOS support again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cf488dcd0ab7 ("drm/amd: Allow s0ix without BIOS support") showed improvements to power consumption over suspend when s0ix wasn't enabled in BIOS and the system didn't support S3. This patch however was misguided because the reason the system didn't support S3 was because SMT was disabled in OEM BIOS setup. This prevented the BIOS from allowing S3. Also allowing GPUs to use the s2idle path actually causes problems if they're invoked on systems that may not support s2idle in the platform firmware. `systemd` has a tendency to try to use `s2idle` if `deep` fails for any reason, which could lead to unexpected flows. The original commit also fixed a problem during resume from suspend to idle without hardware support, but this is no longer necessary with commit ca4751866397 ("drm/amd: Don't allow s0ix on APUs older than Raven") Revert commit cf488dcd0ab7 ("drm/amd: Allow s0ix without BIOS support") to make it match the expected behavior again. Cc: Rafael Ávila de Espíndola Link: https://github.com/torvalds/linux/blob/v6.1/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c#L1060 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2599 Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index aeeec211861c..e1b01554e323 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1092,16 +1092,20 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) * S0ix even though the system is suspending to idle, so return false * in that case. */ - if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) + if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) { dev_warn_once(adev->dev, "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n" "To use suspend-to-idle change the sleep mode in BIOS setup.\n"); + return false; + } #if !IS_ENABLED(CONFIG_AMD_PMC) dev_warn_once(adev->dev, "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n"); -#endif /* CONFIG_AMD_PMC */ + return false; +#else return true; +#endif /* CONFIG_AMD_PMC */ } #endif /* CONFIG_SUSPEND */ From 2a1eb1a343208ce7d6839b73d62aece343e693ff Mon Sep 17 00:00:00 2001 From: Horatio Zhang Date: Mon, 29 May 2023 14:23:37 -0400 Subject: [PATCH 540/934] drm/amdgpu: fix Null pointer dereference error in amdgpu_device_recover_vram Use the function of amdgpu_bo_vm_destroy to handle the resource release of shadow bo. During the amdgpu_mes_self_test, shadow bo released, but vmbo->shadow_list was not, which caused a null pointer reference error in amdgpu_device_recover_vram when GPU reset. Fixes: 6c032c37ac3e ("drm/amdgpu: Fix vram recover doesn't work after whole GPU reset (v2)") Signed-off-by: xinhui pan Signed-off-by: Horatio Zhang Acked-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 ++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 1 - 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2bd1a54ee866..3b225be89cb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -79,9 +79,10 @@ static void amdgpu_bo_user_destroy(struct ttm_buffer_object *tbo) static void amdgpu_bo_vm_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); + struct amdgpu_bo *shadow_bo = ttm_to_amdgpu_bo(tbo), *bo; struct amdgpu_bo_vm *vmbo; + bo = shadow_bo->parent; vmbo = to_amdgpu_bo_vm(bo); /* in case amdgpu_device_recover_vram got NULL of bo->parent */ if (!list_empty(&vmbo->shadow_list)) { @@ -694,11 +695,6 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, return r; *vmbo_ptr = to_amdgpu_bo_vm(bo_ptr); - INIT_LIST_HEAD(&(*vmbo_ptr)->shadow_list); - /* Set destroy callback to amdgpu_bo_vm_destroy after vmbo->shadow_list - * is initialized. - */ - bo_ptr->tbo.destroy = &amdgpu_bo_vm_destroy; return r; } @@ -715,6 +711,8 @@ void amdgpu_bo_add_to_shadow_list(struct amdgpu_bo_vm *vmbo) mutex_lock(&adev->shadow_list_lock); list_add_tail(&vmbo->shadow_list, &adev->shadow_list); + vmbo->shadow->parent = amdgpu_bo_ref(&vmbo->bo); + vmbo->shadow->tbo.destroy = &amdgpu_bo_vm_destroy; mutex_unlock(&adev->shadow_list_lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index df63dc3bca18..051c7194ab4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -564,7 +564,6 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; } - (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); amdgpu_bo_add_to_shadow_list(*vmbo); return 0; From 1d13c49cf4e246b218d71873f1bb1bbd376aa10e Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 31 Mar 2023 16:30:01 +0530 Subject: [PATCH 541/934] drm/amd/pm: Fix power context allocation in SMU13 Use the right data structure for allocation. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 393c6a7b9609..ca379181081c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -573,11 +573,11 @@ int smu_v13_0_init_power(struct smu_context *smu) if (smu_power->power_context || smu_power->power_context_size != 0) return -EINVAL; - smu_power->power_context = kzalloc(sizeof(struct smu_13_0_dpm_context), + smu_power->power_context = kzalloc(sizeof(struct smu_13_0_power_context), GFP_KERNEL); if (!smu_power->power_context) return -ENOMEM; - smu_power->power_context_size = sizeof(struct smu_13_0_dpm_context); + smu_power->power_context_size = sizeof(struct smu_13_0_power_context); return 0; } From 38e4ced804796c5725e2a52ec3601951552c4a97 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 6 Apr 2023 12:08:21 +0800 Subject: [PATCH 542/934] drm/amd/pm: conditionally disable pcie lane switching for some sienna_cichlid SKUs Disable the pcie lane switching for some sienna_cichlid SKUs since it might not work well on some platforms. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 92 +++++++++++++++---- 1 file changed, 74 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 75f18681e984..85d53597eb07 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2067,33 +2067,94 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context return ret; } +static void sienna_cichlid_get_override_pcie_settings(struct smu_context *smu, + uint32_t *gen_speed_override, + uint32_t *lane_width_override) +{ + struct amdgpu_device *adev = smu->adev; + + *gen_speed_override = 0xff; + *lane_width_override = 0xff; + + switch (adev->pdev->device) { + case 0x73A0: + case 0x73A1: + case 0x73A2: + case 0x73A3: + case 0x73AB: + case 0x73AE: + /* Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 */ + *lane_width_override = 6; + break; + case 0x73E0: + case 0x73E1: + case 0x73E3: + *lane_width_override = 4; + break; + case 0x7420: + case 0x7421: + case 0x7422: + case 0x7423: + case 0x7424: + *lane_width_override = 3; + break; + default: + break; + } +} + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap) { struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; - - uint32_t smu_pcie_arg; + struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table; + uint32_t gen_speed_override, lane_width_override; uint8_t *table_member1, *table_member2; + uint32_t min_gen_speed, max_gen_speed; + uint32_t min_lane_width, max_lane_width; + uint32_t smu_pcie_arg; int ret, i; GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1); GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2); - /* lclk dpm table setup */ - for (i = 0; i < MAX_PCIE_CONF; i++) { - dpm_context->dpm_tables.pcie_table.pcie_gen[i] = table_member1[i]; - dpm_context->dpm_tables.pcie_table.pcie_lane[i] = table_member2[i]; + sienna_cichlid_get_override_pcie_settings(smu, + &gen_speed_override, + &lane_width_override); + + /* PCIE gen speed override */ + if (gen_speed_override != 0xff) { + min_gen_speed = MIN(pcie_gen_cap, gen_speed_override); + max_gen_speed = MIN(pcie_gen_cap, gen_speed_override); + } else { + min_gen_speed = MAX(0, table_member1[0]); + max_gen_speed = MIN(pcie_gen_cap, table_member1[1]); + min_gen_speed = min_gen_speed > max_gen_speed ? + max_gen_speed : min_gen_speed; } + pcie_table->pcie_gen[0] = min_gen_speed; + pcie_table->pcie_gen[1] = max_gen_speed; + + /* PCIE lane width override */ + if (lane_width_override != 0xff) { + min_lane_width = MIN(pcie_width_cap, lane_width_override); + max_lane_width = MIN(pcie_width_cap, lane_width_override); + } else { + min_lane_width = MAX(1, table_member2[0]); + max_lane_width = MIN(pcie_width_cap, table_member2[1]); + min_lane_width = min_lane_width > max_lane_width ? + max_lane_width : min_lane_width; + } + pcie_table->pcie_lane[0] = min_lane_width; + pcie_table->pcie_lane[1] = max_lane_width; for (i = 0; i < NUM_LINK_LEVELS; i++) { - smu_pcie_arg = (i << 16) | - ((table_member1[i] <= pcie_gen_cap) ? - (table_member1[i] << 8) : - (pcie_gen_cap << 8)) | - ((table_member2[i] <= pcie_width_cap) ? - table_member2[i] : - pcie_width_cap); + smu_pcie_arg = (i << 16 | + pcie_table->pcie_gen[i] << 8 | + pcie_table->pcie_lane[i]); ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_OverridePcieParameters, @@ -2101,11 +2162,6 @@ static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu, NULL); if (ret) return ret; - - if (table_member1[i] > pcie_gen_cap) - dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap; - if (table_member2[i] > pcie_width_cap) - dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap; } return 0; From 99b3886f8674502e967b1d050e40aa669c9098c1 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 30 May 2023 12:44:30 -0500 Subject: [PATCH 543/934] drm/amd: Make lack of `ACPI_FADT_LOW_POWER_S0` or `CONFIG_AMD_PMC` louder during suspend path Users have reported that s2idle wasn't working on OEM Phoenix systems, but it was root caused to be because `CONFIG_AMD_PMC` wasn't set in the distribution kernel config. To make this more apparent, raise the messaging to err instead of warn. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217497 Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index e1b01554e323..fd6e83795873 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1093,14 +1093,14 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) * in that case. */ if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) { - dev_warn_once(adev->dev, + dev_err_once(adev->dev, "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n" "To use suspend-to-idle change the sleep mode in BIOS setup.\n"); return false; } #if !IS_ENABLED(CONFIG_AMD_PMC) - dev_warn_once(adev->dev, + dev_err_once(adev->dev, "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n"); return false; #else From 59de751e3845d699e02dc4da47322b92d83a41e2 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Tue, 9 May 2023 16:40:19 -0400 Subject: [PATCH 544/934] drm/amd/display: add ODM case when looking for first split pipe [Why] When going from ODM 2:1 single display case to max displays, second odm pipe needs to be repurposed for one of the new single displays. However, acquire_first_split_pipe() only handles MPC case and not ODM case [How] Add ODM conditions in acquire_first_split_pipe() Add commit_minimal_transition_state() in commit_streams() to handle odm 2:1 exit first, and then process new streams Handle ODM condition in commit_minimal_transition_state() Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Stylon Wang Signed-off-by: Samson Tam Reviewed-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 36 ++++++++++++++++++- .../gpu/drm/amd/display/dc/core/dc_resource.c | 20 +++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 52564b93f7eb..7cde67b7f0c3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1981,6 +1981,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c return result; } +static bool commit_minimal_transition_state(struct dc *dc, + struct dc_state *transition_base_context); + /** * dc_commit_streams - Commit current stream state * @@ -2002,6 +2005,8 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_state *context; enum dc_status res = DC_OK; struct dc_validation_set set[MAX_STREAMS] = {0}; + struct pipe_ctx *pipe; + bool handle_exit_odm2to1 = false; if (dc->ctx->dce_environment == DCE_ENV_VIRTUAL_HW) return res; @@ -2026,6 +2031,22 @@ enum dc_status dc_commit_streams(struct dc *dc, } } + /* Check for case where we are going from odm 2:1 to max + * pipe scenario. For these cases, we will call + * commit_minimal_transition_state() to exit out of odm 2:1 + * first before processing new streams + */ + if (stream_count == dc->res_pool->pipe_count) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + if (pipe->next_odm_pipe) + handle_exit_odm2to1 = true; + } + } + + if (handle_exit_odm2to1) + res = commit_minimal_transition_state(dc, dc->current_state); + context = dc_create_state(dc); if (!context) goto context_alloc_fail; @@ -3872,6 +3893,7 @@ static bool commit_minimal_transition_state(struct dc *dc, unsigned int i, j; unsigned int pipe_in_use = 0; bool subvp_in_use = false; + bool odm_in_use = false; if (!transition_context) return false; @@ -3900,6 +3922,18 @@ static bool commit_minimal_transition_state(struct dc *dc, } } + /* If ODM is enabled and we are adding or removing planes from any ODM + * pipe, we must use the minimal transition. + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (pipe->stream && pipe->next_odm_pipe) { + odm_in_use = true; + break; + } + } + /* When the OS add a new surface if we have been used all of pipes with odm combine * and mpc split feature, it need use commit_minimal_transition_state to transition safely. * After OS exit MPO, it will back to use odm and mpc split with all of pipes, we need @@ -3908,7 +3942,7 @@ static bool commit_minimal_transition_state(struct dc *dc, * Reduce the scenarios to use dc_commit_state_no_check in the stage of flip. Especially * enter/exit MPO when DCN still have enough resources. */ - if (pipe_in_use != dc->res_pool->pipe_count && !subvp_in_use) { + if (pipe_in_use != dc->res_pool->pipe_count && !subvp_in_use && !odm_in_use) { dc_release_state(transition_context); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 117d80cb36fb..fe1551393b26 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1444,6 +1444,26 @@ static int acquire_first_split_pipe( split_pipe->plane_res.mpcc_inst = pool->dpps[i]->inst; split_pipe->pipe_idx = i; + split_pipe->stream = stream; + return i; + } else if (split_pipe->prev_odm_pipe && + split_pipe->prev_odm_pipe->plane_state == split_pipe->plane_state) { + split_pipe->prev_odm_pipe->next_odm_pipe = split_pipe->next_odm_pipe; + if (split_pipe->next_odm_pipe) + split_pipe->next_odm_pipe->prev_odm_pipe = split_pipe->prev_odm_pipe; + + if (split_pipe->prev_odm_pipe->plane_state) + resource_build_scaling_params(split_pipe->prev_odm_pipe); + + memset(split_pipe, 0, sizeof(*split_pipe)); + split_pipe->stream_res.tg = pool->timing_generators[i]; + split_pipe->plane_res.hubp = pool->hubps[i]; + split_pipe->plane_res.ipp = pool->ipps[i]; + split_pipe->plane_res.dpp = pool->dpps[i]; + split_pipe->stream_res.opp = pool->opps[i]; + split_pipe->plane_res.mpcc_inst = pool->dpps[i]->inst; + split_pipe->pipe_idx = i; + split_pipe->stream = stream; return i; } From 3b3ffd729e7e3ad706ddba4bb84358df5d43a647 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Jun 2023 14:32:55 -0400 Subject: [PATCH 545/934] Revert "drm/amdgpu: change the reference clock for raven/raven2" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit fbc24293ca16b3b9ef891fe32ccd04735a6f8dc1. This results in inconsistent timing reported via asynchronous GPU queries. Link: https://lists.freedesktop.org/archives/amd-gfx/2023-May/093731.html Cc: Jesse.Zhang@amd.com Cc: michel@daenzer.net Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 6d15d5cd9e07..a2fd1ffadb59 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -301,10 +301,11 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) u32 reference_clock = adev->clock.spll.reference_freq; if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0) || - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1) || - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) || - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1)) + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1)) return 10000; + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) || + adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1)) + return reference_clock / 4; return reference_clock; } From d511f95938bf9e75ab73ace0ab1cebbe9a13df4b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Jun 2023 14:34:12 -0400 Subject: [PATCH 546/934] Revert "drm/amdgpu: Differentiate between Raven2 and Raven/Picasso according to revision id" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 9d2d1827af295fd6971786672c41c4dba3657154. This results in inconsistent timing reported via asynchronous GPU queries. Link: https://lists.freedesktop.org/archives/amd-gfx/2023-May/093731.html Cc: Jesse.Zhang@amd.com Cc: michel@daenzer.net Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 35 +++++++++++++++------------ 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ce22f7b30416..e5b0677591c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4005,25 +4005,30 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) clock = clock_lo | (clock_hi << 32ULL); break; case IP_VERSION(9, 1, 0): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; case IP_VERSION(9, 2, 2): preempt_disable(); - if (adev->rev_id >= 0x8) { - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); - } else { - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); - } + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over - * roughly every 42 seconds. - */ + * roughly every 42 seconds. + */ if (hi_check != clock_hi) { - if (adev->rev_id >= 0x8) - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); - else - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); clock_hi = hi_check; } preempt_enable(); From 73c12de8bee258b51bd418f33dc59f2c6e5fb5f6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Jun 2023 14:37:13 -0400 Subject: [PATCH 547/934] Revert "drm/amdgpu: switch to golden tsc registers for raven/raven2" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f03eb1d26c2739b75580f58bbab4ab2d5d3eba46. This results in inconsistent timing reported via asynchronous GPU queries. Link: https://lists.freedesktop.org/archives/amd-gfx/2023-May/093731.html Cc: Jesse.Zhang@amd.com Cc: michel@daenzer.net Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 40 --------------------------- 1 file changed, 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e5b0677591c2..e7f2b7bf0ff5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -149,16 +149,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 -#define mmGOLDEN_TSC_COUNT_UPPER_Raven 0x007a -#define mmGOLDEN_TSC_COUNT_UPPER_Raven_BASE_IDX 0 -#define mmGOLDEN_TSC_COUNT_LOWER_Raven 0x007b -#define mmGOLDEN_TSC_COUNT_LOWER_Raven_BASE_IDX 0 - -#define mmGOLDEN_TSC_COUNT_UPPER_Raven2 0x0068 -#define mmGOLDEN_TSC_COUNT_UPPER_Raven2_BASE_IDX 0 -#define mmGOLDEN_TSC_COUNT_LOWER_Raven2 0x0069 -#define mmGOLDEN_TSC_COUNT_LOWER_Raven2_BASE_IDX 0 - enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -4004,36 +3994,6 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) preempt_enable(); clock = clock_lo | (clock_hi << 32ULL); break; - case IP_VERSION(9, 1, 0): - preempt_disable(); - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); - /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over - * roughly every 42 seconds. - */ - if (hi_check != clock_hi) { - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); - clock_hi = hi_check; - } - preempt_enable(); - clock = clock_lo | (clock_hi << 32ULL); - break; - case IP_VERSION(9, 2, 2): - preempt_disable(); - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); - /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over - * roughly every 42 seconds. - */ - if (hi_check != clock_hi) { - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); - clock_hi = hi_check; - } - preempt_enable(); - clock = clock_lo | (clock_hi << 32ULL); - break; default: amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); From 982b173a6c6d9472730c3116051977e05d17c8c5 Mon Sep 17 00:00:00 2001 From: Min Li Date: Sat, 3 Jun 2023 15:43:45 +0800 Subject: [PATCH 548/934] drm/radeon: fix race condition UAF in radeon_gem_set_domain_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace can race to free the gobj(robj converted from), robj should not be accessed again after drm_gem_object_put, otherwith it will result in use-after-free. Reviewed-by: Christian König Signed-off-by: Min Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index bdc5af23f005..d3f5ddbc1704 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -459,7 +459,6 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; - struct radeon_bo *robj; int r; /* for now if someone requests domain CPU - @@ -472,13 +471,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, up_read(&rdev->exclusive_lock); return -ENOENT; } - robj = gem_to_radeon_bo(gobj); r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_put(gobj); up_read(&rdev->exclusive_lock); - r = radeon_gem_handle_lockup(robj->rdev, r); + r = radeon_gem_handle_lockup(rdev, r); return r; } From b447b079cf3a9971ea4d31301e673f49612ccc18 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 1 Jun 2023 14:48:08 -0700 Subject: [PATCH 549/934] drm/amdgpu: fix xclk freq on CHIP_STONEY According to Alex, most APUs from that time seem to have the same issue (vbios says 48Mhz, actual is 100Mhz). I only have a CHIP_STONEY so I limit the fixup to CHIP_STONEY Signed-off-by: Chia-I Wu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vi.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 531f173ade2d..c0360dbebd4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -542,8 +542,15 @@ static u32 vi_get_xclk(struct amdgpu_device *adev) u32 reference_clock = adev->clock.spll.reference_freq; u32 tmp; - if (adev->flags & AMD_IS_APU) - return reference_clock; + if (adev->flags & AMD_IS_APU) { + switch (adev->asic_type) { + case CHIP_STONEY: + /* vbios says 48Mhz, but the actual freq is 100Mhz */ + return 10000; + default: + return reference_clock; + } + } tmp = RREG32_SMC(ixCG_CLKPIN_CNTL_2); if (REG_GET_FIELD(tmp, CG_CLKPIN_CNTL_2, MUX_TCLK_TO_XCLK)) From dac652220ba0e5a2ef2da2a47a60b60aea333fdb Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Wed, 24 May 2023 17:14:15 +0800 Subject: [PATCH 550/934] drm/amdgpu: change reserved vram info print MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The link object of mgr->reserved_pages is the blocks variable in struct amdgpu_vram_reservation, not the link variable in struct drm_buddy_block. Signed-off-by: YiPeng Chai Reviewed-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 43d6a9d6a538..afacfb9b5bf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -800,7 +800,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; + struct amdgpu_vram_reservation *rsv; drm_printf(printer, " vis usage:%llu\n", amdgpu_vram_mgr_vis_usage(mgr)); @@ -812,8 +812,9 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, drm_buddy_print(mm, printer); drm_printf(printer, "reserved:\n"); - list_for_each_entry(block, &mgr->reserved_pages, link) - drm_buddy_block_print(mm, block, printer); + list_for_each_entry(rsv, &mgr->reserved_pages, blocks) + drm_printf(printer, "%#018llx-%#018llx: %llu\n", + rsv->start, rsv->start + rsv->size, rsv->size); mutex_unlock(&mgr->lock); } From e1a600208286c197c2696e51fc313e49889315bd Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 19 May 2023 11:38:15 -0400 Subject: [PATCH 551/934] drm/amd/display: Reduce sdp bw after urgent to 90% [Description] Reduce expected SDP bandwidth due to poor QoS and arbitration issues on high bandwidth configs Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Stylon Wang Signed-off-by: Alvin Lee Reviewed-by: Nevenko Stupar Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 47beb4ea779d..0c4c3208def1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -138,7 +138,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 100.0, + .pct_ideal_sdp_bw_after_urgent = 90.0, .pct_ideal_fabric_bw_after_urgent = 67.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented From 212ed75dc5fb9d1423b3942c8f872a868cda3466 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 7 Jun 2023 11:55:42 +0200 Subject: [PATCH 552/934] netfilter: nf_tables: integrate pipapo into commit protocol The pipapo set backend follows copy-on-update approach, maintaining one clone of the existing datastructure that is being updated. The clone and current datastructures are swapped via rcu from the commit step. The existing integration with the commit protocol is flawed because there is no operation to clean up the clone if the transaction is aborted. Moreover, the datastructure swap happens on set element activation. This patch adds two new operations for sets: commit and abort, these new operations are invoked from the commit and abort steps, after the transactions have been digested, and it updates the pipapo set backend to use it. This patch adds a new ->pending_update field to sets to maintain a list of sets that require this new commit and abort operations. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++- net/netfilter/nf_tables_api.c | 56 +++++++++++++++++++++++++++++++ net/netfilter/nft_set_pipapo.c | 55 +++++++++++++++++++++--------- 3 files changed, 99 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2e24ea1d744c..83db182decc8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -462,7 +462,8 @@ struct nft_set_ops { const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); - + void (*commit)(const struct nft_set *set); + void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, @@ -557,6 +558,7 @@ struct nft_set { u16 policy; u16 udlen; unsigned char *udata; + struct list_head pending_update; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; u16 flags:14, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0519d45ede6b..3bb0800b3849 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4919,6 +4919,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, set->num_exprs = num_exprs; set->handle = nf_tables_alloc_handle(table); + INIT_LIST_HEAD(&set->pending_update); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) @@ -9275,10 +9276,25 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation) } } +static void nft_set_commit_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->commit) + continue; + + set->ops->commit(set); + } +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; @@ -9453,6 +9469,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_NEWSETELEM); + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: @@ -9467,6 +9488,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) atomic_dec(&te->set->nelems); te->set->ndeact--; } + if (te->set->ops->commit && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { @@ -9529,6 +9555,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_set_commit_update(&set_update_list); + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_audit_log(&adl, nft_net->base_seq); @@ -9588,10 +9616,25 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } +static void nft_set_abort_update(struct list_head *set_update_list) +{ + struct nft_set *set, *next; + + list_for_each_entry_safe(set, next, set_update_list, pending_update) { + list_del_init(&set->pending_update); + + if (!set->ops->abort) + continue; + + set->ops->abort(set); + } +} + static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { struct nftables_pernet *nft_net = nft_pernet(net); struct nft_trans *trans, *next; + LIST_HEAD(set_update_list); struct nft_trans_elem *te; if (action == NFNL_ABORT_VALIDATE && @@ -9701,6 +9744,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_setelem_remove(net, te->set, &te->elem); if (!nft_setelem_is_catchall(te->set, &te->elem)) atomic_dec(&te->set->nelems); + + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: @@ -9711,6 +9760,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) if (!nft_setelem_is_catchall(te->set, &te->elem)) te->set->ndeact--; + if (te->set->ops->abort && + list_empty(&te->set->pending_update)) { + list_add_tail(&te->set->pending_update, + &set_update_list); + } nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: @@ -9753,6 +9807,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } } + nft_set_abort_update(&set_update_list); + synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 06d46d182634..15e451dc3fc4 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1600,17 +1600,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m) } } -/** - * pipapo_reclaim_match - RCU callback to free fields from old matching data - * @rcu: RCU head - */ -static void pipapo_reclaim_match(struct rcu_head *rcu) +static void pipapo_free_match(struct nft_pipapo_match *m) { - struct nft_pipapo_match *m; int i; - m = container_of(rcu, struct nft_pipapo_match, rcu); - for_each_possible_cpu(i) kfree(*per_cpu_ptr(m->scratch, i)); @@ -1625,7 +1618,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) } /** - * pipapo_commit() - Replace lookup data with current working copy + * pipapo_reclaim_match - RCU callback to free fields from old matching data + * @rcu: RCU head + */ +static void pipapo_reclaim_match(struct rcu_head *rcu) +{ + struct nft_pipapo_match *m; + + m = container_of(rcu, struct nft_pipapo_match, rcu); + pipapo_free_match(m); +} + +/** + * nft_pipapo_commit() - Replace lookup data with current working copy * @set: nftables API set representation * * While at it, check if we should perform garbage collection on the working @@ -1635,7 +1640,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * We also need to create a new working copy for subsequent insertions and * deletions. */ -static void pipapo_commit(const struct nft_set *set) +static void nft_pipapo_commit(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *new_clone, *old; @@ -1660,6 +1665,26 @@ static void pipapo_commit(const struct nft_set *set) priv->clone = new_clone; } +static void nft_pipapo_abort(const struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *new_clone, *m; + + if (!priv->dirty) + return; + + m = rcu_dereference(priv->match); + + new_clone = pipapo_clone(m); + if (IS_ERR(new_clone)) + return; + + priv->dirty = false; + + pipapo_free_match(priv->clone); + priv->clone = new_clone; +} + /** * nft_pipapo_activate() - Mark element reference as active given key, commit * @net: Network namespace @@ -1667,8 +1692,7 @@ static void pipapo_commit(const struct nft_set *set) * @elem: nftables API element representation containing key data * * On insertion, elements are added to a copy of the matching data currently - * in use for lookups, and not directly inserted into current lookup data, so - * we'll take care of that by calling pipapo_commit() here. Both + * in use for lookups, and not directly inserted into current lookup data. Both * nft_pipapo_insert() and nft_pipapo_activate() are called once for each * element, hence we can't purpose either one as a real commit operation. */ @@ -1684,8 +1708,6 @@ static void nft_pipapo_activate(const struct net *net, nft_set_elem_change_active(net, set, &e->ext); nft_set_elem_clear_busy(&e->ext); - - pipapo_commit(set); } /** @@ -1931,7 +1953,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, if (i == m->field_count) { priv->dirty = true; pipapo_drop(m, rulemap); - pipapo_commit(set); return; } @@ -2230,6 +2251,8 @@ const struct nft_set_type nft_set_pipapo_type = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; @@ -2252,6 +2275,8 @@ const struct nft_set_type nft_set_pipapo_avx2_type = { .init = nft_pipapo_init, .destroy = nft_pipapo_destroy, .gc_init = nft_pipapo_gc_init, + .commit = nft_pipapo_commit, + .abort = nft_pipapo_abort, .elemsize = offsetof(struct nft_pipapo_elem, ext), }, }; From a1a64a151dae8ac3581c1cbde44b672045cb658b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 8 Jun 2023 00:19:12 +0200 Subject: [PATCH 553/934] netfilter: nfnetlink: skip error delivery on batch in case of ENOMEM If caller reports ENOMEM, then stop iterating over the batch and send a single netlink message to userspace to report OOM. Fixes: cbb8125eb40b ("netfilter: nfnetlink: deliver netlink errors on batch completion") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index ae7146475d17..c9fbe0f707b5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -533,7 +533,8 @@ ack: * processed, this avoids that the same error is * reported several times when replaying the batch. */ - if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) { + if (err == -ENOMEM || + nfnl_err_add(&err_list, nlh, err, &extack) < 0) { /* We failed to enqueue an error, reset the * list of errors and send OOM to userspace * pointing to the batch header. From 7c5d4801ecf0564c860033d89726b99723c55146 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 2 Jun 2023 20:28:15 +0200 Subject: [PATCH 554/934] lib: cpu_rmap: Fix potential use-after-free in irq_cpu_rmap_release() irq_cpu_rmap_release() calls cpu_rmap_put(), which may free the rmap. So we need to clear the pointer to our glue structure in rmap before doing that, not after. Fixes: 4e0473f1060a ("lib: cpu_rmap: Avoid use after free on rmap->obj array entries") Signed-off-by: Ben Hutchings Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ZHo0vwquhOy3FaXc@decadent.org.uk Signed-off-by: Jakub Kicinski --- lib/cpu_rmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c index 73c1636b927b..4c348670da31 100644 --- a/lib/cpu_rmap.c +++ b/lib/cpu_rmap.c @@ -280,8 +280,8 @@ static void irq_cpu_rmap_release(struct kref *ref) struct irq_glue *glue = container_of(ref, struct irq_glue, notify.kref); - cpu_rmap_put(glue->rmap); glue->rmap->obj[glue->index] = NULL; + cpu_rmap_put(glue->rmap); kfree(glue); } From 649c3fed36730a53447d8f479c14e431363563b6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Jun 2023 18:08:25 -0700 Subject: [PATCH 555/934] eth: bnxt: fix the wake condition The down condition should be the negation of the wake condition, IOW when I moved it from: if (cond && wake()) to if (__netif_txq_completed_wake(cond)) Cond should have been negated. Flip it now. This bug leads to occasional crashes with netconsole. It may also lead to queue never waking up in case BQL is not enabled. Reported-by: David Wei Fixes: 08a096780d92 ("bnxt: use new queue try_stop/try_wake macros") Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20230607010826.960226-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dcd9367f05af..1f04cd4cfab9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -692,7 +692,7 @@ next_tx_int: __netif_txq_completed_wake(txq, nr_pkts, tx_bytes, bnxt_tx_avail(bp, txr), bp->tx_wake_thresh, - READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING); + READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING); } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, From f0d751973f739feb3742d4e5f4c0914e8b84e7c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Jun 2023 18:08:26 -0700 Subject: [PATCH 556/934] eth: ixgbe: fix the wake condition Flip the netif_carrier_ok() condition in queue wake logic. When I moved it to inside __netif_txq_completed_wake() I missed negating it. This made the condition ineffective and could probably lead to crashes. Fixes: 301f227fc860 ("net: piggy back on the memory barrier in bql when waking queues") Reviewed-by: Tony Nguyen Link: https://lore.kernel.org/r/20230607010826.960226-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5d83c887a3fc..1726297f2e0d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1256,7 +1256,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, if (!__netif_txq_completed_wake(txq, total_packets, total_bytes, ixgbe_desc_unused(tx_ring), TX_WAKE_THRESHOLD, - netif_carrier_ok(tx_ring->netdev) && + !netif_carrier_ok(tx_ring->netdev) || test_bit(__IXGBE_DOWN, &adapter->state))) ++tx_ring->tx_stats.restart_queue; From a9f31047baca57d47440c879cf259b86f900260c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 6 Jun 2023 14:43:47 -0700 Subject: [PATCH 557/934] net: bcmgenet: Fix EEE implementation We had a number of short comings: - EEE must be re-evaluated whenever the state machine detects a link change as wight be switching from a link partner with EEE enabled/disabled - tx_lpi_enabled controls whether EEE should be enabled/disabled for the transmit path, which applies to the TBUF block - We do not need to forcibly enable EEE upon system resume, as the PHY state machine will trigger a link event that will do that, too Fixes: 6ef398ea60d9 ("net: bcmgenet: add EEE support") Signed-off-by: Florian Fainelli Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230606214348.2408018-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/genet/bcmgenet.c | 22 +++++++------------ .../net/ethernet/broadcom/genet/bcmgenet.h | 3 +++ drivers/net/ethernet/broadcom/genet/bcmmii.c | 5 +++++ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index eca0c92c0c84..2b5761ad2f92 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1272,7 +1272,8 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev, } } -static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) +void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, + bool tx_lpi_enabled) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 off = priv->hw_params->tbuf_offset + TBUF_ENERGY_CTRL; @@ -1292,7 +1293,7 @@ static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) /* Enable EEE and switch to a 27Mhz clock automatically */ reg = bcmgenet_readl(priv->base + off); - if (enable) + if (tx_lpi_enabled) reg |= TBUF_EEE_EN | TBUF_PM_EN; else reg &= ~(TBUF_EEE_EN | TBUF_PM_EN); @@ -1313,6 +1314,7 @@ static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) priv->eee.eee_enabled = enable; priv->eee.eee_active = enable; + priv->eee.tx_lpi_enabled = tx_lpi_enabled; } static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) @@ -1328,6 +1330,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) e->eee_enabled = p->eee_enabled; e->eee_active = p->eee_active; + e->tx_lpi_enabled = p->tx_lpi_enabled; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); return phy_ethtool_get_eee(dev->phydev, e); @@ -1337,7 +1340,6 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) { struct bcmgenet_priv *priv = netdev_priv(dev); struct ethtool_eee *p = &priv->eee; - int ret = 0; if (GENET_IS_V1(priv)) return -EOPNOTSUPP; @@ -1348,16 +1350,11 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) p->eee_enabled = e->eee_enabled; if (!p->eee_enabled) { - bcmgenet_eee_enable_set(dev, false); + bcmgenet_eee_enable_set(dev, false, false); } else { - ret = phy_init_eee(dev->phydev, false); - if (ret) { - netif_err(priv, hw, dev, "EEE initialization failed\n"); - return ret; - } - + p->eee_active = phy_init_eee(dev->phydev, false) >= 0; bcmgenet_umac_writel(priv, e->tx_lpi_timer, UMAC_EEE_LPI_TIMER); - bcmgenet_eee_enable_set(dev, true); + bcmgenet_eee_enable_set(dev, p->eee_active, e->tx_lpi_enabled); } return phy_ethtool_set_eee(dev->phydev, e); @@ -4279,9 +4276,6 @@ static int bcmgenet_resume(struct device *d) if (!device_may_wakeup(d)) phy_resume(dev->phydev); - if (priv->eee.eee_enabled) - bcmgenet_eee_enable_set(dev, true); - bcmgenet_netif_start(dev); netif_device_attach(dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 946f6e283c4e..1985c0ec4da2 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -703,4 +703,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode); +void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, + bool tx_lpi_enabled); + #endif /* __BCMGENET_H__ */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index be042905ada2..c15ed0acdb77 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -87,6 +87,11 @@ static void bcmgenet_mac_config(struct net_device *dev) reg |= CMD_TX_EN | CMD_RX_EN; } bcmgenet_umac_writel(priv, reg, UMAC_CMD); + + priv->eee.eee_active = phy_init_eee(phydev, 0) >= 0; + bcmgenet_eee_enable_set(dev, + priv->eee.eee_enabled && priv->eee.eee_active, + priv->eee.tx_lpi_enabled); } /* setup netdev link state when PHY link status change and From 9f7e3611f6c828fcb6001c39d8e7a523a4f31525 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Thu, 25 May 2023 12:38:17 +0200 Subject: [PATCH 558/934] accel/ivpu: Do not trigger extra VPU reset if the VPU is idle Turning off the PLL and entering D0i3 will reset the VPU so an explicit IP reset is redundant. But if the VPU is active, it may interfere with PLL disabling and to avoid that, we have to issue an additional IP reset to silence the VPU before turning off the PLL. Fixes: a8fed6d1e0b9 ("accel/ivpu: Fix power down sequence") Cc: stable@vger.kernel.org # 6.3.x Signed-off-by: Andrzej Kacprowski Reviewed-by: Stanislaw Gruszka Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230525103818.877590-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_mtl.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index 382ec127be8e..156dae676967 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -754,9 +754,8 @@ static int ivpu_hw_mtl_power_down(struct ivpu_device *vdev) { int ret = 0; - if (ivpu_hw_mtl_reset(vdev)) { + if (!ivpu_hw_mtl_is_idle(vdev) && ivpu_hw_mtl_reset(vdev)) { ivpu_err(vdev, "Failed to reset the VPU\n"); - ret = -EIO; } if (ivpu_pll_disable(vdev)) { @@ -764,8 +763,10 @@ static int ivpu_hw_mtl_power_down(struct ivpu_device *vdev) ret = -EIO; } - if (ivpu_hw_mtl_d0i3_enable(vdev)) - ivpu_warn(vdev, "Failed to enable D0I3\n"); + if (ivpu_hw_mtl_d0i3_enable(vdev)) { + ivpu_err(vdev, "Failed to enter D0I3\n"); + ret = -EIO; + } return ret; } From b563e47957af4ff71736c5cc4120a59b055ab583 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 25 May 2023 12:38:18 +0200 Subject: [PATCH 559/934] accel/ivpu: Do not use mutex_lock_interruptible If we get signal when waiting for the mmu->lock we do not invalidate current MMU configuration that might result in undefined behavior. Additionally there is little or no benefit on break waiting for ipc->lock. In current code base, we keep this lock for short periods. Fixes: 263b2ba5fc93 ("accel/ivpu: Add Intel VPU MMU support") Reviewed-by: Krystian Pradzynski Reviewed-by: Jeffrey Hugo Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230525103818.877590-2-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_ipc.c | 4 +--- drivers/accel/ivpu/ivpu_mmu.c | 22 ++++++---------------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 3adcfa80fc0e..fa0af59e39ab 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -183,9 +183,7 @@ ivpu_ipc_send(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct v struct ivpu_ipc_info *ipc = vdev->ipc; int ret; - ret = mutex_lock_interruptible(&ipc->lock); - if (ret) - return ret; + mutex_lock(&ipc->lock); if (!ipc->on) { ret = -EAGAIN; diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 694e978aba66..b8b259b3aa63 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -587,16 +587,11 @@ static int ivpu_mmu_strtab_init(struct ivpu_device *vdev) int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid) { struct ivpu_mmu_info *mmu = vdev->mmu; - int ret; + int ret = 0; - ret = mutex_lock_interruptible(&mmu->lock); - if (ret) - return ret; - - if (!mmu->on) { - ret = 0; + mutex_lock(&mmu->lock); + if (!mmu->on) goto unlock; - } ret = ivpu_mmu_cmdq_write_tlbi_nh_asid(vdev, ssid); if (ret) @@ -614,7 +609,7 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) struct ivpu_mmu_cdtab *cdtab = &mmu->cdtab; u64 *entry; u64 cd[4]; - int ret; + int ret = 0; if (ssid > IVPU_MMU_CDTAB_ENT_COUNT) return -EINVAL; @@ -655,14 +650,9 @@ static int ivpu_mmu_cd_add(struct ivpu_device *vdev, u32 ssid, u64 cd_dma) ivpu_dbg(vdev, MMU, "CDTAB %s entry (SSID=%u, dma=%pad): 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", cd_dma ? "write" : "clear", ssid, &cd_dma, cd[0], cd[1], cd[2], cd[3]); - ret = mutex_lock_interruptible(&mmu->lock); - if (ret) - return ret; - - if (!mmu->on) { - ret = 0; + mutex_lock(&mmu->lock); + if (!mmu->on) goto unlock; - } ret = ivpu_mmu_cmdq_write_cfgi_all(vdev); if (ret) From a3efabee5878b8d7b1863debb78cb7129d07a346 Mon Sep 17 00:00:00 2001 From: Andrzej Kacprowski Date: Wed, 7 Jun 2023 11:45:02 +0200 Subject: [PATCH 560/934] accel/ivpu: Fix sporadic VPU boot failure Wait for AON bit in HOST_SS_CPR_RST_CLR to return 0 before starting VPUIP power up sequence, otherwise the VPU device may sporadically fail to boot. An error in power up sequence is propagated to the runtime power management - the device will be in an error state until the VPU driver is reloaded. Fixes: 35b137630f08 ("accel/ivpu: Introduce a new DRM driver for Intel VPU") Cc: stable@vger.kernel.org # 6.3.x Signed-off-by: Andrzej Kacprowski Reviewed-by: Krystian Pradzynski Signed-off-by: Stanislaw Gruszka Link: https://patchwork.freedesktop.org/patch/msgid/20230607094502.388489-1-stanislaw.gruszka@linux.intel.com --- drivers/accel/ivpu/ivpu_hw_mtl.c | 13 ++++++++++++- drivers/accel/ivpu/ivpu_hw_mtl_reg.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_hw_mtl.c b/drivers/accel/ivpu/ivpu_hw_mtl.c index 156dae676967..fef35422c6f0 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl.c +++ b/drivers/accel/ivpu/ivpu_hw_mtl.c @@ -197,6 +197,11 @@ static void ivpu_pll_init_frequency_ratios(struct ivpu_device *vdev) hw->pll.pn_ratio = clamp_t(u8, fuse_pn_ratio, hw->pll.min_ratio, hw->pll.max_ratio); } +static int ivpu_hw_mtl_wait_for_vpuip_bar(struct ivpu_device *vdev) +{ + return REGV_POLL_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, AON, 0, 100); +} + static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) { struct ivpu_hw_info *hw = vdev->hw; @@ -239,6 +244,12 @@ static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) ivpu_err(vdev, "Timed out waiting for PLL ready status\n"); return ret; } + + ret = ivpu_hw_mtl_wait_for_vpuip_bar(vdev); + if (ret) { + ivpu_err(vdev, "Timed out waiting for VPUIP bar\n"); + return ret; + } } return 0; @@ -256,7 +267,7 @@ static int ivpu_pll_disable(struct ivpu_device *vdev) static void ivpu_boot_host_ss_rst_clr_assert(struct ivpu_device *vdev) { - u32 val = REGV_RD32(MTL_VPU_HOST_SS_CPR_RST_CLR); + u32 val = 0; val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, TOP_NOC, val); val = REG_SET_FLD(MTL_VPU_HOST_SS_CPR_RST_CLR, DSS_MAS, val); diff --git a/drivers/accel/ivpu/ivpu_hw_mtl_reg.h b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h index d83ccfd9a871..593b8ff07417 100644 --- a/drivers/accel/ivpu/ivpu_hw_mtl_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_mtl_reg.h @@ -91,6 +91,7 @@ #define MTL_VPU_HOST_SS_CPR_RST_SET_MSS_MAS_MASK BIT_MASK(11) #define MTL_VPU_HOST_SS_CPR_RST_CLR 0x00000098u +#define MTL_VPU_HOST_SS_CPR_RST_CLR_AON_MASK BIT_MASK(0) #define MTL_VPU_HOST_SS_CPR_RST_CLR_TOP_NOC_MASK BIT_MASK(1) #define MTL_VPU_HOST_SS_CPR_RST_CLR_DSS_MAS_MASK BIT_MASK(10) #define MTL_VPU_HOST_SS_CPR_RST_CLR_MSS_MAS_MASK BIT_MASK(11) From 409e873ea3c1fd3079909718bbeb06ac1ec7f38b Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 1 Jun 2023 08:59:31 +0800 Subject: [PATCH 561/934] ceph: fix use-after-free bug for inodes when flushing capsnaps There is a race between capsnaps flush and removing the inode from 'mdsc->snap_flush_list' list: == Thread A == == Thread B == ceph_queue_cap_snap() -> allocate 'capsnapA' ->ihold('&ci->vfs_inode') ->add 'capsnapA' to 'ci->i_cap_snaps' ->add 'ci' to 'mdsc->snap_flush_list' ... == Thread C == ceph_flush_snaps() ->__ceph_flush_snaps() ->__send_flush_snap() handle_cap_flushsnap_ack() ->iput('&ci->vfs_inode') this also will release 'ci' ... == Thread D == ceph_handle_snap() ->flush_snaps() ->iterate 'mdsc->snap_flush_list' ->get the stale 'ci' ->remove 'ci' from ->ihold(&ci->vfs_inode) this 'mdsc->snap_flush_list' will WARNING To fix this we will increase the inode's i_count ref when adding 'ci' to the 'mdsc->snap_flush_list' list. [ idryomov: need_put int -> bool ] Cc: stable@vger.kernel.org Link: https://bugzilla.redhat.com/show_bug.cgi?id=2209299 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 6 ++++++ fs/ceph/snap.c | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 789be30d6ee2..2321e5ddb664 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1627,6 +1627,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci, struct inode *inode = &ci->netfs.inode; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_session *session = NULL; + bool need_put = false; int mds; dout("ceph_flush_snaps %p\n", inode); @@ -1671,8 +1672,13 @@ out: ceph_put_mds_session(session); /* we flushed them all; remove this inode from the queue */ spin_lock(&mdsc->snap_flush_lock); + if (!list_empty(&ci->i_snap_flush_item)) + need_put = true; list_del_init(&ci->i_snap_flush_item); spin_unlock(&mdsc->snap_flush_lock); + + if (need_put) + iput(inode); } /* diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 0b236ebd989f..2e73ba62bd7a 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -693,8 +693,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->size); spin_lock(&mdsc->snap_flush_lock); - if (list_empty(&ci->i_snap_flush_item)) + if (list_empty(&ci->i_snap_flush_item)) { + ihold(inode); list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list); + } spin_unlock(&mdsc->snap_flush_lock); return 1; /* caller may want to ceph_flush_snaps */ } From 7f3c782b3914e510b646a77aedc3adeac2e4a63b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 7 Jun 2023 19:54:24 -0700 Subject: [PATCH 562/934] eeprom: at24: also select REGMAP Selecting only REGMAP_I2C can leave REGMAP unset, causing build errors, so also select REGMAP to prevent the build errors. ../drivers/misc/eeprom/at24.c:540:42: warning: 'struct regmap_config' declared inside parameter list will not be visible outside of this definition or declaration 540 | struct regmap_config *regmap_config) ../drivers/misc/eeprom/at24.c: In function 'at24_make_dummy_client': ../drivers/misc/eeprom/at24.c:552:18: error: implicit declaration of function 'devm_regmap_init_i2c' [-Werror=implicit-function-declaration] 552 | regmap = devm_regmap_init_i2c(dummy_client, regmap_config); ../drivers/misc/eeprom/at24.c:552:16: warning: assignment to 'struct regmap *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 552 | regmap = devm_regmap_init_i2c(dummy_client, regmap_config); ../drivers/misc/eeprom/at24.c: In function 'at24_probe': ../drivers/misc/eeprom/at24.c:586:16: error: variable 'regmap_config' has initializer but incomplete type 586 | struct regmap_config regmap_config = { }; ../drivers/misc/eeprom/at24.c:586:30: error: storage size of 'regmap_config' isn't known 586 | struct regmap_config regmap_config = { }; ../drivers/misc/eeprom/at24.c:586:30: warning: unused variable 'regmap_config' [-Wunused-variable] Fixes: 5c015258478e ("eeprom: at24: add basic regmap_i2c support") Signed-off-by: Randy Dunlap Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig index f0a7531f354c..2d240bfa819f 100644 --- a/drivers/misc/eeprom/Kconfig +++ b/drivers/misc/eeprom/Kconfig @@ -6,6 +6,7 @@ config EEPROM_AT24 depends on I2C && SYSFS select NVMEM select NVMEM_SYSFS + select REGMAP select REGMAP_I2C help Enable this driver to get read/write support to most I2C EEPROMs From 713274f1f2c896d37017efee333fd44149710119 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 7 Jun 2023 15:39:50 +0300 Subject: [PATCH 563/934] bpf: Fix verifier id tracking of scalars on spill The following scenario describes a bug in the verifier where it incorrectly concludes about equivalent scalar IDs which could lead to verifier bypass in privileged mode: 1. Prepare a 32-bit rogue number. 2. Put the rogue number into the upper half of a 64-bit register, and roll a random (unknown to the verifier) bit in the lower half. The rest of the bits should be zero (although variations are possible). 3. Assign an ID to the register by MOVing it to another arbitrary register. 4. Perform a 32-bit spill of the register, then perform a 32-bit fill to another register. Due to a bug in the verifier, the ID will be preserved, although the new register will contain only the lower 32 bits, i.e. all zeros except one random bit. At this point there are two registers with different values but the same ID, which means the integrity of the verifier state has been corrupted. 5. Compare the new 32-bit register with 0. In the branch where it's equal to 0, the verifier will believe that the original 64-bit register is also 0, because it has the same ID, but its actual value still contains the rogue number in the upper half. Some optimizations of the verifier prevent the actual bypass, so extra care is needed: the comparison must be between two registers, and both branches must be reachable (this is why one random bit is needed). Both branches are still suitable for the bypass. 6. Right shift the original register by 32 bits to pop the rogue number. 7. Use the rogue number as an offset with any pointer. The verifier will believe that the offset is 0, while in reality it's the given number. The fix is similar to the 32-bit BPF_MOV handling in check_alu_op for SCALAR_VALUE. If the spill is narrowing the actual register value, don't keep the ID, make sure it's reset to 0. Fixes: 354e8f1970f8 ("bpf: Support <8-byte scalar spill and refill") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Daniel Borkmann Tested-by: Andrii Nakryiko # Checked veristat delta Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20230607123951.558971-2-maxtram95@gmail.com --- kernel/bpf/verifier.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5871aa78d01a..0dd8adc7a159 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3868,6 +3868,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, return err; } save_register_state(state, spi, reg, size); + /* Break the relation on a narrowing spill. */ + if (fls64(reg->umax_value) > BITS_PER_BYTE * size) + state->stack[spi].spilled_ptr.id = 0; } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && insn->imm != 0 && env->bpf_capable) { struct bpf_reg_state fake_reg = {}; From f57ade27fc3eae16eab0f1edba1248ced429b2f0 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 7 Jun 2023 15:39:51 +0300 Subject: [PATCH 564/934] selftests/bpf: Add test cases to assert proper ID tracking on spill The previous commit fixed a verifier bypass by ensuring that ID is not preserved on narrowing spills. Add the test cases to check the problematic patterns. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20230607123951.558971-3-maxtram95@gmail.com --- .../selftests/bpf/progs/verifier_spill_fill.c | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 136e5530b72c..6115520154e3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -371,4 +371,83 @@ __naked void and_then_at_fp_8(void) " ::: __clobber_all); } +SEC("xdp") +__description("32-bit spill of 64-bit reg should clear ID") +__failure __msg("math between ctx pointer and 4294967295 is not allowed") +__naked void spill_32bit_of_64bit_fail(void) +{ + asm volatile (" \ + r6 = r1; \ + /* Roll one bit to force the verifier to track both branches. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x8; \ + /* Put a large number into r1. */ \ + r1 = 0xffffffff; \ + r1 <<= 32; \ + r1 += r0; \ + /* Assign an ID to r1. */ \ + r2 = r1; \ + /* 32-bit spill r1 to stack - should clear the ID! */\ + *(u32*)(r10 - 8) = r1; \ + /* 32-bit fill r2 from stack. */ \ + r2 = *(u32*)(r10 - 8); \ + /* Compare r2 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. If the ID was mistakenly preserved on spill, this would\ + * cause the verifier to think that r1 is also equal to zero in one of\ + * the branches, and equal to eight on the other branch.\ + */ \ + r3 = 0; \ + if r2 != r3 goto l0_%=; \ +l0_%=: r1 >>= 32; \ + /* At this point, if the verifier thinks that r1 is 0, an out-of-bounds\ + * read will happen, because it actually contains 0xffffffff.\ + */ \ + r6 += r1; \ + r0 = *(u32*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("16-bit spill of 32-bit reg should clear ID") +__failure __msg("dereference of modified ctx ptr R6 off=65535 disallowed") +__naked void spill_16bit_of_32bit_fail(void) +{ + asm volatile (" \ + r6 = r1; \ + /* Roll one bit to force the verifier to track both branches. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x8; \ + /* Put a large number into r1. */ \ + w1 = 0xffff0000; \ + r1 += r0; \ + /* Assign an ID to r1. */ \ + r2 = r1; \ + /* 16-bit spill r1 to stack - should clear the ID! */\ + *(u16*)(r10 - 8) = r1; \ + /* 16-bit fill r2 from stack. */ \ + r2 = *(u16*)(r10 - 8); \ + /* Compare r2 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. If the ID was mistakenly preserved on spill, this would\ + * cause the verifier to think that r1 is also equal to zero in one of\ + * the branches, and equal to eight on the other branch.\ + */ \ + r3 = 0; \ + if r2 != r3 goto l0_%=; \ +l0_%=: r1 >>= 16; \ + /* At this point, if the verifier thinks that r1 is 0, an out-of-bounds\ + * read will happen, because it actually contains 0xffff.\ + */ \ + r6 += r1; \ + r0 = *(u32*)(r6 + 0); \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; From 095d5dc0c1d9f3284e3c575ccf4c0e8b04b548f8 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Wed, 7 Jun 2023 00:54:04 -0700 Subject: [PATCH 565/934] bnxt_en: Fix bnxt_hwrm_update_rss_hash_cfg() We must specify the vnic id of the vnic in the input structure of this firmware message. Otherwise we will get an error from the firmware. Fixes: 98a4322b70e8 ("bnxt_en: update RSS config using difference algorithm") Reviewed-by: Kalesh Anakkur Purayil Reviewed-by: Somnath Kotur Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1f04cd4cfab9..45739b4a84a2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5350,6 +5350,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) if (hwrm_req_init(bp, req, HWRM_VNIC_RSS_QCFG)) return; + req->vnic_id = cpu_to_le16(vnic->fw_vnic_id); /* all contexts configured to same hash_type, zero always exists */ req->rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]); resp = hwrm_req_hold(bp, req); From 1d997801c7cc6a7f542e46d5a6bf16f893ad3fe9 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Wed, 7 Jun 2023 00:54:05 -0700 Subject: [PATCH 566/934] bnxt_en: Don't issue AP reset during ethtool's reset operation Only older NIC controller's firmware uses the PROC AP reset type. Firmware on 5731X/5741X and newer chips does not support this reset type. When bnxt_reset() issues a series of resets, this PROC AP reset may actually fail on these newer chips because the firmware is not ready to accept this unsupported command yet. Avoid this unnecessary error by skipping this reset type on chips that don't support it. Fixes: 7a13240e3718 ("bnxt_en: fix ethtool_reset_flags ABI violations") Reviewed-by: Pavan Chebbi Signed-off-by: Sreekanth Reddy Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 2dd8ee4a6f75..8fd5071d8b09 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3831,7 +3831,7 @@ static int bnxt_reset(struct net_device *dev, u32 *flags) } } - if (req & BNXT_FW_RESET_AP) { + if (!BNXT_CHIP_P4_PLUS(bp) && (req & BNXT_FW_RESET_AP)) { /* This feature is not supported in older firmware versions */ if (bp->hwrm_spec_code >= 0x10803) { if (!bnxt_firmware_reset_ap(dev)) { From 1a9e4f501bc6ff1b6ecb60df54fbf2b54db43bfe Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 7 Jun 2023 00:54:06 -0700 Subject: [PATCH 567/934] bnxt_en: Query default VLAN before VNIC setup on a VF We need to call bnxt_hwrm_func_qcfg() on a VF to query the default VLAN that may be setup by the PF. If a default VLAN is enabled, the VF cannot support VLAN acceleration on the receive side and the VNIC must be setup to strip out the default VLAN tag. If a default VLAN is not enabled, the VF can support VLAN acceleration on the receive side. The VNIC should be set up to strip or not strip the VLAN based on the RX VLAN acceleration setting. Without this call to determine the default VLAN before calling bnxt_setup_vnic(), the VNIC may not be set up correctly. For example, bnxt_setup_vnic() may set up to strip the VLAN tag based on stale default VLAN information. If RX VLAN acceleration is not enabled, the VLAN tag will be incorrectly stripped and the RX data path will not work correctly. Fixes: cf6645f8ebc6 ("bnxt_en: Add function for VF driver to query default VLAN.") Reviewed-by: Pavan Chebbi Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 45739b4a84a2..b3fa840cb71f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8813,6 +8813,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) goto err_out; } + if (BNXT_VF(bp)) + bnxt_hwrm_func_qcfg(bp); + rc = bnxt_setup_vnic(bp, 0); if (rc) goto err_out; From 83474a9b252ab23e6003865c2775024344cb9c09 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Wed, 7 Jun 2023 00:54:07 -0700 Subject: [PATCH 568/934] bnxt_en: Skip firmware fatal error recovery if chip is not accessible Driver starts firmware fatal error recovery by detecting heartbeat failure or fw reset count register changing. But these checks are not reliable if the device is not accessible. This can happen while DPC (Downstream Port containment) is in progress. Skip firmware fatal recovery if pci_device_is_present() returns false. Fixes: acfb50e4e773 ("bnxt_en: Add FW fatal devlink_health_reporter.") Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b3fa840cb71f..546eb5d22b7a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11602,6 +11602,7 @@ static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue) static void bnxt_fw_health_check(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; + struct pci_dev *pdev = bp->pdev; u32 val; if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) @@ -11615,7 +11616,7 @@ static void bnxt_fw_health_check(struct bnxt *bp) } val = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG); - if (val == fw_health->last_fw_heartbeat) { + if (val == fw_health->last_fw_heartbeat && pci_device_is_present(pdev)) { fw_health->arrests++; goto fw_reset; } @@ -11623,7 +11624,7 @@ static void bnxt_fw_health_check(struct bnxt *bp) fw_health->last_fw_heartbeat = val; val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); - if (val != fw_health->last_fw_reset_cnt) { + if (val != fw_health->last_fw_reset_cnt && pci_device_is_present(pdev)) { fw_health->discoveries++; goto fw_reset; } From 319a7827df9784048abe072afe6b4fb4501d8de4 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Wed, 7 Jun 2023 00:54:08 -0700 Subject: [PATCH 569/934] bnxt_en: Prevent kernel panic when receiving unexpected PHC_UPDATE event The firmware can send PHC_RTC_UPDATE async event on a PF that may not have PTP registered. In such a case, there will be a null pointer deference for bp->ptp_cfg when we try to handle the event. Fix it by not registering for this event with the firmware if !bp->ptp_cfg. Also, check that bp->ptp_cfg is valid before proceeding when we receive the event. Fixes: 8bcf6f04d4a5 ("bnxt_en: Handle async event when the PHC is updated in RTC mode") Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 546eb5d22b7a..bb0332210674 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2365,6 +2365,9 @@ static int bnxt_async_event_process(struct bnxt *bp, struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; u64 ns; + if (!ptp) + goto async_event_process_exit; + spin_lock_bh(&ptp->ptp_lock); bnxt_ptp_update_current_time(bp); ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) << @@ -4763,6 +4766,9 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, if (event_id == ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY && !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) continue; + if (event_id == ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE && + !bp->ptp_cfg) + continue; __set_bit(bnxt_async_events_arr[i], async_events_bmap); } if (bmap && bmap_size) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index e46689128e32..f3886710e778 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -952,6 +952,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) bnxt_ptp_timecounter_init(bp, true); bnxt_ptp_adjfine_rtc(bp, 0); } + bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, true); ptp->ptp_info = bnxt_ptp_caps; if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) { From 1eb4ef12591348c440ac9d6efcf7521e73cf2b10 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 7 Jun 2023 00:54:09 -0700 Subject: [PATCH 570/934] bnxt_en: Implement .set_port / .unset_port UDP tunnel callbacks As per the new udp tunnel framework, drivers which need to know the details of a port entry (i.e. port type) when it gets deleted should use the .set_port / .unset_port callbacks. Implementing the current .udp_tunnel_sync callback would mean that the deleted tunnel port entry would be all zeros. This used to work on older firmware because it would not check the input when deleting a tunnel port. With newer firmware, the delete will now fail and subsequent tunnel port allocation will fail as a result. Fixes: 442a35a5a7aa ("bnxt: convert to new udp_tunnel_nic infra") Reviewed-by: Kalesh Anakkur Purayil Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 25 ++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bb0332210674..b499bc9c4e06 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13036,26 +13036,37 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp) #endif /* CONFIG_RFS_ACCEL */ -static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table) +static int bnxt_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) { struct bnxt *bp = netdev_priv(netdev); - struct udp_tunnel_info ti; unsigned int cmd; - udp_tunnel_nic_get_port(netdev, table, 0, &ti); - if (ti.type == UDP_TUNNEL_TYPE_VXLAN) + if (ti->type == UDP_TUNNEL_TYPE_VXLAN) cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN; else cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE; - if (ti.port) - return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd); + return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti->port, cmd); +} + +static int bnxt_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) +{ + struct bnxt *bp = netdev_priv(netdev); + unsigned int cmd; + + if (ti->type == UDP_TUNNEL_TYPE_VXLAN) + cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN; + else + cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE; return bnxt_hwrm_tunnel_dst_port_free(bp, cmd); } static const struct udp_tunnel_nic_info bnxt_udp_tunnels = { - .sync_table = bnxt_udp_tunnel_sync, + .set_port = bnxt_udp_tunnel_set_port, + .unset_port = bnxt_udp_tunnel_unset_port, .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | UDP_TUNNEL_NIC_INFO_OPEN_ONLY, .tables = { From 58d95889f3c2064c6139ee94bb0e4d86e1ad4eab Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 2 Jun 2023 11:11:36 +0100 Subject: [PATCH 571/934] soundwire: stream: Add missing clear of alloc_slave_rt The current path that skips allocating the slave runtime does not clear the alloc_slave_rt flag, this is clearly incorrect. Add the missing clear, so the runtime won't be erroneously cleaned up. Fixes: f3016b891c8c ("soundwire: stream: sdw_stream_add_ functions can be called multiple times") Reviewed-by: Pierre-Louis Bossart Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20230602101140.2040141-1-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index c2191c07442b..379228f22186 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -2021,8 +2021,10 @@ int sdw_stream_add_slave(struct sdw_slave *slave, skip_alloc_master_rt: s_rt = sdw_slave_rt_find(slave, stream); - if (s_rt) + if (s_rt) { + alloc_slave_rt = false; goto skip_alloc_slave_rt; + } s_rt = sdw_slave_rt_alloc(slave, m_rt); if (!s_rt) { From 1f2030ff6e4958780af9e65a22c39fa8082f292b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 6 Jun 2023 10:05:04 +0800 Subject: [PATCH 572/934] btrfs: scrub: respect the read-only flag during repair [BUG] With recent scrub rework, the scrub operation no longer respects the read-only flag passed by "-r" option of "btrfs scrub start" command. # mkfs.btrfs -f -d raid1 $dev1 $dev2 # mount $dev1 $mnt # xfs_io -f -d -c "pwrite -b 128K -S 0xaa 0 128k" $mnt/file # sync # xfs_io -c "pwrite -S 0xff $phy1 64k" $dev1 # xfs_io -c "pwrite -S 0xff $((phy2 + 65536)) 64k" $dev2 # mount $dev1 $mnt -o ro # btrfs scrub start -BrRd $mnt Scrub device $dev1 (id 1) done Scrub started: Tue Jun 6 09:59:14 2023 Status: finished Duration: 0:00:00 [...] corrected_errors: 16 <<< Still has corrupted sectors last_physical: 1372585984 Scrub device $dev2 (id 2) done Scrub started: Tue Jun 6 09:59:14 2023 Status: finished Duration: 0:00:00 [...] corrected_errors: 16 <<< Still has corrupted sectors last_physical: 1351614464 # btrfs scrub start -BrRd $mnt Scrub device $dev1 (id 1) done Scrub started: Tue Jun 6 10:00:17 2023 Status: finished Duration: 0:00:00 [...] corrected_errors: 0 <<< No more errors last_physical: 1372585984 Scrub device $dev2 (id 2) done [...] corrected_errors: 0 <<< No more errors last_physical: 1372585984 [CAUSE] In the newly reworked scrub code, repair is always submitted no matter if we're doing a read-only scrub. [FIX] Fix it by skipping the write submission if the scrub is a read-only one. Unfortunately for the report part, even for a read-only scrub we will still report it as corrected errors, as we know it's repairable, even we won't really submit the write. Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure") Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 7c666517d3d3..72c2c093df4b 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1730,7 +1730,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx) break; } } - } else { + } else if (!sctx->readonly) { for (int i = 0; i < nr_stripes; i++) { unsigned long repaired; From 79b8ee702c918f1936e17cc53e14bec388ce1045 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 6 Jun 2023 15:08:28 +0800 Subject: [PATCH 573/934] btrfs: scrub: also report errors hit during the initial read [BUG] After the recent scrub rework introduced in commit e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure"), btrfs scrub no longer reports repaired errors any more: # mkfs.btrfs -f $dev -d DUP # mount $dev $mnt # xfs_io -f -d -c "pwrite -b 64K -S 0xaa 0 64" $mnt/file # umount $dev # xfs_io -f -c "pwrite -S 0xff $phy1 64K" $dev # Corrupt the first mirror # mount $dev $mnt # btrfs scrub start -BR $mnt scrub done for 725e7cb7-8a4a-4c77-9f2a-86943619e218 Scrub started: Tue Jun 6 14:56:50 2023 Status: finished Duration: 0:00:00 data_extents_scrubbed: 2 tree_extents_scrubbed: 18 data_bytes_scrubbed: 131072 tree_bytes_scrubbed: 294912 read_errors: 0 csum_errors: 0 <<< No errors here verify_errors: 0 [...] uncorrectable_errors: 0 unverified_errors: 0 corrected_errors: 16 <<< Only corrected errors last_physical: 2723151872 This can confuse btrfs-progs, as it relies on the csum_errors to determine if there is anything wrong. While on v6.3.x kernels, the report is different: csum_errors: 16 <<< verify_errors: 0 [...] uncorrectable_errors: 0 unverified_errors: 0 corrected_errors: 16 <<< [CAUSE] In the reworked scrub, we update the scrub progress inside scrub_stripe_report_errors(), using various bitmaps to update the result. For example for csum_errors, we use bitmap_weight() of stripe->csum_error_bitmap. Unfortunately at that stage, all error bitmaps (except init_error_bitmap) are the result of the latest repair attempt, thus if the stripe is fully repaired, those error bitmaps will all be empty, resulting the above output mismatch. To fix this, record the number of errors into stripe->init_nr_*_errors. Since we don't really care about where those errors are, we only need to record the number of errors. Then in scrub_stripe_report_errors(), use those initial numbers to update the progress other than using the latest error bitmaps. Fixes: e02ee89baa66 ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure") Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 72c2c093df4b..50c241aba1a1 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -134,8 +134,14 @@ struct scrub_stripe { * The errors hit during the initial read of the stripe. * * Would be utilized for error reporting and repair. + * + * The remaining init_nr_* records the number of errors hit, only used + * by error reporting. */ unsigned long init_error_bitmap; + unsigned int init_nr_io_errors; + unsigned int init_nr_csum_errors; + unsigned int init_nr_meta_errors; /* * The following error bitmaps are all for the current status. @@ -1003,12 +1009,9 @@ skip: sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits; sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits; sctx->stat.no_csum += nr_nodatacsum_sectors; - sctx->stat.read_errors += - bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors); - sctx->stat.csum_errors += - bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors); - sctx->stat.verify_errors += - bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors); + sctx->stat.read_errors += stripe->init_nr_io_errors; + sctx->stat.csum_errors += stripe->init_nr_csum_errors; + sctx->stat.verify_errors += stripe->init_nr_meta_errors; sctx->stat.uncorrectable_errors += bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors); sctx->stat.corrected_errors += nr_repaired_sectors; @@ -1041,6 +1044,12 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work) scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap); /* Save the initial failed bitmap for later repair and report usage. */ stripe->init_error_bitmap = stripe->error_bitmap; + stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap, + stripe->nr_sectors); + stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap, + stripe->nr_sectors); + stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap, + stripe->nr_sectors); if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) goto out; @@ -1490,6 +1499,9 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe) { stripe->extent_sector_bitmap = 0; stripe->init_error_bitmap = 0; + stripe->init_nr_io_errors = 0; + stripe->init_nr_csum_errors = 0; + stripe->init_nr_meta_errors = 0; stripe->error_bitmap = 0; stripe->io_error_bitmap = 0; stripe->csum_error_bitmap = 0; From f7efdf5e4dc10756ad632b1863b54b1b42a07530 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 15 May 2023 07:26:14 +0900 Subject: [PATCH 574/934] dt-bindings: Change Damien Le Moal's contact email Change my email address to dlemoal@kernel.org. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230514222614.115299-1-dlemoal@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/ata/ahci-common.yaml | 2 +- Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml | 2 +- Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml | 2 +- .../devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml | 2 +- Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml | 2 +- Documentation/devicetree/bindings/riscv/canaan.yaml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/devicetree/bindings/ata/ahci-common.yaml b/Documentation/devicetree/bindings/ata/ahci-common.yaml index 7fdf40954a4c..38770c4c85fd 100644 --- a/Documentation/devicetree/bindings/ata/ahci-common.yaml +++ b/Documentation/devicetree/bindings/ata/ahci-common.yaml @@ -8,7 +8,7 @@ title: Common Properties for Serial ATA AHCI controllers maintainers: - Hans de Goede - - Damien Le Moal + - Damien Le Moal description: This document defines device tree properties for a common AHCI SATA diff --git a/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml index 998e5cce652f..380cb6d80025 100644 --- a/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml +++ b/Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan Kendryte K210 Clock maintainers: - - Damien Le Moal + - Damien Le Moal description: | Canaan Kendryte K210 SoC clocks driver bindings. The clock diff --git a/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml b/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml index 8459d3642205..3b3beab9db3f 100644 --- a/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml +++ b/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan Kendryte K210 System Controller maintainers: - - Damien Le Moal + - Damien Le Moal description: Canaan Inc. Kendryte K210 SoC system controller which provides a diff --git a/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml b/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml index 7f4f36a58e56..739a08f00467 100644 --- a/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml +++ b/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan Kendryte K210 FPIOA maintainers: - - Damien Le Moal + - Damien Le Moal description: The Canaan Kendryte K210 SoC Fully Programmable IO Array (FPIOA) diff --git a/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml b/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml index ee8a2dcf5dfa..0c0135964b91 100644 --- a/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml +++ b/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan Kendryte K210 Reset Controller maintainers: - - Damien Le Moal + - Damien Le Moal description: | Canaan Kendryte K210 reset controller driver which supports the SoC diff --git a/Documentation/devicetree/bindings/riscv/canaan.yaml b/Documentation/devicetree/bindings/riscv/canaan.yaml index f8f3f286bd55..41fd11f70a49 100644 --- a/Documentation/devicetree/bindings/riscv/canaan.yaml +++ b/Documentation/devicetree/bindings/riscv/canaan.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Canaan SoC-based boards maintainers: - - Damien Le Moal + - Damien Le Moal description: Canaan Kendryte K210 SoC-based boards From c44e0503e5fd90c5ccbf81bcff7b6b70e9e2655b Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 24 Apr 2023 13:52:53 +0300 Subject: [PATCH 575/934] docs: dt: fix documented Primecell compatible string Only arm,primecell is documented as compatible string for Primecell peripherals. Current code agrees with that. Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/9e137548c4e76e0d8deef6d49460cb37897934ca.1682333574.git.baruch@tkos.co.il Signed-off-by: Rob Herring --- Documentation/devicetree/usage-model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/usage-model.rst b/Documentation/devicetree/usage-model.rst index b6a287955ee5..0717426856b2 100644 --- a/Documentation/devicetree/usage-model.rst +++ b/Documentation/devicetree/usage-model.rst @@ -415,6 +415,6 @@ When using the DT, this creates problems for of_platform_populate() because it must decide whether to register each node as either a platform_device or an amba_device. This unfortunately complicates the device creation model a little bit, but the solution turns out not to -be too invasive. If a node is compatible with "arm,amba-primecell", then +be too invasive. If a node is compatible with "arm,primecell", then of_platform_populate() will register it as an amba_device instead of a platform_device. From b0b4a63f38d7cae3c57588592556481aedcae8b1 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 24 Apr 2023 13:52:54 +0300 Subject: [PATCH 576/934] docs: zh_CN/devicetree: sync usage-model fix Sync compatibly string fix from the English document. Signed-off-by: Baruch Siach Reviewed-by: Yanteng Si Link: https://lore.kernel.org/r/b39560250cb58f8cdcfe95791ce5af7455c6e8e3.1682333574.git.baruch@tkos.co.il Signed-off-by: Rob Herring --- Documentation/translations/zh_CN/devicetree/usage-model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/translations/zh_CN/devicetree/usage-model.rst b/Documentation/translations/zh_CN/devicetree/usage-model.rst index c6aee82c7e6e..19ba4ae0cd81 100644 --- a/Documentation/translations/zh_CN/devicetree/usage-model.rst +++ b/Documentation/translations/zh_CN/devicetree/usage-model.rst @@ -325,6 +325,6 @@ Primecell设备。然而,棘手的一点是,AMBA总线上的所有设备并 当使用DT时,这给of_platform_populate()带来了问题,因为它必须决定是否将 每个节点注册为platform_device或amba_device。不幸的是,这使设备创建模型 -变得有点复杂,但解决方案原来并不是太具有侵略性。如果一个节点与“arm,amba-primecell” +变得有点复杂,但解决方案原来并不是太具有侵略性。如果一个节点与“arm,primecell” 兼容,那么of_platform_populate()将把它注册为amba_device而不是 platform_device。 From 1b29243933098cdbc31b579b5616e183b4275e2f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 8 Jun 2023 09:57:04 -0400 Subject: [PATCH 577/934] Revert "ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled" This reverts commit a44be64bbecb15a452496f60db6eacfee2b59c79. Link: https://lore.kernel.org/r/653b3359-2005-21b1-039d-c55ca4cffdcc@gmail.com Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 56a5d1c469fc..05fcecc36244 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6388,7 +6388,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) struct ext4_mount_options old_opts; ext4_group_t g; int err = 0; - int enable_rw = 0; #ifdef CONFIG_QUOTA int enable_quota = 0; int i, j; @@ -6575,7 +6574,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) if (err) goto restore_opts; - enable_rw = 1; + sb->s_flags &= ~SB_RDONLY; if (ext4_has_feature_mmp(sb)) { err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)); @@ -6622,9 +6621,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks) ext4_release_system_zone(sb); - if (enable_rw) - sb->s_flags &= ~SB_RDONLY; - /* * Reinitialize lazy itable initialization thread based on * current settings From dea9d8f7643fab07bf89a1155f1f94f37d096a5e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 8 Jun 2023 10:06:40 -0400 Subject: [PATCH 578/934] ext4: only check dquot_initialize_needed() when debugging ext4_xattr_block_set() relies on its caller to call dquot_initialize() on the inode. To assure that this has happened there are WARN_ON checks. Unfortunately, this is subject to false positives if there is an antagonist thread which is flipping the file system at high rates between r/o and rw. So only do the check if EXT4_XATTR_DEBUG is enabled. Link: https://lore.kernel.org/r/20230608044056.GA1418535@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 13d7f17a9c8c..321e3a888c20 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2056,8 +2056,9 @@ inserted: else { u32 ref; +#ifdef EXT4_XATTR_DEBUG WARN_ON_ONCE(dquot_initialize_needed(inode)); - +#endif /* The old block is released after updating the inode. */ error = dquot_alloc_block(inode, @@ -2120,8 +2121,9 @@ inserted: /* We need to allocate a new block */ ext4_fsblk_t goal, block; +#ifdef EXT4_XATTR_DEBUG WARN_ON_ONCE(dquot_initialize_needed(inode)); - +#endif goal = ext4_group_first_block_no(sb, EXT4_I(inode)->i_block_group); block = ext4_new_meta_blocks(handle, inode, goal, 0, From 05a1308a2e08e4a375bf60eb4c6c057a201d81fc Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 25 May 2023 12:52:58 +0200 Subject: [PATCH 579/934] ice: Don't dereference NULL in ice_gnss_read error path If pf is NULL in ice_gnss_read() then it will be dereferenced in the error path by a call to dev_dbg(ice_pf_to_dev(pf), ...). Avoid this by simply returning in this case. If logging is desired an alternate approach might be to use pr_err() before returning. Flagged by Smatch as: .../ice_gnss.c:196 ice_gnss_read() error: we previously assumed 'pf' could be null (see line 131) Fixes: 43113ff73453 ("ice: add TTY for GNSS module for E810T device") Signed-off-by: Simon Horman Reviewed-by: Tariq Toukan Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_gnss.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index bd0ed155e11b..75c9de675f20 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -96,12 +96,7 @@ static void ice_gnss_read(struct kthread_work *work) int err = 0; pf = gnss->back; - if (!pf) { - err = -EFAULT; - goto exit; - } - - if (!test_bit(ICE_FLAG_GNSS, pf->flags)) + if (!pf || !test_bit(ICE_FLAG_GNSS, pf->flags)) return; hw = &pf->hw; @@ -159,7 +154,6 @@ free_buf: free_page((unsigned long)buf); requeue: kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, delay); -exit: if (err) dev_dbg(ice_pf_to_dev(pf), "GNSS failed to read err=%d\n", err); } From 99a670b2069c725a7b50318aa681d9cae8f89325 Mon Sep 17 00:00:00 2001 From: Ruan Jinjie Date: Thu, 4 May 2023 15:29:10 +0800 Subject: [PATCH 580/934] riscv: fix kprobe __user string arg print fault issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On riscv qemu platform, when add kprobe event on do_sys_open() to show filename string arg, it just print fault as follow: echo 'p:myprobe do_sys_open dfd=$arg1 filename=+0($arg2):string flags=$arg3 mode=$arg4' > kprobe_events bash-166 [000] ...1. 360.195367: myprobe: (do_sys_open+0x0/0x84) dfd=0xffffffffffffff9c filename=(fault) flags=0x8241 mode=0x1b6 bash-166 [000] ...1. 360.219369: myprobe: (do_sys_open+0x0/0x84) dfd=0xffffffffffffff9c filename=(fault) flags=0x8241 mode=0x1b6 bash-191 [000] ...1. 360.378827: myprobe: (do_sys_open+0x0/0x84) dfd=0xffffffffffffff9c filename=(fault) flags=0x98800 mode=0x0 As riscv do not select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE, the +0($arg2) addr is processed as a kernel address though it is a userspace address, cause the above filename=(fault) print. So select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE to avoid the issue, after that the kprobe trace is ok as below: bash-166 [000] ...1. 96.767641: myprobe: (do_sys_open+0x0/0x84) dfd=0xffffffffffffff9c filename="/dev/null" flags=0x8241 mode=0x1b6 bash-166 [000] ...1. 96.793751: myprobe: (do_sys_open+0x0/0x84) dfd=0xffffffffffffff9c filename="/dev/null" flags=0x8241 mode=0x1b6 bash-177 [000] ...1. 96.962354: myprobe: (do_sys_open+0x0/0x84) dfd=0xffffffffffffff9c filename="/sys/kernel/debug/tracing/events/kprobes/" flags=0x98800 mode=0x0 Signed-off-by: Ruan Jinjie Acked-by: Björn Töpel Fixes: 0ebeea8ca8a4 ("bpf: Restrict bpf_probe_read{, str}() only to archs where they work") Link: https://lore.kernel.org/r/20230504072910.3742842-1-ruanjinjie@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 2bb0c38419ff..5966ad97c30c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -26,6 +26,7 @@ config RISCV select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MMIOWB + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU From 78c50d6961fc05491ebbc71c35d87324b1a4f49a Mon Sep 17 00:00:00 2001 From: Kamil Maziarz Date: Tue, 6 Jun 2023 12:33:58 +0200 Subject: [PATCH 581/934] ice: Fix XDP memory leak when NIC is brought up and down Fix the buffer leak that occurs while switching the port up and down with traffic and XDP by checking for an active XDP program and freeing all empty TX buffers. Fixes: efc2214b6047 ("ice: Add support for XDP") Signed-off-by: Kamil Maziarz Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a1f7c8edc22f..03513d4871ab 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7056,6 +7056,10 @@ int ice_down(struct ice_vsi *vsi) ice_for_each_txq(vsi, i) ice_clean_tx_ring(vsi->tx_rings[i]); + if (ice_is_xdp_ena_vsi(vsi)) + ice_for_each_xdp_txq(vsi, i) + ice_clean_tx_ring(vsi->xdp_rings[i]); + ice_for_each_rxq(vsi, i) ice_clean_rx_ring(vsi->rx_rings[i]); From 8d38800a762c880244f3df87abbc9f34e276e955 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Wed, 7 Jun 2023 18:07:29 +0100 Subject: [PATCH 582/934] MAINTAINERS: Add entries for Renesas RZ/V2M I2C driver Add the MAINTAINERS entries for the Renesas RZ/V2M I2C driver. Signed-off-by: Biju Das Acked-by: Fabrizio Castro Signed-off-by: Wolfram Sang --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c2e40cc1d6be..3dfeadba5dc2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18039,6 +18039,14 @@ S: Maintained F: Documentation/devicetree/bindings/usb/renesas,rzn1-usbf.yaml F: drivers/usb/gadget/udc/renesas_usbf.c +RENESAS RZ/V2M I2C DRIVER +M: Fabrizio Castro +L: linux-i2c@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +S: Supported +F: Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml +F: drivers/i2c/busses/i2c-rzv2m.c + RENESAS USB PHY DRIVER M: Yoshihiro Shimoda L: linux-renesas-soc@vger.kernel.org From a90e8608eb0ed93d31ac0feb055f77ce59512542 Mon Sep 17 00:00:00 2001 From: Sheng Zhao Date: Tue, 30 May 2023 11:36:26 +0800 Subject: [PATCH 583/934] vduse: avoid empty string for dev name Syzkaller hits a kernel WARN when the first character of the dev name provided is NULL. Solution is to add a NULL check before calling cdev_device_add() in vduse_create_dev(). kobject: (0000000072042169): attempted to be registered with empty name! WARNING: CPU: 0 PID: 112695 at lib/kobject.c:236 Call Trace: kobject_add_varg linux/src/lib/kobject.c:390 [inline] kobject_add+0xf6/0x150 linux/src/lib/kobject.c:442 device_add+0x28f/0xc20 linux/src/drivers/base/core.c:2167 cdev_device_add+0x83/0xc0 linux/src/fs/char_dev.c:546 vduse_create_dev linux/src/drivers/vdpa/vdpa_user/vduse_dev.c:2254 [inline] vduse_ioctl+0x7b5/0xf30 linux/src/drivers/vdpa/vdpa_user/vduse_dev.c:2316 vfs_ioctl linux/src/fs/ioctl.c:47 [inline] file_ioctl linux/src/fs/ioctl.c:510 [inline] do_vfs_ioctl+0x14b/0xa80 linux/src/fs/ioctl.c:697 ksys_ioctl+0x7c/0xa0 linux/src/fs/ioctl.c:714 __do_sys_ioctl linux/src/fs/ioctl.c:721 [inline] __se_sys_ioctl linux/src/fs/ioctl.c:719 [inline] __x64_sys_ioctl+0x42/0x50 linux/src/fs/ioctl.c:719 do_syscall_64+0x94/0x330 linux/src/arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Cc: "Xie Yongji" Reported-by: Xianjun Zeng Signed-off-by: Sheng Zhao Message-Id: <20230530033626.1266794-1-sheng.zhao@bytedance.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Xie Yongji Cc: "Michael S. Tsirkin", "Jason Wang", Reviewed-by: Xie Yongji --- drivers/vdpa/vdpa_user/vduse_dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index de97e38c3b82..5f5c21674fdc 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1685,6 +1685,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config) if (config->vq_num > 0xffff) return false; + if (!config->name[0]) + return false; + if (!device_is_allowed(config->device_id)) return false; From 57380fd1249b20ef772549af2c58ef57b21faba7 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Wed, 24 May 2023 20:31:24 +0800 Subject: [PATCH 584/934] tools/virtio: Fix arm64 ringtest compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add cpu_relax() for arm64 instead of directly assert(), and add assert.h header file. Also, add smp_wmb and smp_mb for arm64. Compilation error as follows, avoid __always_inline undefined. $ make cc -Wall -pthread -O2 -ggdb -flto -fwhole-program -c -o ring.o ring.c In file included from ring.c:10: main.h: In function ‘busy_wait’: main.h:99:21: warning: implicit declaration of function ‘assert’ [-Wimplicit-function-declaration] 99 | #define cpu_relax() assert(0) | ^~~~~~ main.h:107:17: note: in expansion of macro ‘cpu_relax’ 107 | cpu_relax(); | ^~~~~~~~~ main.h:12:1: note: ‘assert’ is defined in header ‘’; did you forget to ‘#include ’? 11 | #include +++ |+#include 12 | main.h: At top level: main.h:143:23: error: expected ‘;’ before ‘void’ 143 | static __always_inline | ^ | ; 144 | void __read_once_size(const volatile void *p, void *res, int size) | ~~~~ main.h:158:23: error: expected ‘;’ before ‘void’ 158 | static __always_inline void __write_once_size(volatile void *p, void *res, int size) | ^~~~~ | ; make: *** [: ring.o] Error 1 Signed-off-by: Rong Tao Message-Id: Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/main.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index b68920d52750..d18dd317e27f 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -8,6 +8,7 @@ #ifndef MAIN_H #define MAIN_H +#include #include extern int param; @@ -95,6 +96,8 @@ extern unsigned ring_size; #define cpu_relax() asm ("rep; nop" ::: "memory") #elif defined(__s390x__) #define cpu_relax() barrier() +#elif defined(__aarch64__) +#define cpu_relax() asm ("yield" ::: "memory") #else #define cpu_relax() assert(0) #endif @@ -112,6 +115,8 @@ static inline void busy_wait(void) #if defined(__x86_64__) || defined(__i386__) #define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc") +#elif defined(__aarch64__) +#define smp_mb() asm volatile("dmb ish" ::: "memory") #else /* * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized @@ -136,10 +141,16 @@ static inline void busy_wait(void) #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) #define smp_wmb() barrier() +#elif defined(__aarch64__) +#define smp_wmb() asm volatile("dmb ishst" ::: "memory") #else #define smp_wmb() smp_release() #endif +#ifndef __always_inline +#define __always_inline inline __attribute__((always_inline)) +#endif + static __always_inline void __read_once_size(const volatile void *p, void *res, int size) { From c66dbc39a7104c5a9f033c0450dfa6f697a71f94 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Thu, 25 May 2023 16:37:28 +0800 Subject: [PATCH 585/934] tools/virtio: Add .gitignore for ringtest Ignore executables for ringtest. Signed-off-by: Rong Tao Message-Id: Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/.gitignore | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 tools/virtio/ringtest/.gitignore diff --git a/tools/virtio/ringtest/.gitignore b/tools/virtio/ringtest/.gitignore new file mode 100644 index 000000000000..100b9e30c0f4 --- /dev/null +++ b/tools/virtio/ringtest/.gitignore @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +/noring +/ptr_ring +/ring +/virtio_ring_0_9 +/virtio_ring_inorder +/virtio_ring_poll From 73790bdfba076c0886f0f14fd46ff2c70ee31ce9 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 16 May 2023 12:58:01 +0300 Subject: [PATCH 586/934] vdpa/mlx5: Fix hang when cvq commands are triggered during device unregister Currently the vdpa device is unregistered after the workqueue that processes vq commands is disabled. However, the device unregister process can still send commands to the cvq (a vlan delete for example) which leads to a hang because the handing workqueue has been disabled and the command never finishes: [ 2263.095764] rcu: INFO: rcu_sched self-detected stall on CPU [ 2263.096307] rcu: 9-....: (5250 ticks this GP) idle=dac4/1/0x4000000000000000 softirq=111009/111009 fqs=2544 [ 2263.097154] rcu: (t=5251 jiffies g=393549 q=347 ncpus=10) [ 2263.097648] CPU: 9 PID: 94300 Comm: kworker/u20:2 Not tainted 6.3.0-rc6_for_upstream_min_debug_2023_04_14_00_02 #1 [ 2263.098535] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 2263.099481] Workqueue: mlx5_events mlx5_vhca_state_work_handler [mlx5_core] [ 2263.100143] RIP: 0010:virtnet_send_command+0x109/0x170 [ 2263.100621] Code: 1d df f5 ff 85 c0 78 5c 48 8b 7b 08 e8 d0 c5 f5 ff 84 c0 75 11 eb 22 48 8b 7b 08 e8 01 b7 f5 ff 84 c0 75 15 f3 90 48 8b 7b 08 <48> 8d 74 24 04 e8 8d c5 f5 ff 48 85 c0 74 de 48 8b 83 f8 00 00 00 [ 2263.102148] RSP: 0018:ffff888139cf36e8 EFLAGS: 00000246 [ 2263.102624] RAX: 0000000000000000 RBX: ffff888166bea940 RCX: 0000000000000001 [ 2263.103244] RDX: 0000000000000000 RSI: ffff888139cf36ec RDI: ffff888146763800 [ 2263.103864] RBP: ffff888139cf3710 R08: ffff88810d201000 R09: 0000000000000000 [ 2263.104473] R10: 0000000000000002 R11: 0000000000000003 R12: 0000000000000002 [ 2263.105082] R13: 0000000000000002 R14: ffff888114528400 R15: ffff888166bea000 [ 2263.105689] FS: 0000000000000000(0000) GS:ffff88852cc80000(0000) knlGS:0000000000000000 [ 2263.106404] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2263.106925] CR2: 00007f31f394b000 CR3: 000000010615b006 CR4: 0000000000370ea0 [ 2263.107542] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 2263.108163] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 2263.108769] Call Trace: [ 2263.109059] [ 2263.109320] ? check_preempt_wakeup+0x11f/0x230 [ 2263.109750] virtnet_vlan_rx_kill_vid+0x5a/0xa0 [ 2263.110180] vlan_vid_del+0x9c/0x170 [ 2263.110546] vlan_device_event+0x351/0x760 [8021q] [ 2263.111004] raw_notifier_call_chain+0x41/0x60 [ 2263.111426] dev_close_many+0xcb/0x120 [ 2263.111808] unregister_netdevice_many_notify+0x130/0x770 [ 2263.112297] ? wq_worker_running+0xa/0x30 [ 2263.112688] unregister_netdevice_queue+0x89/0xc0 [ 2263.113128] unregister_netdev+0x18/0x20 [ 2263.113512] virtnet_remove+0x4f/0x230 [ 2263.113885] virtio_dev_remove+0x31/0x70 [ 2263.114273] device_release_driver_internal+0x18f/0x1f0 [ 2263.114746] bus_remove_device+0xc6/0x130 [ 2263.115146] device_del+0x173/0x3c0 [ 2263.115502] ? kernfs_find_ns+0x35/0xd0 [ 2263.115895] device_unregister+0x1a/0x60 [ 2263.116279] unregister_virtio_device+0x11/0x20 [ 2263.116706] device_release_driver_internal+0x18f/0x1f0 [ 2263.117182] bus_remove_device+0xc6/0x130 [ 2263.117576] device_del+0x173/0x3c0 [ 2263.117929] ? vdpa_dev_remove+0x20/0x20 [vdpa] [ 2263.118364] device_unregister+0x1a/0x60 [ 2263.118752] mlx5_vdpa_dev_del+0x4c/0x80 [mlx5_vdpa] [ 2263.119232] vdpa_match_remove+0x21/0x30 [vdpa] [ 2263.119663] bus_for_each_dev+0x71/0xc0 [ 2263.120054] vdpa_mgmtdev_unregister+0x57/0x70 [vdpa] [ 2263.120520] mlx5v_remove+0x12/0x20 [mlx5_vdpa] [ 2263.120953] auxiliary_bus_remove+0x18/0x30 [ 2263.121356] device_release_driver_internal+0x18f/0x1f0 [ 2263.121830] bus_remove_device+0xc6/0x130 [ 2263.122223] device_del+0x173/0x3c0 [ 2263.122581] ? devl_param_driverinit_value_get+0x29/0x90 [ 2263.123070] mlx5_rescan_drivers_locked+0xc4/0x2d0 [mlx5_core] [ 2263.123633] mlx5_unregister_device+0x54/0x80 [mlx5_core] [ 2263.124169] mlx5_uninit_one+0x54/0x150 [mlx5_core] [ 2263.124656] mlx5_sf_dev_remove+0x45/0x90 [mlx5_core] [ 2263.125153] auxiliary_bus_remove+0x18/0x30 [ 2263.125560] device_release_driver_internal+0x18f/0x1f0 [ 2263.126052] bus_remove_device+0xc6/0x130 [ 2263.126451] device_del+0x173/0x3c0 [ 2263.126815] mlx5_sf_dev_remove+0x39/0xf0 [mlx5_core] [ 2263.127318] mlx5_sf_dev_state_change_handler+0x178/0x270 [mlx5_core] [ 2263.127920] blocking_notifier_call_chain+0x5a/0x80 [ 2263.128379] mlx5_vhca_state_work_handler+0x151/0x200 [mlx5_core] [ 2263.128951] process_one_work+0x1bb/0x3c0 [ 2263.129355] ? process_one_work+0x3c0/0x3c0 [ 2263.129766] worker_thread+0x4d/0x3c0 [ 2263.130140] ? process_one_work+0x3c0/0x3c0 [ 2263.130548] kthread+0xb9/0xe0 [ 2263.130895] ? kthread_complete_and_exit+0x20/0x20 [ 2263.131349] ret_from_fork+0x1f/0x30 [ 2263.131717] The fix is to disable and destroy the workqueue after the device unregister. It is expected that vhost will not trigger kicks after the unregister. But even if it would, the wq is disabled already by setting the pointer to NULL (done so in the referenced commit). Fixes: ad6dc1daaf29 ("vdpa/mlx5: Avoid processing works if workqueue was destroyed") Signed-off-by: Dragos Tatulea Message-Id: <20230516095800.3549932-1-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Tariq Toukan Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index e29e32b306ad..279ac6a558d2 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -3349,10 +3349,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * mlx5_vdpa_remove_debugfs(ndev->debugfs); ndev->debugfs = NULL; unregister_link_notifier(ndev); + _vdpa_unregister_device(dev); wq = mvdev->wq; mvdev->wq = NULL; destroy_workqueue(wq); - _vdpa_unregister_device(dev); mgtdev->ndev = NULL; } From 376daf317753ccb6b1ecbdece66018f7f6313a7f Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 24 Apr 2023 15:50:29 -0700 Subject: [PATCH 587/934] vhost_vdpa: tell vqs about the negotiated As is done in the net, iscsi, and vsock vhost support, let the vdpa vqs know about the features that have been negotiated. This allows vhost to more safely make decisions based on the features, such as when using PACKED vs split queues. Signed-off-by: Shannon Nelson Acked-by: Jason Wang Message-Id: <20230424225031.18947-2-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 8c1aefc865f0..89d5ecfe7d2e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -407,7 +407,10 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) { struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; + struct vhost_dev *d = &v->vdev; + u64 actual_features; u64 features; + int i; /* * It's not allowed to change the features after they have @@ -422,6 +425,16 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) if (vdpa_set_features(vdpa, features)) return -EINVAL; + /* let the vqs know what has been configured */ + actual_features = ops->get_driver_features(vdpa); + for (i = 0; i < d->nvqs; ++i) { + struct vhost_virtqueue *vq = d->vqs[i]; + + mutex_lock(&vq->mutex); + vq->acked_features = actual_features; + mutex_unlock(&vq->mutex); + } + return 0; } From 1f5d2e3bab16369d5d4b4020a25db4ab1f4f082c Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 24 Apr 2023 23:44:11 +0300 Subject: [PATCH 588/934] vhost_net: revert upend_idx only on retriable error Fix possible virtqueue used buffers leak and corresponding stuck in case of temporary -EIO from sendmsg() which is produced by tun driver while backend device is not up. In case of no-retriable error and zcopy do not revert upend_idx to pass packet data (that is update used_idx in corresponding vhost_zerocopy_signal_used()) as if packet data has been transferred successfully. v2: set vq->heads[ubuf->desc].len equal to VHOST_DMA_DONE_LEN in case of fake successful transmit. Signed-off-by: Andrey Smetanin Message-Id: <20230424204411.24888-1-asmetanin@yandex-team.ru> Signed-off-by: Michael S. Tsirkin Signed-off-by: Andrey Smetanin Acked-by: Jason Wang --- drivers/vhost/net.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 07181cd8d52e..ae2273196b0c 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -935,13 +935,18 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { + bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS; + if (zcopy_used) { if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS) vhost_net_ubuf_put(ubufs); - nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) - % UIO_MAXIOV; + if (retry) + nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) + % UIO_MAXIOV; + else + vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; } - if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) { + if (retry) { vhost_discard_vq_desc(vq, 1); vhost_net_enable_vq(net, vq); break; From a284f09effea1908db7985dbfb5458c9100038d8 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 7 Jun 2023 14:23:37 -0500 Subject: [PATCH 589/934] vhost: Fix crash during early vhost_transport_send_pkt calls If userspace does VHOST_VSOCK_SET_GUEST_CID before VHOST_SET_OWNER we can race where: 1. thread0 calls vhost_transport_send_pkt -> vhost_work_queue 2. thread1 does VHOST_SET_OWNER which calls vhost_worker_create. 3. vhost_worker_create will set the dev->worker pointer before setting the worker->vtsk pointer. 4. thread0's vhost_work_queue will see the dev->worker pointer is set and try to call vhost_task_wake using not yet set worker->vtsk pointer. 5. We then crash since vtsk is NULL. Before commit 6e890c5d5021 ("vhost: use vhost_tasks for worker threads"), we only had the worker pointer so we could just check it to see if VHOST_SET_OWNER has been done. After that commit we have the vhost_worker and vhost_task pointer, so we can now hit the bug above. This patch embeds the vhost_worker in the vhost_dev and moves the work list initialization back to vhost_dev_init, so we can just check the worker.vtsk pointer to check if VHOST_SET_OWNER has been done like before. Fixes: 6e890c5d5021 ("vhost: use vhost_tasks for worker threads") Signed-off-by: Mike Christie Message-Id: <20230607192338.6041-2-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin Reported-by: syzbot+d0d442c22fa8db45ff0e@syzkaller.appspotmail.com Reviewed-by: Stefano Garzarella --- drivers/vhost/vhost.c | 50 +++++++++++++++---------------------------- drivers/vhost/vhost.h | 2 +- 2 files changed, 18 insertions(+), 34 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ecb3b397bb38..ca1041c88c68 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -235,7 +235,7 @@ void vhost_dev_flush(struct vhost_dev *dev) { struct vhost_flush_struct flush; - if (dev->worker) { + if (dev->worker.vtsk) { init_completion(&flush.wait_event); vhost_work_init(&flush.work, vhost_flush_work); @@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(vhost_dev_flush); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) { - if (!dev->worker) + if (!dev->worker.vtsk) return; if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { @@ -255,8 +255,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) * sure it was not in the list. * test_and_set_bit() implies a memory barrier. */ - llist_add(&work->node, &dev->worker->work_list); - vhost_task_wake(dev->worker->vtsk); + llist_add(&work->node, &dev->worker.work_list); + vhost_task_wake(dev->worker.vtsk); } } EXPORT_SYMBOL_GPL(vhost_work_queue); @@ -264,7 +264,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue); /* A lockless hint for busy polling code to exit the loop */ bool vhost_has_work(struct vhost_dev *dev) { - return dev->worker && !llist_empty(&dev->worker->work_list); + return !llist_empty(&dev->worker.work_list); } EXPORT_SYMBOL_GPL(vhost_has_work); @@ -456,7 +456,8 @@ void vhost_dev_init(struct vhost_dev *dev, dev->umem = NULL; dev->iotlb = NULL; dev->mm = NULL; - dev->worker = NULL; + memset(&dev->worker, 0, sizeof(dev->worker)); + init_llist_head(&dev->worker.work_list); dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; @@ -530,47 +531,30 @@ static void vhost_detach_mm(struct vhost_dev *dev) static void vhost_worker_free(struct vhost_dev *dev) { - struct vhost_worker *worker = dev->worker; - - if (!worker) + if (!dev->worker.vtsk) return; - dev->worker = NULL; - WARN_ON(!llist_empty(&worker->work_list)); - vhost_task_stop(worker->vtsk); - kfree(worker); + WARN_ON(!llist_empty(&dev->worker.work_list)); + vhost_task_stop(dev->worker.vtsk); + dev->worker.kcov_handle = 0; + dev->worker.vtsk = NULL; } static int vhost_worker_create(struct vhost_dev *dev) { - struct vhost_worker *worker; struct vhost_task *vtsk; char name[TASK_COMM_LEN]; - int ret; - worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); - if (!worker) - return -ENOMEM; - - dev->worker = worker; - worker->kcov_handle = kcov_common_handle(); - init_llist_head(&worker->work_list); snprintf(name, sizeof(name), "vhost-%d", current->pid); - vtsk = vhost_task_create(vhost_worker, worker, name); - if (!vtsk) { - ret = -ENOMEM; - goto free_worker; - } + vtsk = vhost_task_create(vhost_worker, &dev->worker, name); + if (!vtsk) + return -ENOMEM; - worker->vtsk = vtsk; + dev->worker.kcov_handle = kcov_common_handle(); + dev->worker.vtsk = vtsk; vhost_task_start(vtsk); return 0; - -free_worker: - kfree(worker); - dev->worker = NULL; - return ret; } /* Caller should have device mutex */ diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 0308638cdeee..305ec8593d46 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -154,7 +154,7 @@ struct vhost_dev { struct vhost_virtqueue **vqs; int nvqs; struct eventfd_ctx *log_ctx; - struct vhost_worker *worker; + struct vhost_worker worker; struct vhost_iotlb *umem; struct vhost_iotlb *iotlb; spinlock_t iotlb_lock; From 4b13cbef797048fbb525f8c635a5279e9d209d93 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 7 Jun 2023 14:23:38 -0500 Subject: [PATCH 590/934] vhost: Fix worker hangs due to missed wake up calls We can race where we have added work to the work_list, but vhost_task_fn has passed that check but not yet set us into TASK_INTERRUPTIBLE. wake_up_process will see us in TASK_RUNNING and just return. This bug was intoduced in commit f9010dbdce91 ("fork, vhost: Use CLONE_THREAD to fix freezer/ps regression") when I moved the setting of TASK_INTERRUPTIBLE to simplfy the code and avoid get_signal from logging warnings about being in the wrong state. This moves the setting of TASK_INTERRUPTIBLE back to before we test if we need to stop the task to avoid a possible race there as well. We then have vhost_worker set TASK_RUNNING if it finds work similar to before. Fixes: f9010dbdce91 ("fork, vhost: Use CLONE_THREAD to fix freezer/ps regression") Signed-off-by: Mike Christie Message-Id: <20230607192338.6041-3-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 2 ++ kernel/vhost_task.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ca1041c88c68..1f80eac5d6ae 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -341,6 +341,8 @@ static bool vhost_worker(void *data) node = llist_del_all(&worker->work_list); if (node) { + __set_current_state(TASK_RUNNING); + node = llist_reverse_order(node); /* make sure flag is seen after deletion */ smp_wmb(); diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index f80d5c51ae67..da35e5b7f047 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -28,10 +28,6 @@ static int vhost_task_fn(void *data) for (;;) { bool did_work; - /* mb paired w/ vhost_task_stop */ - if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) - break; - if (!dead && signal_pending(current)) { struct ksignal ksig; /* @@ -48,11 +44,17 @@ static int vhost_task_fn(void *data) clear_thread_flag(TIF_SIGPENDING); } - did_work = vtsk->fn(vtsk->data); - if (!did_work) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); + /* mb paired w/ vhost_task_stop */ + set_current_state(TASK_INTERRUPTIBLE); + + if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) { + __set_current_state(TASK_RUNNING); + break; } + + did_work = vtsk->fn(vtsk->data); + if (!did_work) + schedule(); } complete(&vtsk->exited); From 1240eb93f0616b21c675416516ff3d74798fdc97 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 8 Jun 2023 02:32:02 +0200 Subject: [PATCH 591/934] netfilter: nf_tables: incorrect error path handling with NFT_MSG_NEWRULE In case of error when adding a new rule that refers to an anonymous set, deactivate expressions via NFT_TRANS_PREPARE state, not NFT_TRANS_RELEASE. Thus, the lookup expression marks anonymous sets as inactive in the next generation to ensure it is not reachable in this transaction anymore and decrement the set refcount as introduced by c1592a89942e ("netfilter: nf_tables: deactivate anonymous set from preparation phase"). The abort step takes care of undoing the anonymous set. This is also consistent with rule deletion, where NFT_TRANS_PREPARE is used. Note that this error path is exercised in the preparation step of the commit protocol. This patch replaces nf_tables_rule_release() by the deactivate and destroy calls, this time with NFT_TRANS_PREPARE. Due to this incorrect error handling, it is possible to access a dangling pointer to the anonymous set that remains in the transaction list. [1009.379054] BUG: KASAN: use-after-free in nft_set_lookup_global+0x147/0x1a0 [nf_tables] [1009.379106] Read of size 8 at addr ffff88816c4c8020 by task nft-rule-add/137110 [1009.379116] CPU: 7 PID: 137110 Comm: nft-rule-add Not tainted 6.4.0-rc4+ #256 [1009.379128] Call Trace: [1009.379132] [1009.379135] dump_stack_lvl+0x33/0x50 [1009.379146] ? nft_set_lookup_global+0x147/0x1a0 [nf_tables] [1009.379191] print_address_description.constprop.0+0x27/0x300 [1009.379201] kasan_report+0x107/0x120 [1009.379210] ? nft_set_lookup_global+0x147/0x1a0 [nf_tables] [1009.379255] nft_set_lookup_global+0x147/0x1a0 [nf_tables] [1009.379302] nft_lookup_init+0xa5/0x270 [nf_tables] [1009.379350] nf_tables_newrule+0x698/0xe50 [nf_tables] [1009.379397] ? nf_tables_rule_release+0xe0/0xe0 [nf_tables] [1009.379441] ? kasan_unpoison+0x23/0x50 [1009.379450] nfnetlink_rcv_batch+0x97c/0xd90 [nfnetlink] [1009.379470] ? nfnetlink_rcv_msg+0x480/0x480 [nfnetlink] [1009.379485] ? __alloc_skb+0xb8/0x1e0 [1009.379493] ? __alloc_skb+0xb8/0x1e0 [1009.379502] ? entry_SYSCALL_64_after_hwframe+0x46/0xb0 [1009.379509] ? unwind_get_return_address+0x2a/0x40 [1009.379517] ? write_profile+0xc0/0xc0 [1009.379524] ? avc_lookup+0x8f/0xc0 [1009.379532] ? __rcu_read_unlock+0x43/0x60 Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3bb0800b3849..69bceefaa5c8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3844,7 +3844,8 @@ err_destroy_flow_rule: if (flow) nft_flow_rule_destroy(flow); err_release_rule: - nf_tables_rule_release(&ctx, rule); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE); + nf_tables_rule_destroy(&ctx, rule); err_release_expr: for (i = 0; i < n; i++) { if (expr_info[i].ops) { From 21225873be1472b7c59ed3650396af0e40578112 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 7 Jun 2023 17:10:48 +0800 Subject: [PATCH 592/934] net: enetc: correct the indexes of highest and 2nd highest TCs For ENETC hardware, the TCs are numbered from 0 to N-1, where N is the number of TCs. Numerically higher TC has higher priority. It's obvious that the highest priority TC index should be N-1 and the 2nd highest priority TC index should be N-2. However, the previous logic uses netdev_get_prio_tc_map() to get the indexes of highest priority and 2nd highest priority TCs, it does not make sense and is incorrect to give a "tc" argument to netdev_get_prio_tc_map(). So the driver may get the wrong indexes of the two highest priotiry TCs which would lead to failed to set the CBS for the two highest priotiry TCs. e.g. $ tc qdisc add dev eno0 parent root handle 100: mqprio num_tc 6 \ map 0 0 1 1 2 3 4 5 queues 1@0 1@1 1@2 1@3 2@4 2@6 hw 1 $ tc qdisc replace dev eno0 parent 100:6 cbs idleslope 100000 \ sendslope -900000 hicredit 12 locredit -113 offload 1 $ Error: Specified device failed to setup cbs hardware offload. ^^^^^ In this example, the previous logic deems the indexes of the two highest priotiry TCs should be 3 and 2. Actually, the indexes are 5 and 4, because the number of TCs is 6. So it would be failed to configure the CBS for the two highest priority TCs. Fixes: c431047c4efe ("enetc: add support Credit Based Shaper(CBS) for hardware offload") Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Reviewed-by: Maciej Fijalkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 83c27bbbc6ed..126007ab70f6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -181,8 +181,8 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) int bw_sum = 0; u8 bw; - prio_top = netdev_get_prio_tc_map(ndev, tc_nums - 1); - prio_next = netdev_get_prio_tc_map(ndev, tc_nums - 2); + prio_top = tc_nums - 1; + prio_next = tc_nums - 2; /* Support highest prio and second prio tc in cbs mode */ if (tc != prio_top && tc != prio_next) From a8e981ac2d0eb9dd53a4c173e29ca0c99c88abe2 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 23 May 2023 14:52:36 -0700 Subject: [PATCH 593/934] drm/msm/dp: enable HDP plugin/unplugged interrupts at hpd_enable/disable The internal_hpd flag is set to true by dp_bridge_hpd_enable() and set to false by dp_bridge_hpd_disable() to handle GPIO pinmuxed into DP controller case. HDP related interrupts can not be enabled until internal_hpd is set to true. At current implementation dp_display_config_hpd() will initialize DP host controller first followed by enabling HDP related interrupts if internal_hpd was true at that time. Enable HDP related interrupts depends on internal_hpd status may leave system with DP driver host is in running state but without HDP related interrupts being enabled. This will prevent external display from being detected. Eliminated this dependency by moving HDP related interrupts enable/disable be done at dp_bridge_hpd_enable/disable() directly regardless of internal_hpd status. Changes in V3: -- dp_catalog_ctrl_hpd_enable() and dp_catalog_ctrl_hpd_disable() -- rewording ocmmit text Changes in V4: -- replace dp_display_config_hpd() with dp_display_host_start() -- move enable_irq() at dp_display_host_start(); Changes in V5: -- replace dp_display_host_start() with dp_display_host_init() Changes in V6: -- squash remove enable_irq() and disable_irq() Fixes: cd198caddea7 ("drm/msm/dp: Rely on hpd_enable/disable callbacks") Signed-off-by: Kuogee Hsieh Tested-by: Leonard Lausen # on sc7180 lazor Reviewed-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Tested-by: Bjorn Andersson Reviewed-by: Abhinav Kumar Link: https://lore.kernel.org/r/1684878756-17830-1-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_catalog.c | 15 +++++- drivers/gpu/drm/msm/dp/dp_catalog.h | 3 +- drivers/gpu/drm/msm/dp/dp_display.c | 71 ++++++++--------------------- 3 files changed, 35 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index 7a8cf1c8233d..5142aeb705a4 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -620,7 +620,7 @@ void dp_catalog_hpd_config_intr(struct dp_catalog *dp_catalog, config & DP_DP_HPD_INT_MASK); } -void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog) +void dp_catalog_ctrl_hpd_enable(struct dp_catalog *dp_catalog) { struct dp_catalog_private *catalog = container_of(dp_catalog, struct dp_catalog_private, dp_catalog); @@ -635,6 +635,19 @@ void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog) dp_write_aux(catalog, REG_DP_DP_HPD_CTRL, DP_DP_HPD_CTRL_HPD_EN); } +void dp_catalog_ctrl_hpd_disable(struct dp_catalog *dp_catalog) +{ + struct dp_catalog_private *catalog = container_of(dp_catalog, + struct dp_catalog_private, dp_catalog); + + u32 reftimer = dp_read_aux(catalog, REG_DP_DP_HPD_REFTIMER); + + reftimer &= ~DP_DP_HPD_REFTIMER_ENABLE; + dp_write_aux(catalog, REG_DP_DP_HPD_REFTIMER, reftimer); + + dp_write_aux(catalog, REG_DP_DP_HPD_CTRL, 0); +} + static void dp_catalog_enable_sdp(struct dp_catalog_private *catalog) { /* trigger sdp */ diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.h b/drivers/gpu/drm/msm/dp/dp_catalog.h index 82376a2697ef..38786e855b51 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.h +++ b/drivers/gpu/drm/msm/dp/dp_catalog.h @@ -104,7 +104,8 @@ bool dp_catalog_ctrl_mainlink_ready(struct dp_catalog *dp_catalog); void dp_catalog_ctrl_enable_irq(struct dp_catalog *dp_catalog, bool enable); void dp_catalog_hpd_config_intr(struct dp_catalog *dp_catalog, u32 intr_mask, bool en); -void dp_catalog_ctrl_hpd_config(struct dp_catalog *dp_catalog); +void dp_catalog_ctrl_hpd_enable(struct dp_catalog *dp_catalog); +void dp_catalog_ctrl_hpd_disable(struct dp_catalog *dp_catalog); void dp_catalog_ctrl_config_psr(struct dp_catalog *dp_catalog); void dp_catalog_ctrl_set_psr(struct dp_catalog *dp_catalog, bool enter); u32 dp_catalog_link_is_connected(struct dp_catalog *dp_catalog); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 37e0d12b3319..03b0eda6df54 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -620,12 +620,6 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data) dp->hpd_state = ST_MAINLINK_READY; } - /* enable HDP irq_hpd/replug interrupt */ - if (dp->dp_display.internal_hpd) - dp_catalog_hpd_config_intr(dp->catalog, - DP_DP_IRQ_HPD_INT_MASK | DP_DP_HPD_REPLUG_INT_MASK, - true); - drm_dbg_dp(dp->drm_dev, "After, type=%d hpd_state=%d\n", dp->dp_display.connector_type, state); mutex_unlock(&dp->event_mutex); @@ -663,12 +657,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) drm_dbg_dp(dp->drm_dev, "Before, type=%d hpd_state=%d\n", dp->dp_display.connector_type, state); - /* disable irq_hpd/replug interrupts */ - if (dp->dp_display.internal_hpd) - dp_catalog_hpd_config_intr(dp->catalog, - DP_DP_IRQ_HPD_INT_MASK | DP_DP_HPD_REPLUG_INT_MASK, - false); - /* unplugged, no more irq_hpd handle */ dp_del_event(dp, EV_IRQ_HPD_INT); @@ -692,10 +680,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) return 0; } - /* disable HPD plug interrupts */ - if (dp->dp_display.internal_hpd) - dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK, false); - /* * We don't need separate work for disconnect as * connect/attention interrupts are disabled @@ -711,10 +695,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) /* signal the disconnect event early to ensure proper teardown */ dp_display_handle_plugged_change(&dp->dp_display, false); - /* enable HDP plug interrupt to prepare for next plugin */ - if (dp->dp_display.internal_hpd) - dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK, true); - drm_dbg_dp(dp->drm_dev, "After, type=%d hpd_state=%d\n", dp->dp_display.connector_type, state); @@ -1087,26 +1067,6 @@ void msm_dp_snapshot(struct msm_disp_state *disp_state, struct msm_dp *dp) mutex_unlock(&dp_display->event_mutex); } -static void dp_display_config_hpd(struct dp_display_private *dp) -{ - - dp_display_host_init(dp); - dp_catalog_ctrl_hpd_config(dp->catalog); - - /* Enable plug and unplug interrupts only if requested */ - if (dp->dp_display.internal_hpd) - dp_catalog_hpd_config_intr(dp->catalog, - DP_DP_HPD_PLUG_INT_MASK | - DP_DP_HPD_UNPLUG_INT_MASK, - true); - - /* Enable interrupt first time - * we are leaving dp clocks on during disconnect - * and never disable interrupt - */ - enable_irq(dp->irq); -} - void dp_display_set_psr(struct msm_dp *dp_display, bool enter) { struct dp_display_private *dp; @@ -1181,7 +1141,7 @@ static int hpd_event_thread(void *data) switch (todo->event_id) { case EV_HPD_INIT_SETUP: - dp_display_config_hpd(dp_priv); + dp_display_host_init(dp_priv); break; case EV_HPD_PLUG_INT: dp_hpd_plug_handle(dp_priv, todo->data); @@ -1287,7 +1247,6 @@ int dp_display_request_irq(struct msm_dp *dp_display) dp->irq, rc); return rc; } - disable_irq(dp->irq); return 0; } @@ -1399,13 +1358,8 @@ static int dp_pm_resume(struct device *dev) /* turn on dp ctrl/phy */ dp_display_host_init(dp); - dp_catalog_ctrl_hpd_config(dp->catalog); - - if (dp->dp_display.internal_hpd) - dp_catalog_hpd_config_intr(dp->catalog, - DP_DP_HPD_PLUG_INT_MASK | - DP_DP_HPD_UNPLUG_INT_MASK, - true); + if (dp_display->is_edp) + dp_catalog_ctrl_hpd_enable(dp->catalog); if (dp_catalog_link_is_connected(dp->catalog)) { /* @@ -1573,9 +1527,8 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) if (aux_bus && dp->is_edp) { dp_display_host_init(dp_priv); - dp_catalog_ctrl_hpd_config(dp_priv->catalog); + dp_catalog_ctrl_hpd_enable(dp_priv->catalog); dp_display_host_phy_init(dp_priv); - enable_irq(dp_priv->irq); /* * The code below assumes that the panel will finish probing @@ -1617,7 +1570,6 @@ static int dp_display_get_next_bridge(struct msm_dp *dp) error: if (dp->is_edp) { - disable_irq(dp_priv->irq); dp_display_host_phy_exit(dp_priv); dp_display_host_deinit(dp_priv); } @@ -1806,16 +1758,31 @@ void dp_bridge_hpd_enable(struct drm_bridge *bridge) { struct msm_dp_bridge *dp_bridge = to_dp_bridge(bridge); struct msm_dp *dp_display = dp_bridge->dp_display; + struct dp_display_private *dp = container_of(dp_display, struct dp_display_private, dp_display); + + mutex_lock(&dp->event_mutex); + dp_catalog_ctrl_hpd_enable(dp->catalog); + + /* enable HDP interrupts */ + dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, true); dp_display->internal_hpd = true; + mutex_unlock(&dp->event_mutex); } void dp_bridge_hpd_disable(struct drm_bridge *bridge) { struct msm_dp_bridge *dp_bridge = to_dp_bridge(bridge); struct msm_dp *dp_display = dp_bridge->dp_display; + struct dp_display_private *dp = container_of(dp_display, struct dp_display_private, dp_display); + + mutex_lock(&dp->event_mutex); + /* disable HDP interrupts */ + dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false); + dp_catalog_ctrl_hpd_disable(dp->catalog); dp_display->internal_hpd = false; + mutex_unlock(&dp->event_mutex); } void dp_bridge_hpd_notify(struct drm_bridge *bridge, From 12abd735f0300600bfc01b2a3832b966312df205 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 10 Apr 2023 19:59:08 +0300 Subject: [PATCH 594/934] drm/msm/a6xx: initialize GMU mutex earlier Move GMU mutex initialization earlier to make sure that it is always initialized. a6xx_destroy can be called from ther failure path before GMU initialization. This fixes the following backtrace: ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 58 at kernel/locking/mutex.c:582 __mutex_lock+0x1ec/0x3d0 Modules linked in: CPU: 0 PID: 58 Comm: kworker/u16:1 Not tainted 6.3.0-rc5-00155-g187c06436519 #565 Hardware name: Qualcomm Technologies, Inc. SM8350 HDK (DT) Workqueue: events_unbound deferred_probe_work_func pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __mutex_lock+0x1ec/0x3d0 lr : __mutex_lock+0x1ec/0x3d0 sp : ffff800008993620 x29: ffff800008993620 x28: 0000000000000002 x27: ffff47b253c52800 x26: 0000000001000606 x25: ffff47b240bb2810 x24: fffffffffffffff4 x23: 0000000000000000 x22: ffffc38bba15ac14 x21: 0000000000000002 x20: ffff800008993690 x19: ffff47b2430cc668 x18: fffffffffffe98f0 x17: 6f74616c75676572 x16: 20796d6d75642067 x15: 0000000000000038 x14: 0000000000000000 x13: ffffc38bbba050b8 x12: 0000000000000666 x11: 0000000000000222 x10: ffffc38bbba603e8 x9 : ffffc38bbba050b8 x8 : 00000000ffffefff x7 : ffffc38bbba5d0b8 x6 : 0000000000000222 x5 : 000000000000bff4 x4 : 40000000fffff222 x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff47b240cb1880 Call trace: __mutex_lock+0x1ec/0x3d0 mutex_lock_nested+0x2c/0x38 a6xx_destroy+0xa0/0x138 a6xx_gpu_init+0x41c/0x618 adreno_bind+0x188/0x290 component_bind_all+0x118/0x248 msm_drm_bind+0x1c0/0x670 try_to_bring_up_aggregate_device+0x164/0x1d0 __component_add+0xa8/0x16c component_add+0x14/0x20 dsi_dev_attach+0x20/0x2c dsi_host_attach+0x9c/0x144 devm_mipi_dsi_attach+0x34/0xac lt9611uxc_attach_dsi.isra.0+0x84/0xfc lt9611uxc_probe+0x5b8/0x67c i2c_device_probe+0x1ac/0x358 really_probe+0x148/0x2ac __driver_probe_device+0x78/0xe0 driver_probe_device+0x3c/0x160 __device_attach_driver+0xb8/0x138 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x8c/0xc8 process_one_work+0x2bc/0x594 worker_thread+0x228/0x438 kthread+0x108/0x10c ret_from_fork+0x10/0x20 irq event stamp: 299345 hardirqs last enabled at (299345): [] put_cpu_partial+0x1c8/0x22c hardirqs last disabled at (299344): [] put_cpu_partial+0x1c0/0x22c softirqs last enabled at (296752): [] _stext+0x434/0x4e8 softirqs last disabled at (296741): [] ____do_softirq+0x10/0x1c ---[ end trace 0000000000000000 ]--- Fixes: 4cd15a3e8b36 ("drm/msm/a6xx: Make GPU destroy a bit safer") Cc: Douglas Anderson Signed-off-by: Dmitry Baryshkov Reviewed-by: Douglas Anderson Patchwork: https://patchwork.freedesktop.org/patch/531540/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 2 -- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index e16b4b3f8535..8914992378f2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -1526,8 +1526,6 @@ int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node) if (!pdev) return -ENODEV; - mutex_init(&gmu->lock); - gmu->dev = &pdev->dev; of_dma_configure(gmu->dev, node, true); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 9fb214f150dd..52da3795b175 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1981,6 +1981,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev) adreno_gpu = &a6xx_gpu->base; gpu = &adreno_gpu->base; + mutex_init(&a6xx_gpu->gmu.lock); + adreno_gpu->registers = NULL; /* From 91ffd1bae1dafbb9e34b46813f5b058581d9144d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 7 Jun 2023 18:05:02 +0200 Subject: [PATCH 595/934] ping6: Fix send to link-local addresses with VRF. Ping sockets can't send packets when they're bound to a VRF master device and the output interface is set to a slave device. For example, when net.ipv4.ping_group_range is properly set, so that ping6 can use ping sockets, the following kind of commands fails: $ ip vrf exec red ping6 fe80::854:e7ff:fe88:4bf1%eth1 What happens is that sk->sk_bound_dev_if is set to the VRF master device, but 'oif' is set to the real output device. Since both are set but different, ping_v6_sendmsg() sees their value as inconsistent and fails. Fix this by allowing 'oif' to be a slave device of ->sk_bound_dev_if. This fixes the following kselftest failure: $ ./fcnal-test.sh -t ipv6_ping [...] TEST: ping out, vrf device+address bind - ns-B IPv6 LLA [FAIL] Reported-by: Mirsad Todorovac Closes: https://lore.kernel.org/netdev/b6191f90-ffca-dbca-7d06-88a9788def9c@alu.unizg.hr/ Tested-by: Mirsad Todorovac Fixes: 5e457896986e ("net: ipv6: Fix ping to link-local addresses.") Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/6c8b53108816a8d0d5705ae37bdc5a8322b5e3d9.1686153846.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv6/ping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index c4835dbdfcff..f804c11e2146 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -114,7 +114,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) addr_type = ipv6_addr_type(daddr); if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) || (addr_type & IPV6_ADDR_MAPPED) || - (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if)) + (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if && + l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if)) return -EINVAL; ipcm6_init_sk(&ipc6, np); From 6292d7436cf2f0a2ea8800a1d2cbb155d237818a Mon Sep 17 00:00:00 2001 From: Yuezhen Luan Date: Wed, 7 Jun 2023 09:41:16 -0700 Subject: [PATCH 596/934] igb: Fix extts capture value format for 82580/i354/i350 82580/i354/i350 features circle-counter-like timestamp registers that are different with newer i210. The EXTTS capture value in AUXTSMPx should be converted from raw circle counter value to timestamp value in resolution of 1 nanosec by the driver. This issue can be reproduced on i350 nics, connecting an 1PPS signal to a SDP pin, and run 'ts2phc' command to read external 1PPS timestamp value. On i210 this works fine, but on i350 the extts is not correctly converted. The i350/i354/82580's SYSTIM and other timestamp registers are 40bit counters, presenting time range of 2^40 ns, that means these registers overflows every about 1099s. This causes all these regs can't be used directly in contrast to the newer i210/i211s. The igb driver needs to convert these raw register values to valid time stamp format by using kernel timecounter apis for i350s families. Here the igb_extts() just forgot to do the convert. Fixes: 38970eac41db ("igb: support EXTTS on 82580/i354/i350") Signed-off-by: Yuezhen Luan Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230607164116.3768175-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 58872a4c2540..bb3db387d49c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6947,6 +6947,7 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) struct e1000_hw *hw = &adapter->hw; struct ptp_clock_event event; struct timespec64 ts; + unsigned long flags; if (pin < 0 || pin >= IGB_N_SDP) return; @@ -6954,9 +6955,12 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) if (hw->mac.type == e1000_82580 || hw->mac.type == e1000_i354 || hw->mac.type == e1000_i350) { - s64 ns = rd32(auxstmpl); + u64 ns = rd32(auxstmpl); - ns += ((s64)(rd32(auxstmph) & 0xFF)) << 32; + ns += ((u64)(rd32(auxstmph) & 0xFF)) << 32; + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_cyc2time(&adapter->tc, ns); + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); ts = ns_to_timespec64(ns); } else { ts.tv_nsec = rd32(auxstmpl); From 863199199713908afaa47ba09332b87621c12496 Mon Sep 17 00:00:00 2001 From: Wes Huang Date: Thu, 8 Jun 2023 11:01:42 +0800 Subject: [PATCH 597/934] net: usb: qmi_wwan: add support for Compal RXM-G1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for Compal RXM-G1 which is based on Qualcomm SDX55 chip. This patch adds support for two compositions: 0x9091: DIAG + MODEM + QMI_RMNET + ADB 0x90db: DIAG + DUN + RMNET + DPL + QDSS(Trace) + ADB T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=05c6 ProdID=9091 Rev= 4.14 S: Manufacturer=QCOM S: Product=SDXPRAIRIE-MTP _SN:719AB680 S: SerialNumber=719ab680 C:* #Ifs= 4 Cfg#= 1 Atr=80 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=84(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=05c6 ProdID=90db Rev= 4.14 S: Manufacturer=QCOM S: Product=SDXPRAIRIE-MTP _SN:719AB680 S: SerialNumber=719ab680 C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=84(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=8f(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=85(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Wes Huang Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20230608030141.3546-1-wes.huang@moxa.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f1865d047971..2e7c7b0cdc54 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1220,7 +1220,9 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9080, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9083, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, + {QMI_QUIRK_SET_DTR(0x05c6, 0x9091, 2)}, /* Compal RXM-G1 */ {QMI_FIXED_INTF(0x05c6, 0x90b2, 3)}, /* ublox R410M */ + {QMI_QUIRK_SET_DTR(0x05c6, 0x90db, 2)}, /* Compal RXM-G1 */ {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ From 1166a530a84758bb9e6b448fc8c195ed413f5ded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 5 Jun 2023 04:06:54 -0700 Subject: [PATCH 598/934] xfrm: fix inbound ipv4/udp/esp packets to UDPv6 dualstack sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before Linux v5.8 an AF_INET6 SOCK_DGRAM (udp/udplite) socket with SOL_UDP, UDP_ENCAP, UDP_ENCAP_ESPINUDP{,_NON_IKE} enabled would just unconditionally use xfrm4_udp_encap_rcv(), afterwards such a socket would use the newly added xfrm6_udp_encap_rcv() which only handles IPv6 packets. Cc: Sabrina Dubroca Cc: Steffen Klassert Cc: Jakub Kicinski Cc: Benedict Wong Cc: Yan Yan Fixes: 0146dca70b87 ("xfrm: add support for UDPv6 encapsulation of ESP") Signed-off-by: Maciej Żenczykowski Reviewed-by: Simon Horman Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_input.c | 1 + net/ipv6/xfrm6_input.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index ad2afeef4f10..eac206a290d0 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -164,6 +164,7 @@ drop: kfree_skb(skb); return 0; } +EXPORT_SYMBOL(xfrm4_udp_encap_rcv); int xfrm4_rcv(struct sk_buff *skb) { diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 04cbeefd8982..4907ab241d6b 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -86,6 +86,9 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __be32 *udpdata32; __u16 encap_type = up->encap_type; + if (skb->protocol == htons(ETH_P_IP)) + return xfrm4_udp_encap_rcv(sk, skb); + /* if this is not encapsulated socket, then just return now */ if (!encap_type) return 1; From dfaed3e1fa7099de8de4e89cbe7eb9c1bca27dfe Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 7 Jun 2023 10:56:00 +1000 Subject: [PATCH 599/934] powerpc/64s/radix: Fix exit lazy tlb mm switch with irqs enabled Switching mm and tinkering with current->active_mm should be done with irqs disabled. There is a path where exit_lazy_flush_tlb can be called with irqs enabled: exit_lazy_flush_tlb flush_type_needed __flush_all_mm tlb_finish_mmu exit_mmap Which results in the switching being done with irqs enabled, which is incorrect. Fixes: a665eec0a22e ("powerpc/64s/radix: Fix mm_cpumask trimming race vs kthread_use_mm") Cc: stable@vger.kernel.org # v5.10+ Reported-by: Sachin Sant Link: https://lore.kernel.org/linuxppc-dev/A9A5D83D-BA70-47A4-BCB4-30C1AE19BC22@linux.ibm.com/ Tested-by: Sachin Sant Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20230607005601.583293-1-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index ce804b7bf84e..0bd4866d9824 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -795,12 +795,20 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) goto out; if (current->active_mm == mm) { + unsigned long flags; + WARN_ON_ONCE(current->mm != NULL); - /* Is a kernel thread and is using mm as the lazy tlb */ + /* + * It is a kernel thread and is using mm as the lazy tlb, so + * switch it to init_mm. This is not always called from IPI + * (e.g., flush_type_needed), so must disable irqs. + */ + local_irq_save(flags); mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; switch_mm_irqs_off(mm, &init_mm, current); mmdrop_lazy_tlb(mm); + local_irq_restore(flags); } /* From 6c02568fd1ae53099b4ab86365c5be1ff15f586b Mon Sep 17 00:00:00 2001 From: Max Tottenham Date: Wed, 7 Jun 2023 12:23:54 -0400 Subject: [PATCH 600/934] net/sched: act_pedit: Parse L3 Header for L4 offset Instead of relying on skb->transport_header being set correctly, opt instead to parse the L3 header length out of the L3 headers for both IPv4/IPv6 when the Extended Layer Op for tcp/udp is used. This fixes a bug if GRO is disabled, when GRO is disabled skb->transport_header is set by __netif_receive_skb_core() to point to the L3 header, it's later fixed by the upper protocol layers, but act_pedit will receive the SKB before the fixups are completed. The existing behavior causes the following to edit the L3 header if GRO is disabled instead of the UDP header: tc filter add dev eth0 ingress protocol ip flower ip_proto udp \ dst_ip 192.168.1.3 action pedit ex munge udp set dport 18053 Also re-introduce a rate-limited warning if we were unable to extract the header offset when using the 'ex' interface. Fixes: 71d0ed7079df ("net/act_pedit: Support using offset relative to the conventional network headers") Signed-off-by: Max Tottenham Reviewed-by: Josh Hunt Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202305261541.N165u9TZ-lkp@intel.com/ Reviewed-by: Pedro Tammela Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 48 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index fc945c7e4123..c819b812a899 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -13,7 +13,10 @@ #include #include #include +#include +#include #include +#include #include #include #include @@ -327,28 +330,58 @@ static bool offset_valid(struct sk_buff *skb, int offset) return true; } -static void pedit_skb_hdr_offset(struct sk_buff *skb, +static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type) +{ + const int noff = skb_network_offset(skb); + int ret = -EINVAL; + struct iphdr _iph; + + switch (skb->protocol) { + case htons(ETH_P_IP): { + const struct iphdr *iph = skb_header_pointer(skb, noff, sizeof(_iph), &_iph); + + if (!iph) + goto out; + *hoffset = noff + iph->ihl * 4; + ret = 0; + break; + } + case htons(ETH_P_IPV6): + ret = ipv6_find_hdr(skb, hoffset, header_type, NULL, NULL) == header_type ? 0 : -EINVAL; + break; + } +out: + return ret; +} + +static int pedit_skb_hdr_offset(struct sk_buff *skb, enum pedit_header_type htype, int *hoffset) { + int ret = -EINVAL; /* 'htype' is validated in the netlink parsing */ switch (htype) { case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - if (skb_mac_header_was_set(skb)) + if (skb_mac_header_was_set(skb)) { *hoffset = skb_mac_offset(skb); + ret = 0; + } break; case TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK: case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: *hoffset = skb_network_offset(skb); + ret = 0; break; case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_TCP); + break; case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: - if (skb_transport_header_was_set(skb)) - *hoffset = skb_transport_offset(skb); + ret = pedit_l4_skb_offset(skb, hoffset, IPPROTO_UDP); break; default: break; } + return ret; } TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, @@ -384,6 +417,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, int hoffset = 0; u32 *ptr, hdata; u32 val; + int rc; if (tkey_ex) { htype = tkey_ex->htype; @@ -392,7 +426,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, tkey_ex++; } - pedit_skb_hdr_offset(skb, htype, &hoffset); + rc = pedit_skb_hdr_offset(skb, htype, &hoffset); + if (rc) { + pr_info_ratelimited("tc action pedit unable to extract header offset for header type (0x%x)\n", htype); + goto bad; + } if (tkey->offmask) { u8 *d, _d; From 0ad4982c520ed87ea7ebfc9381ea1f617ed75364 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 8 Jun 2023 10:57:27 +0900 Subject: [PATCH 601/934] net: renesas: rswitch: Fix timestamp feature after all descriptors are used The timestamp descriptors were intended to act cyclically. Descriptors from index 0 through gq->ring_size - 1 contain actual information, and the last index (gq->ring_size) should have LINKFIX to indicate the first index 0 descriptor. However, the LINKFIX value is missing, causing the timestamp feature to stop after all descriptors are used. To resolve this issue, set the LINKFIX to the timestamp descritors. Reported-by: Phong Hoang Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/rswitch.c | 36 ++++++++++++++++---------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index aace87139cea..fa6d6202b129 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -347,17 +347,6 @@ out: return -ENOMEM; } -static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv) -{ - struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue; - - gq->ring_size = TS_RING_SIZE; - gq->ts_ring = dma_alloc_coherent(&priv->pdev->dev, - sizeof(struct rswitch_ts_desc) * - (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL); - return !gq->ts_ring ? -ENOMEM : 0; -} - static void rswitch_desc_set_dptr(struct rswitch_desc *desc, dma_addr_t addr) { desc->dptrl = cpu_to_le32(lower_32_bits(addr)); @@ -533,6 +522,28 @@ static void rswitch_gwca_linkfix_free(struct rswitch_private *priv) gwca->linkfix_table = NULL; } +static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv) +{ + struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue; + struct rswitch_ts_desc *desc; + + gq->ring_size = TS_RING_SIZE; + gq->ts_ring = dma_alloc_coherent(&priv->pdev->dev, + sizeof(struct rswitch_ts_desc) * + (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL); + + if (!gq->ts_ring) + return -ENOMEM; + + rswitch_gwca_ts_queue_fill(priv, 0, TS_RING_SIZE); + desc = &gq->ts_ring[gq->ring_size]; + desc->desc.die_dt = DT_LINKFIX; + rswitch_desc_set_dptr(&desc->desc, gq->ring_dma); + INIT_LIST_HEAD(&priv->gwca.ts_info_list); + + return 0; +} + static struct rswitch_gwca_queue *rswitch_gwca_get(struct rswitch_private *priv) { struct rswitch_gwca_queue *gq; @@ -1780,9 +1791,6 @@ static int rswitch_init(struct rswitch_private *priv) if (err < 0) goto err_ts_queue_alloc; - rswitch_gwca_ts_queue_fill(priv, 0, TS_RING_SIZE); - INIT_LIST_HEAD(&priv->gwca.ts_info_list); - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { err = rswitch_device_alloc(priv, i); if (err < 0) { From c0e489372a294044feea650b38f38c888eff57a4 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Thu, 8 Jun 2023 10:46:25 +0530 Subject: [PATCH 602/934] octeontx2-af: Fix promiscuous mode CN10KB silicon introduced a new exact match feature, which is used for DMAC filtering. The state of installed DMAC filters in this exact match table is getting corrupted when promiscuous mode is toggled. Fix this by not touching Exact match related config when promiscuous mode is toggled. Fixes: 2dba9459d2c9 ("octeontx2-af: Wrapper functions for MAC addr add/del/update/reset") Signed-off-by: Ratheesh Kannoth Signed-off-by: David S. Miller --- .../marvell/octeontx2/af/rvu_npc_hash.c | 29 ++----------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 51209119f0f2..9f11c1e40737 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -1164,10 +1164,8 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i { struct npc_exact_table *table; u16 *cnt, old_cnt; - bool promisc; table = rvu->hw->table; - promisc = table->promisc_mode[drop_mcam_idx]; cnt = &table->cnt_cmd_rules[drop_mcam_idx]; old_cnt = *cnt; @@ -1179,16 +1177,13 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i *enable_or_disable_cam = false; - if (promisc) - goto done; - - /* If all rules are deleted and not already in promisc mode; disable cam */ + /* If all rules are deleted, disable cam */ if (!*cnt && val < 0) { *enable_or_disable_cam = true; goto done; } - /* If rule got added and not already in promisc mode; enable cam */ + /* If rule got added, enable cam */ if (!old_cnt && val > 0) { *enable_or_disable_cam = true; goto done; @@ -1443,7 +1438,6 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) u32 drop_mcam_idx; bool *promisc; bool rc; - u32 cnt; table = rvu->hw->table; @@ -1466,17 +1460,8 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) return LMAC_AF_ERR_INVALID_PARAM; } *promisc = false; - cnt = __rvu_npc_exact_cmd_rules_cnt_update(rvu, drop_mcam_idx, 0, NULL); mutex_unlock(&table->lock); - /* If no dmac filter entries configured, disable drop rule */ - if (!cnt) - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, false); - else - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, !*promisc); - - dev_dbg(rvu->dev, "%s: disabled promisc mode (cgx=%d lmac=%d, cnt=%d)\n", - __func__, cgx_id, lmac_id, cnt); return 0; } @@ -1494,7 +1479,6 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) u32 drop_mcam_idx; bool *promisc; bool rc; - u32 cnt; table = rvu->hw->table; @@ -1517,17 +1501,8 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) return LMAC_AF_ERR_INVALID_PARAM; } *promisc = true; - cnt = __rvu_npc_exact_cmd_rules_cnt_update(rvu, drop_mcam_idx, 0, NULL); mutex_unlock(&table->lock); - /* If no dmac filter entries configured, disable drop rule */ - if (!cnt) - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, false); - else - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, !*promisc); - - dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d cnt=%d)\n", - __func__, cgx_id, lmac_id, cnt); return 0; } From be3618d9651002cd5ff190dbfc6cf78f03e34e27 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 8 Jun 2023 14:27:56 +0800 Subject: [PATCH 603/934] net/sched: taprio: fix slab-out-of-bounds Read in taprio_dequeue_from_txq As shown in [1], out-of-bounds access occurs in two cases: 1)when the qdisc of the taprio type is used to replace the previously configured taprio, count and offset in tc_to_txq can be set to 0. In this case, the value of *txq in taprio_next_tc_txq() will increases continuously. When the number of accessed queues exceeds the number of queues on the device, out-of-bounds access occurs. 2)When packets are dequeued, taprio can be deleted. In this case, the tc rule of dev is cleared. The count and offset values are also set to 0. In this case, out-of-bounds access is also caused. Now the restriction on the queue number is added. [1] https://groups.google.com/g/syzkaller-bugs/c/_lYOKgkBVMg Fixes: 2f530df76c8c ("net/sched: taprio: give higher priority to higher TCs in software dequeue mode") Reported-by: syzbot+04afcb3d2c840447559a@syzkaller.appspotmail.com Signed-off-by: Zhengchao Shao Tested-by: Pedro Tammela Signed-off-by: David S. Miller --- net/sched/sch_taprio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index dd7dea2f6e83..cf0e61ed9225 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -797,6 +797,9 @@ static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch, taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]); + if (q->cur_txq[tc] >= dev->num_tx_queues) + q->cur_txq[tc] = first_txq; + if (skb) return skb; } while (q->cur_txq[tc] != first_txq); From 04c55383fa5689357bcdd2c8036725a55ed632bc Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Jun 2023 08:29:03 +0100 Subject: [PATCH 604/934] net/sched: cls_u32: Fix reference counter leak leading to overflow In the event of a failure in tcf_change_indev(), u32_set_parms() will immediately return without decrementing the recently incremented reference counter. If this happens enough times, the counter will rollover and the reference freed, leading to a double free which can be used to do 'bad things'. In order to prevent this, move the point of possible failure above the point where the reference counter is incremented. Also save any meaningful return values to be applied to the return data at the appropriate point in time. This issue was caught with KASAN. Fixes: 705c7091262d ("net: sched: cls_u32: no need to call tcf_exts_change for newly allocated struct") Suggested-by: Eric Dumazet Signed-off-by: Lee Jones Reviewed-by: Eric Dumazet Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4e2e269f121f..d15d50de7980 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -718,13 +718,19 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr *est, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { - int err; + int err, ifindex = -1; err = tcf_exts_validate_ex(net, tp, tb, est, &n->exts, flags, fl_flags, extack); if (err < 0) return err; + if (tb[TCA_U32_INDEV]) { + ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); + if (ifindex < 0) + return -EINVAL; + } + if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; @@ -759,13 +765,9 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, tcf_bind_filter(tp, &n->res, base); } - if (tb[TCA_U32_INDEV]) { - int ret; - ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack); - if (ret < 0) - return -EINVAL; - n->ifindex = ret; - } + if (ifindex >= 0) + n->ifindex = ifindex; + return 0; } From 996c3117dae4c02b38a3cb68e5c2aec9d907ec15 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Jun 2023 13:48:44 +0300 Subject: [PATCH 605/934] wifi: cfg80211: fix double lock bug in reg_wdev_chan_valid() The locking was changed recently so now the caller holds the wiphy_lock() lock. Taking the lock inside the reg_wdev_chan_valid() function will lead to a deadlock. Fixes: f7e60032c661 ("wifi: cfg80211: fix locking in regulatory disconnect") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/40c4114a-6cb4-4abf-b013-300b598aba65@moroto.mountain Signed-off-by: Johannes Berg --- net/wireless/reg.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 949e1fb3bec6..26f11e4746c0 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2404,11 +2404,8 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: - wiphy_lock(wiphy); ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); - wiphy_unlock(wiphy); - if (!ret) return ret; break; From 01605ad6c3e8608d7e147c9b75d67eb8a3d27d88 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Jun 2023 16:35:59 +0300 Subject: [PATCH 606/934] wifi: mac80211: fix link activation settings order In the normal MLME code we always call ieee80211_mgd_set_link_qos_params() before ieee80211_link_info_change_notify() and some drivers, notably iwlwifi, rely on that as they don't do anything (but store the data) in their conf_tx. Fix the order here to be the same as in the normal code paths, so this isn't broken. Fixes: 3d9011029227 ("wifi: mac80211: implement link switching") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230608163202.a2a86bba2f80.Iac97e04827966d22161e63bb6e201b4061e9651b@changeid Signed-off-by: Johannes Berg --- net/mac80211/link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/link.c b/net/mac80211/link.c index e82db88a47f8..40f030b8ece9 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -2,7 +2,7 @@ /* * MLO link handling * - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include #include @@ -409,6 +409,7 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, IEEE80211_CHANCTX_SHARED); WARN_ON_ONCE(ret); + ieee80211_mgd_set_link_qos_params(link); ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE | @@ -423,7 +424,6 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_TWT | BSS_CHANGED_HE_OBSS_PD | BSS_CHANGED_HE_BSS_COLOR); - ieee80211_mgd_set_link_qos_params(link); } old_active = sdata->vif.active_links; From 1ff56684fa8682bdfbbce4e12cf67ab23cb1db05 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 8 Jun 2023 16:36:01 +0300 Subject: [PATCH 607/934] wifi: cfg80211: fix link del callback to call correct handler The wrapper function was incorrectly calling the add handler instead of the del handler. This had no negative side effect as the default handlers are essentially identical. Fixes: f2a0290b2df2 ("wifi: cfg80211: add optional link add/remove callbacks") Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230608163202.ebd00e000459.Iaff7dc8d1cdecf77f53ea47a0e5080caa36ea02a@changeid Signed-off-by: Johannes Berg --- net/wireless/rdev-ops.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 2e497cf26ef2..69b508743e57 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2021-2022 Intel Corporation + * Copyright (C) 2018, 2021-2023 Intel Corporation */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS @@ -1441,8 +1441,8 @@ rdev_del_intf_link(struct cfg80211_registered_device *rdev, unsigned int link_id) { trace_rdev_del_intf_link(&rdev->wiphy, wdev, link_id); - if (rdev->ops->add_intf_link) - rdev->ops->add_intf_link(&rdev->wiphy, wdev, link_id); + if (rdev->ops->del_intf_link) + rdev->ops->del_intf_link(&rdev->wiphy, wdev, link_id); trace_rdev_return_void(&rdev->wiphy); } From 15846f95ab01b71fdb1cef8df73680aad41edf70 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Thu, 8 Jun 2023 16:36:02 +0300 Subject: [PATCH 608/934] wifi: mac80211: take lock before setting vif links ieee80211_vif_set_links requires the sdata->local->mtx lock to be held. Add the appropriate locking around the calls in both the link add and remove handlers. This causes a warning when e.g. ieee80211_link_release_channel is called via ieee80211_link_stop from ieee80211_vif_update_links. Fixes: 0d8c4a3c8688 ("wifi: mac80211: implement add/del interface link callbacks") Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230608163202.fa0c6597fdad.I83dd70359f6cda30f86df8418d929c2064cf4995@changeid Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 86b2036d73ff..f2d08dbccfb7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -4865,11 +4865,16 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int res; if (wdev->use_4addr) return -EOPNOTSUPP; - return ieee80211_vif_set_links(sdata, wdev->valid_links); + mutex_lock(&sdata->local->mtx); + res = ieee80211_vif_set_links(sdata, wdev->valid_links); + mutex_unlock(&sdata->local->mtx); + + return res; } static void ieee80211_del_intf_link(struct wiphy *wiphy, @@ -4878,7 +4883,9 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + mutex_lock(&sdata->local->mtx); ieee80211_vif_set_links(sdata, wdev->valid_links); + mutex_unlock(&sdata->local->mtx); } static int sta_add_link_station(struct ieee80211_local *local, From 34d4e3eb67fed9c19719bedb748e5a8b6ccc97a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Jun 2023 16:36:04 +0300 Subject: [PATCH 609/934] wifi: cfg80211: remove links only on AP Since links are only controlled by userspace via cfg80211 in AP mode, also only remove them from the driver in that case. Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230608163202.ed65b94916fa.I2458c46888284cc5ce30715fe642bc5fc4340c8f@changeid Signed-off-by: Johannes Berg --- net/wireless/util.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 3bc0c3072e78..9755ef281040 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ #include #include @@ -2558,6 +2558,13 @@ void cfg80211_remove_links(struct wireless_dev *wdev) { unsigned int link_id; + /* + * links are controlled by upper layers (userspace/cfg) + * only for AP mode, so only remove them here for AP + */ + if (wdev->iftype != NL80211_IFTYPE_AP) + return; + wdev_lock(wdev); if (wdev->valid_links) { for_each_valid_link(wdev, link_id) From 7b3b9ac899b54f53f7c9fc07e1c562f56b2187fa Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Thu, 8 Jun 2023 16:36:07 +0300 Subject: [PATCH 610/934] wifi: mac80211: Use active_links instead of valid_links in Tx Fix few places on the Tx path where the valid_links were used instead of active links. Signed-off-by: Ilan Peer Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230608163202.e24832691fc8.I9ac10dc246d7798a8d26b1a94933df5668df63fc@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7f1c7f67014b..13b522dab0a3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4445,7 +4445,7 @@ static void ieee80211_mlo_multicast_tx(struct net_device *dev, struct sk_buff *skb) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - unsigned long links = sdata->vif.valid_links; + unsigned long links = sdata->vif.active_links; unsigned int link; u32 ctrl_flags = IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX; @@ -6040,7 +6040,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (WARN_ON_ONCE(link == ARRAY_SIZE(sdata->vif.link_conf))) - link = ffs(sdata->vif.valid_links) - 1; + link = ffs(sdata->vif.active_links) - 1; } IEEE80211_SKB_CB(skb)->control.flags |= @@ -6076,7 +6076,7 @@ void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, band = chanctx_conf->def.chan->band; } else { WARN_ON(link_id >= 0 && - !(sdata->vif.valid_links & BIT(link_id))); + !(sdata->vif.active_links & BIT(link_id))); /* MLD transmissions must not rely on the band */ band = 0; } From 55d8122f5cd62d5aaa225d7167dcd14a44c850b9 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 24 Apr 2023 15:50:30 -0700 Subject: [PATCH 611/934] vhost: support PACKED when setting-getting vring_base Use the right structs for PACKED or split vqs when setting and getting the vring base. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Shannon Nelson Message-Id: <20230424225031.18947-3-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vhost.c | 18 +++++++++++++----- drivers/vhost/vhost.h | 8 ++++++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1f80eac5d6ae..60c9ebd629dd 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1600,17 +1600,25 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - if (s.num > 0xffff) { - r = -EINVAL; - break; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq->last_avail_idx = s.num & 0xffff; + vq->last_used_idx = (s.num >> 16) & 0xffff; + } else { + if (s.num > 0xffff) { + r = -EINVAL; + break; + } + vq->last_avail_idx = s.num; } - vq->last_avail_idx = s.num; /* Forget the cached index value. */ vq->avail_idx = vq->last_avail_idx; break; case VHOST_GET_VRING_BASE: s.index = idx; - s.num = vq->last_avail_idx; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) + s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16); + else + s.num = vq->last_avail_idx; if (copy_to_user(argp, &s, sizeof s)) r = -EFAULT; break; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 305ec8593d46..fc900be504b3 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -92,13 +92,17 @@ struct vhost_virtqueue { /* The routine to call when the Guest pings us, or timeout. */ vhost_work_fn_t handle_kick; - /* Last available index we saw. */ + /* Last available index we saw. + * Values are limited to 0x7fff, and the high bit is used as + * a wrap counter when using VIRTIO_F_RING_PACKED. */ u16 last_avail_idx; /* Caches available index value from user. */ u16 avail_idx; - /* Last index we used. */ + /* Last index we used. + * Values are limited to 0x7fff, and the high bit is used as + * a wrap counter when using VIRTIO_F_RING_PACKED. */ u16 last_used_idx; /* Used flags */ From beee7fdb5b56a46415a4992d28dd4c2d06eb52df Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 24 Apr 2023 15:50:31 -0700 Subject: [PATCH 612/934] vhost_vdpa: support PACKED when setting-getting vring_base Use the right structs for PACKED or split vqs when setting and getting the vring base. Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Shannon Nelson Message-Id: <20230424225031.18947-4-shannon.nelson@amd.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vdpa.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 89d5ecfe7d2e..bf77924d5b60 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -607,7 +607,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, if (r) return r; - vq->last_avail_idx = vq_state.split.avail_index; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq->last_avail_idx = vq_state.packed.last_avail_idx | + (vq_state.packed.last_avail_counter << 15); + vq->last_used_idx = vq_state.packed.last_used_idx | + (vq_state.packed.last_used_counter << 15); + } else { + vq->last_avail_idx = vq_state.split.avail_index; + } break; } @@ -625,9 +632,15 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, break; case VHOST_SET_VRING_BASE: - vq_state.split.avail_index = vq->last_avail_idx; - if (ops->set_vq_state(vdpa, idx, &vq_state)) - r = -EINVAL; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff; + vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000); + vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff; + vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000); + } else { + vq_state.split.avail_index = vq->last_avail_idx; + } + r = ops->set_vq_state(vdpa, idx, &vq_state); break; case VHOST_SET_VRING_CALL: From 07496eeab577eef1d4912b3e1b502a2b52002ac3 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 15 Feb 2023 15:33:49 -0700 Subject: [PATCH 613/934] tools/virtio: use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing A few spots in tools/virtio still refer to this older debugfs path, so let's update them to avoid confusion. Signed-off-by: Ross Zwisler Message-Id: <20230215223350.2658616-6-zwisler@google.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Mukesh Ojha --- tools/virtio/virtio-trace/README | 2 +- tools/virtio/virtio-trace/trace-agent.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README index 4fb9368bf751..0127ff0c54b0 100644 --- a/tools/virtio/virtio-trace/README +++ b/tools/virtio/virtio-trace/README @@ -95,7 +95,7 @@ Run 1) Enable ftrace in the guest - # echo 1 > /sys/kernel/debug/tracing/events/sched/enable + # echo 1 > /sys/kernel/tracing/events/sched/enable 2) Run trace agent in the guest This agent must be operated as root. diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c index cdfe77c2b4c8..7e2d9bbf0b84 100644 --- a/tools/virtio/virtio-trace/trace-agent.c +++ b/tools/virtio/virtio-trace/trace-agent.c @@ -18,8 +18,9 @@ #define PIPE_DEF_BUFS 16 #define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS) #define PIPE_MAX_SIZE (1024*1024) -#define READ_PATH_FMT \ - "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw" +#define TRACEFS "/sys/kernel/tracing" +#define DEBUGFS "/sys/kernel/debug/tracing" +#define READ_PATH_FMT "%s/per_cpu/cpu%d/trace_pipe_raw" #define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d" #define CTL_PATH "/dev/virtio-ports/agent-ctl-path" @@ -120,9 +121,12 @@ static const char *make_path(int cpu_num, bool this_is_write_path) if (this_is_write_path) /* write(output) path */ ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num); - else + else { /* read(input) path */ - ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num); + ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, TRACEFS, cpu_num); + if (ret > 0 && access(buf, F_OK) != 0) + ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, DEBUGFS, cpu_num); + } if (ret <= 0) { pr_err("Failed to generate %s path(CPU#%d):%d\n", From 2a62b6210ce876c596086ab8fd4c8a0c3d10611a Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 2 Jun 2023 11:54:08 +0800 Subject: [PATCH 614/934] RDMA/rxe: Fix the use-before-initialization error of resp_pkts In the following: Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106 assign_lock_key kernel/locking/lockdep.c:982 [inline] register_lock_class+0xdb6/0x1120 kernel/locking/lockdep.c:1295 __lock_acquire+0x10a/0x5df0 kernel/locking/lockdep.c:4951 lock_acquire kernel/locking/lockdep.c:5691 [inline] lock_acquire+0x1b1/0x520 kernel/locking/lockdep.c:5656 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x3d/0x60 kernel/locking/spinlock.c:162 skb_dequeue+0x20/0x180 net/core/skbuff.c:3639 drain_resp_pkts drivers/infiniband/sw/rxe/rxe_comp.c:555 [inline] rxe_completer+0x250d/0x3cc0 drivers/infiniband/sw/rxe/rxe_comp.c:652 rxe_qp_do_cleanup+0x1be/0x820 drivers/infiniband/sw/rxe/rxe_qp.c:761 execute_in_process_context+0x3b/0x150 kernel/workqueue.c:3473 __rxe_cleanup+0x21e/0x370 drivers/infiniband/sw/rxe/rxe_pool.c:233 rxe_create_qp+0x3f6/0x5f0 drivers/infiniband/sw/rxe/rxe_verbs.c:583 This is a use-before-initialization problem. It happens because rxe_qp_do_cleanup is called during error unwind before the struct has been fully initialized. Move the initialization of the skb earlier. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20230602035408.741534-1-yanjun.zhu@intel.com Reported-by: syzbot+eba589d8f49c73d356da@syzkaller.appspotmail.com Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 61a2eb77d999..a0f206431cf8 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -176,6 +176,9 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, spin_lock_init(&qp->rq.producer_lock); spin_lock_init(&qp->rq.consumer_lock); + skb_queue_head_init(&qp->req_pkts); + skb_queue_head_init(&qp->resp_pkts); + atomic_set(&qp->ssn, 0); atomic_set(&qp->skb_out, 0); } @@ -234,8 +237,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->req.opcode = -1; qp->comp.opcode = -1; - skb_queue_head_init(&qp->req_pkts); - rxe_init_task(&qp->req.task, qp, rxe_requester); rxe_init_task(&qp->comp.task, qp, rxe_completer); @@ -279,8 +280,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, } } - skb_queue_head_init(&qp->resp_pkts); - rxe_init_task(&qp->resp.task, qp, rxe_responder); qp->resp.opcode = OPCODE_NONE; From 3e1b9b2d81901b3ceeb5ec1f1b4c41cd1cff53ba Mon Sep 17 00:00:00 2001 From: Carl Vanderlip Date: Fri, 2 Jun 2023 15:04:39 -0600 Subject: [PATCH 615/934] accel/qaic: Free user handle on interrupted mutex After user handle is allocated, if mutex is interrupted, we do not free the user handle and return an error. Kref had been initialized, but not added to users list, so device teardown would also not call free_usr. Fixes: c501ca23a6a3 ("accel/qaic: Add uapi and core driver file") Signed-off-by: Carl Vanderlip Reviewed-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Jeffrey Hugo Signed-off-by: Jeffrey Hugo Link: https://patchwork.freedesktop.org/patch/msgid/20230602210440.8411-2-quic_jhugo@quicinc.com --- drivers/accel/qaic/qaic_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index 2d0828db28d8..961cd341b414 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -97,6 +97,7 @@ static int qaic_open(struct drm_device *dev, struct drm_file *file) cleanup_usr: cleanup_srcu_struct(&usr->qddev_lock); + ida_free(&qaic_usrs, usr->handle); free_usr: kfree(usr); dev_unlock: From 61d8cdb7872c82d8a4d5e5251b0010332c316a67 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 2 Jun 2023 15:04:40 -0600 Subject: [PATCH 616/934] accel/qaic: Fix NULL pointer deref in qaic_destroy_drm_device() If qaic_destroy_drm_device() is called before the device has fully initialized it will cause a NULL pointer dereference as the drm device has not yet been created. Fix this with a NULL check. Fixes: c501ca23a6a3 ("accel/qaic: Add uapi and core driver file") Signed-off-by: Jeffrey Hugo Reviewed-by: Carl Vanderlip Reviewed-by: Pranjal Ramajor Asha Kanojiya Link: https://patchwork.freedesktop.org/patch/msgid/20230602210440.8411-3-quic_jhugo@quicinc.com --- drivers/accel/qaic/qaic_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index 961cd341b414..b5ba550a0c04 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -225,6 +225,9 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) struct qaic_user *usr; qddev = qdev->qddev; + qdev->qddev = NULL; + if (!qddev) + return; /* * Existing users get unresolvable errors till they close FDs. From ccc45cb4e7271c74dbb27776ae8f73d84557f5c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Fri, 9 Jun 2023 17:37:50 +0200 Subject: [PATCH 617/934] s390/dasd: Use correct lock while counting channel queue length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lock around counting the channel queue length in the BIODASDINFO ioctl was incorrectly changed to the dasd_block->queue_lock with commit 583d6535cb9d ("dasd: remove dead code"). This can lead to endless list iterations and a subsequent crash. The queue_lock is supposed to be used only for queue lists belonging to dasd_block. For dasd_device related queue lists the ccwdev lock must be used. Fix the mentioned issues by correctly using the ccwdev lock instead of the queue lock. Fixes: 583d6535cb9d ("dasd: remove dead code") Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20230609153750.1258763-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 9327dcdd6e5e..8fca725b3dae 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -552,10 +552,10 @@ static int __dasd_ioctl_information(struct dasd_block *block, memcpy(dasd_info->type, base->discipline->name, 4); - spin_lock_irqsave(&block->queue_lock, flags); + spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags); list_for_each(l, &base->ccw_queue) dasd_info->chanq_len++; - spin_unlock_irqrestore(&block->queue_lock, flags); + spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags); return 0; } From 39affd1fdf65983904fafc07cf607cff737eaf30 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Fri, 2 Jun 2023 11:05:02 +0900 Subject: [PATCH 618/934] of: overlay: Fix missing of_node_put() in error case of init_overlay_changeset() In init_overlay_changeset(), the variable "node" is from of_get_child_by_name(), and the "node" should be discarded in error case. Fixes: d1651b03c2df ("of: overlay: add overlay symbols to live device tree") Signed-off-by: Kunihiko Hayashi Link: https://lore.kernel.org/r/20230602020502.11693-1-hayashi.kunihiko@socionext.com Signed-off-by: Rob Herring --- drivers/of/overlay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 2e01960f1aeb..7feb643f1370 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -811,6 +811,7 @@ static int init_overlay_changeset(struct overlay_changeset *ovcs) if (!fragment->target) { pr_err("symbols in overlay, but not in live tree\n"); ret = -EINVAL; + of_node_put(node); goto err_out; } From 5b47f56b6a0fe5752f0827d26037a809ec0080bc Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 9 Jun 2023 16:11:07 +0200 Subject: [PATCH 619/934] dt-bindings: i3c: silvaco,i3c-master: fix missing schema restriction Each device schema must end with unevaluatedProperties: false, if it references other common schema. Otherwise it would allow any properties to be listed. Fixes: b8b0446f1f1a ("dt-bindings: i3c: Describe Silvaco master binding") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230609141107.66128-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml index 62f3ca66274f..32c821f97779 100644 --- a/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml +++ b/Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml @@ -44,7 +44,7 @@ required: - clock-names - clocks -additionalProperties: true +unevaluatedProperties: false examples: - | From a2a871483161014f1bcc4e9a04354b01aa77cedb Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Fri, 9 Jun 2023 17:10:58 -0300 Subject: [PATCH 620/934] ALSA: hda/realtek: Add a quirk for Compaq N14JP6 Add a quirk for Compaq N14JP6 to fixup ALC897 headset MIC no sound. Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20230609201058.523499-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a5d55a7063d3..308ec7034cc9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11740,6 +11740,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1b0a, 0x01b8, "ACER Veriton", ALC662_FIXUP_ACER_VERITON), SND_PCI_QUIRK(0x1b35, 0x1234, "CZC ET26", ALC662_FIXUP_CZC_ET26), SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T), + SND_PCI_QUIRK(0x1c6c, 0x1239, "Compaq N14JP6-V2", ALC897_FIXUP_HP_HSMIC_VERB), #if 0 /* Below is a quirk table taken from the old code. From 673004821ab98c6645bd21af56a290854e88f533 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:43 +0200 Subject: [PATCH 621/934] selftests: mptcp: lib: skip if missing symbol Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. New functions are now available to easily detect if a certain feature is missing by looking at kallsyms. These new helpers are going to be used in the following commits. In order to ease the backport of such future patches, it would be good if this patch is backported up to the introduction of MPTCP selftests, hence the Fixes tag below: this type of check was supposed to be done from the beginning. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/config | 1 + .../testing/selftests/net/mptcp/mptcp_lib.sh | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 38021a0dd527..6032f9b23c4c 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,3 +1,4 @@ +CONFIG_KALLSYMS=y CONFIG_MPTCP=y CONFIG_IPV6=y CONFIG_MPTCP_IPV6=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 3286536b79d5..29b65f4b73b2 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -38,3 +38,41 @@ mptcp_lib_check_mptcp() { exit ${KSFT_SKIP} fi } + +mptcp_lib_check_kallsyms() { + if ! mptcp_lib_has_file "/proc/kallsyms"; then + echo "SKIP: CONFIG_KALLSYMS is missing" + exit ${KSFT_SKIP} + fi +} + +# Internal: use mptcp_lib_kallsyms_has() instead +__mptcp_lib_kallsyms_has() { + local sym="${1}" + + mptcp_lib_check_kallsyms + + grep -q " ${sym}" /proc/kallsyms +} + +# $1: part of a symbol to look at, add '$' at the end for full name +mptcp_lib_kallsyms_has() { + local sym="${1}" + + if __mptcp_lib_kallsyms_has "${sym}"; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "${sym} symbol not found" +} + +# $1: part of a symbol to look at, add '$' at the end for full name +mptcp_lib_kallsyms_doesnt_have() { + local sym="${1}" + + if ! __mptcp_lib_kallsyms_has "${sym}"; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "${sym} symbol has been found" +} From 07bf49401909264a38fa3427c3cce43e8304436a Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:44 +0200 Subject: [PATCH 622/934] selftests: mptcp: connect: skip transp tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of IP(V6)_TRANSPARENT socket option with MPTCP connections introduced by commit c9406a23c116 ("mptcp: sockopt: add SOL_IP freebind & transparent options"). It is possible to look for "__ip_sock_set_tos" in kallsyms because IP(V6)_TRANSPARENT socket option support has been added after TOS support which came with the required infrastructure in MPTCP sockopt code. To support TOS, the following function has been exported (T). Not great but better than checking for a specific kernel version. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 5fb62e9cd3ad ("selftests: mptcp: add tproxy test case") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index c1f7bac19942..f9c36c6929cc 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -144,6 +144,7 @@ cleanup() } mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then @@ -695,6 +696,15 @@ run_test_transparent() return 0 fi + # IP(V6)_TRANSPARENT has been added after TOS support which came with + # the required infrastructure in MPTCP sockopt code. To support TOS, the + # following function has been exported (T). Not great but better than + # checking for a specific kernel version. + if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then + echo "INFO: ${msg} not supported by the kernel: SKIP" + return + fi + ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" flush ruleset table inet mangle { From 4ad39a42da2e9770c8e4c37fe632ed8898419129 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:45 +0200 Subject: [PATCH 623/934] selftests: mptcp: connect: skip disconnect tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the full support of disconnections from the userspace introduced by commit b29fcfb54cd7 ("mptcp: full disconnect implementation"). It is possible to look for "mptcp_pm_data_reset" in kallsyms because a preparation patch added it to ease the introduction of the mentioned feature. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 05be5e273c84 ("selftests: mptcp: add disconnect tests") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index f9c36c6929cc..895114fb6832 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -797,6 +797,11 @@ run_tests_disconnect() local old_cin=$cin local old_sin=$sin + if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then + echo "INFO: Full disconnect not supported: SKIP" + return + fi + cat $cin $cin $cin > "$cin".disconnect # force do_transfer to cope with the multiple tranmissions From 06b03083158e90d57866fa220de92c8dd8b9598b Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:46 +0200 Subject: [PATCH 624/934] selftests: mptcp: connect: skip TFO tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of TCP_FASTOPEN socket option with MPTCP connections introduced by commit 4ffb0a02346c ("mptcp: add TCP_FASTOPEN sock option"). It is possible to look for "mptcp_fastopen_" in kallsyms to know if the feature is supported or not. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: ca7ae8916043 ("selftests: mptcp: mptfo Initiator/Listener") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 895114fb6832..773dd770a567 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -777,6 +777,11 @@ run_tests_peekmode() run_tests_mptfo() { + if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then + echo "INFO: TFO not supported by the kernel: SKIP" + return + fi + echo "INFO: with MPTFO start" ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2 ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1 From dc97251bf0b70549c76ba261516c01b8096771c5 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:47 +0200 Subject: [PATCH 625/934] selftests: mptcp: diag: skip listen tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the listen diag dump support introduced by commit 4fa39b701ce9 ("mptcp: listen diag dump support"). It looks like there is no good pre-check to do here, i.e. dedicated function available in kallsyms. Instead, we try to get info if nothing is returned, the test is marked as skipped. That's not ideal because something could be wrong with the feature and instead of reporting an error, the test could be marked as skipped. If we know in advanced that the feature is supposed to be supported, the tester can set SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var to 1: in this case the test will report an error instead of marking the test as skipped if nothing is returned. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: f2ae0fa68e28 ("selftests/mptcp: add diag listen tests") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 42 +++++++++-------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 4eacdb1ab962..4a6165389b74 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -55,16 +55,20 @@ __chk_nr() { local command="$1" local expected=$2 - local msg nr + local msg="$3" + local skip="${4:-SKIP}" + local nr - shift 2 - msg=$* nr=$(eval $command) printf "%-50s" "$msg" if [ $nr != $expected ]; then - echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then + echo "[ skip ] Feature probably not supported" + else + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + fi else echo "[ ok ]" fi @@ -76,12 +80,12 @@ __chk_msk_nr() local condition=$1 shift 1 - __chk_nr "ss -inmHMN $ns | $condition" $* + __chk_nr "ss -inmHMN $ns | $condition" "$@" } chk_msk_nr() { - __chk_msk_nr "grep -c token:" $* + __chk_msk_nr "grep -c token:" "$@" } wait_msk_nr() @@ -119,37 +123,26 @@ wait_msk_nr() chk_msk_fallback_nr() { - __chk_msk_nr "grep -c fallback" $* + __chk_msk_nr "grep -c fallback" "$@" } chk_msk_remote_key_nr() { - __chk_msk_nr "grep -c remote_key" $* + __chk_msk_nr "grep -c remote_key" "$@" } __chk_listen() { local filter="$1" local expected=$2 + local msg="$3" - shift 2 - msg=$* - - nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN) - printf "%-50s" "$msg" - - if [ $nr != $expected ]; then - echo "[ fail ] expected $expected found $nr" - ret=$test_cnt - else - echo "[ ok ]" - fi + __chk_nr "ss -N $ns -Ml '$filter' | grep -c LISTEN" "$expected" "$msg" 0 } chk_msk_listen() { lport=$1 - local msg="check for listen socket" # destination port search should always return empty list __chk_listen "dport $lport" 0 "listen match for dport $lport" @@ -167,10 +160,9 @@ chk_msk_listen() chk_msk_inuse() { local expected=$1 + local msg="$2" local listen_nr - shift 1 - listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) expected=$((expected + listen_nr)) @@ -181,7 +173,7 @@ chk_msk_inuse() sleep 0.1 done - __chk_nr get_msk_inuse $expected $* + __chk_nr get_msk_inuse $expected "$msg" } # $1: ns, $2: port From dc93086aff040349b5b2a4608c71ea01286dc2cc Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:48 +0200 Subject: [PATCH 626/934] selftests: mptcp: diag: skip inuse tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the reporting of the MPTCP sockets being used, introduced by commit c558246ee73e ("mptcp: add statistics for mptcp socket in use"). Similar to the parent commit, it looks like there is no good pre-check to do here, i.e. dedicated function available in kallsyms. Instead, we try to get info and if nothing is returned, the test is marked as skipped. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: e04a30f78809 ("selftest: mptcp: add test for mptcp socket in use") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 4a6165389b74..fa9e09ad97d9 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -173,7 +173,7 @@ chk_msk_inuse() sleep 0.1 done - __chk_nr get_msk_inuse $expected "$msg" + __chk_nr get_msk_inuse $expected "$msg" 0 } # $1: ns, $2: port From 2177d0b08e421971e035672b70f3228d9485c650 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:49 +0200 Subject: [PATCH 627/934] selftests: mptcp: pm nl: remove hardcoded default limits Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the checks of the default limits returned by the MPTCP in-kernel path-manager. The default values have been modified by commit 72bcbc46a5c3 ("mptcp: increase default max additional subflows to 2"). Instead of comparing with hardcoded values, we can get the default one and compare with them. Note that if we expect to have the latest version, we continue to check the hardcoded values to avoid unexpected behaviour changes. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: eedbc685321b ("selftests: add PM netlink functional tests") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 32f7533e0919..664cafc60705 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -73,8 +73,12 @@ check() } check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 + +default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)" +if mptcp_lib_expect_all_features; then + check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 subflows 2" "defaults limits" +fi ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo @@ -121,12 +125,10 @@ ip netns exec $ns1 ./pm_nl_ctl flush check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" ip netns exec $ns1 ./pm_nl_ctl limits 9 1 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 2" "rcv addrs above hard limit" +check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 1 9 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 2" "subflows above hard limit" +check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 8 8 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 From f3761b50b8e4cb4807b5d41e02144c8c8a0f2512 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:50 +0200 Subject: [PATCH 628/934] selftests: mptcp: pm nl: skip fullmesh flag checks if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the fullmesh flag that can be given to the MPTCP in-kernel path-manager and introduced in commit 2843ff6f36db ("mptcp: remote addresses fullmesh"). If the flag is not visible in the dump after having set it, we don't check the content. Note that if we expect to have this feature and SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var is set to 1, we always check the content to avoid regressions. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 6da1dfdd037e ("selftests: mptcp: add set_flags tests in pm_netlink.sh") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 664cafc60705..d02e0d63a8f9 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -178,14 +178,19 @@ subflow,backup 10.0.1.1" "set flags (backup)" ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow 10.0.1.1" " (nobackup)" + +# fullmesh support has been added later ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ +if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" || + mptcp_lib_expect_all_features; then + check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow,fullmesh 10.0.1.1" " (fullmesh)" -ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ + ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh + check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow 10.0.1.1" " (nofullmesh)" -ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ + ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh + check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" +fi exit $ret From 8dee6ca2ac1e5630a7bb6a98bc0b686916fc2000 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:51 +0200 Subject: [PATCH 629/934] selftests: mptcp: sockopt: relax expected returned size Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the getsockopt(SOL_MPTCP) to get info about the MPTCP connections introduced by commit 55c42fa7fa33 ("mptcp: add MPTCP_INFO getsockopt") and the following ones. We cannot guess in advance which sizes the kernel will returned: older kernel can returned smaller sizes, e.g. recently the tcp_info structure has been modified in commit 71fc704768f6 ("tcp: add rcv_wnd and plb_rehash to TCP_INFO") where a new field has been added. The userspace can also expect a smaller size if it is compiled with old uAPI kernel headers. So for these sizes, we can only check if they are above a certain threshold, 0 for the moment. We can also only compared sizes with the ones set by the kernel. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: ce9979129a0b ("selftests: mptcp: add mptcp getsockopt test cases") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- .../selftests/net/mptcp/mptcp_sockopt.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index ae61f39556ca..b35148edbf02 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -87,6 +87,10 @@ struct so_state { uint64_t tcpi_rcv_delta; }; +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + static void die_perror(const char *msg) { perror(msg); @@ -349,13 +353,14 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)"); assert(olen <= sizeof(ti)); - assert(ti.d.size_user == ti.d.size_kernel); - assert(ti.d.size_user == sizeof(struct tcp_info)); + assert(ti.d.size_kernel > 0); + assert(ti.d.size_user == + MIN(ti.d.size_kernel, sizeof(struct tcp_info))); assert(ti.d.num_subflows == 1); assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); olen -= sizeof(struct mptcp_subflow_data); - assert(olen == sizeof(struct tcp_info)); + assert(olen == ti.d.size_user); if (ti.ti[0].tcpi_bytes_sent == w && ti.ti[0].tcpi_bytes_received == r) @@ -401,13 +406,14 @@ static void do_getsockopt_subflow_addrs(int fd) die_perror("getsockopt MPTCP_SUBFLOW_ADDRS"); assert(olen <= sizeof(addrs)); - assert(addrs.d.size_user == addrs.d.size_kernel); - assert(addrs.d.size_user == sizeof(struct mptcp_subflow_addrs)); + assert(addrs.d.size_kernel > 0); + assert(addrs.d.size_user == + MIN(addrs.d.size_kernel, sizeof(struct mptcp_subflow_addrs))); assert(addrs.d.num_subflows == 1); assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); olen -= sizeof(struct mptcp_subflow_data); - assert(olen == sizeof(struct mptcp_subflow_addrs)); + assert(olen == addrs.d.size_user); llen = sizeof(local); ret = getsockname(fd, (struct sockaddr *)&local, &llen); From c6f7eccc519837ebde1d099d9610c4f1d5bd975e Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:52 +0200 Subject: [PATCH 630/934] selftests: mptcp: sockopt: skip getsockopt checks if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the getsockopt(SOL_MPTCP) to get info about the MPTCP connections introduced by commit 55c42fa7fa33 ("mptcp: add MPTCP_INFO getsockopt") and the following ones. It is possible to look for "mptcp_diag_fill_info" in kallsyms because it is introduced by the mentioned feature. So we can know in advance if the feature is supported and skip the sub-test if not. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: ce9979129a0b ("selftests: mptcp: add mptcp getsockopt test cases") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index ff5adbb9c7f2..1d4ae8792227 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -87,6 +87,7 @@ cleanup() } mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then @@ -253,6 +254,11 @@ do_mptcp_sockopt_tests() { local lret=0 + if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then + echo "INFO: MPTCP sockopt not supported: SKIP" + return + fi + ip netns exec "$ns_sbox" ./mptcp_sockopt lret=$? From b631e3a4e94c77c9007d60b577a069c203ce9594 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:53 +0200 Subject: [PATCH 631/934] selftests: mptcp: sockopt: skip TCP_INQ checks if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is TCP_INQ cmsg support introduced in commit 2c9e77659a0c ("mptcp: add TCP_INQ cmsg support"). It is possible to look for "mptcp_ioctl" in kallsyms because it was needed to introduce the mentioned feature. We can skip these tests and not set TCPINQ option if the feature is not supported. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 5cbd886ce2a9 ("selftests: mptcp: add TCP_INQ support") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 1d4ae8792227..f295a371ff14 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -187,9 +187,14 @@ do_transfer() local_addr="0.0.0.0" fi + cmsg="TIMESTAMPNS" + if mptcp_lib_kallsyms_has "mptcp_ioctl$"; then + cmsg+=",TCPINQ" + fi + timeout ${timeout_test} \ ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ ${local_addr} < "$sin" > "$sout" & local spid=$! @@ -197,7 +202,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ $connect_addr < "$cin" > "$cout" & local cpid=$! @@ -313,6 +318,11 @@ do_tcpinq_tests() { local lret=0 + if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then + echo "INFO: TCP_INQ not supported: SKIP" + return + fi + local args for args in "-t tcp" "-r tcp"; do do_tcpinq_test $args From 723d6b9b12338c1caf06bf6fe269962ef04e2c71 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:54 +0200 Subject: [PATCH 632/934] selftests: mptcp: userspace pm: skip if 'ip' tool is unavailable When a required tool is missing, the return code 4 (SKIP) should be returned instead of 1 (FAIL). Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 259a834fadda ("selftests: mptcp: functional tests for the userspace PM type") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 8092399d911f..192ab818f292 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -8,7 +8,7 @@ mptcp_lib_check_mptcp ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Cannot not run test without ip tool" - exit 1 + exit ${KSFT_SKIP} fi ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED From f90adb033891d418c5dafef34a9aa49f3c860991 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:55 +0200 Subject: [PATCH 633/934] selftests: mptcp: userspace pm: skip if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the MPTCP Userspace PM introduced by commit 4638de5aefe5 ("mptcp: handle local addrs announced by userspace PMs"). We can skip all these tests if the feature is not supported simply by looking for the MPTCP pm_type's sysctl knob. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 259a834fadda ("selftests: mptcp: functional tests for the userspace PM type") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 192ab818f292..38a1d34f7b4d 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -5,6 +5,11 @@ mptcp_lib_check_mptcp +if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + echo "userspace pm tests are not supported by the kernel: SKIP" + exit ${KSFT_SKIP} +fi + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Cannot not run test without ip tool" From 626cb7a5f6b892e48f27a76d11af040c538e03dc Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 8 Jun 2023 18:38:56 +0200 Subject: [PATCH 634/934] selftests: mptcp: userspace pm: skip PM listener events tests if unavailable Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the new listener events linked to the path-manager introduced by commit f8c9dfbd875b ("mptcp: add pm listener events"). It is possible to look for "mptcp_event_pm_listener" in kallsyms to know in advance if the kernel supports this feature and skip these sub-tests if the feature is not supported. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 6c73008aa301 ("selftests: mptcp: listener test for userspace PM") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 38a1d34f7b4d..98d9e4d2d3fc 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -4,6 +4,7 @@ . "$(dirname "${0}")/mptcp_lib.sh" mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then echo "userspace pm tests are not supported by the kernel: SKIP" @@ -914,6 +915,11 @@ test_listener() { print_title "Listener tests" + if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + stdbuf -o0 -e0 printf "LISTENER events \t[SKIP] Not supported\n" + return + fi + # Capture events on the network namespace running the client :>$client_evts From c37cf54c12cfaa51e7aaf88708167b0d3259e64e Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 8 Jun 2023 13:02:26 -0700 Subject: [PATCH 635/934] iavf: remove mask from iavf_irq_enable_queues() Enable more than 32 IRQs by removing the u32 bit mask in iavf_irq_enable_queues(). There is no need for the mask as there are no callers that select individual IRQs through the bitmask. Also, if the PF allocates more than 32 IRQs, this mask will prevent us from using all of them. Modify the comment in iavf_register.h to show that the maximum number allowed for the IRQ index is 63 as per the iAVF standard 1.0 [1]. link: [1] https://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/ethernet-adaptive-virtual-function-hardware-spec.pdf Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Ahmed Zaki Tested-by: Rafal Romanowski Reviewed-by: Simon Horman Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230608200226.451861-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf.h | 2 +- drivers/net/ethernet/intel/iavf/iavf_main.c | 15 ++++++--------- drivers/net/ethernet/intel/iavf/iavf_register.h | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 9abaff1f2aff..39d0fe76a38f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -525,7 +525,7 @@ void iavf_set_ethtool_ops(struct net_device *netdev); void iavf_update_stats(struct iavf_adapter *adapter); void iavf_reset_interrupt_capability(struct iavf_adapter *adapter); int iavf_init_interrupt_scheme(struct iavf_adapter *adapter); -void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask); +void iavf_irq_enable_queues(struct iavf_adapter *adapter); void iavf_free_all_tx_resources(struct iavf_adapter *adapter); void iavf_free_all_rx_resources(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2de4baff4c20..4a66873882d1 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -359,21 +359,18 @@ static void iavf_irq_disable(struct iavf_adapter *adapter) } /** - * iavf_irq_enable_queues - Enable interrupt for specified queues + * iavf_irq_enable_queues - Enable interrupt for all queues * @adapter: board private structure - * @mask: bitmap of queues to enable **/ -void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask) +void iavf_irq_enable_queues(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; int i; for (i = 1; i < adapter->num_msix_vectors; i++) { - if (mask & BIT(i - 1)) { - wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1), - IAVF_VFINT_DYN_CTLN1_INTENA_MASK | - IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK); - } + wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1), + IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK); } } @@ -387,7 +384,7 @@ void iavf_irq_enable(struct iavf_adapter *adapter, bool flush) struct iavf_hw *hw = &adapter->hw; iavf_misc_irq_enable(adapter); - iavf_irq_enable_queues(adapter, ~0); + iavf_irq_enable_queues(adapter); if (flush) iavf_flush(hw); diff --git a/drivers/net/ethernet/intel/iavf/iavf_register.h b/drivers/net/ethernet/intel/iavf/iavf_register.h index bf793332fc9d..a19e88898a0b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_register.h +++ b/drivers/net/ethernet/intel/iavf/iavf_register.h @@ -40,7 +40,7 @@ #define IAVF_VFINT_DYN_CTL01_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTL01_INTENA_SHIFT) #define IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3 #define IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK IAVF_MASK(0x3, IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT) -#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */ +#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...63 */ /* Reset: VFR */ #define IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT 0 #define IAVF_VFINT_DYN_CTLN1_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT) #define IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2 From 4e635f9d86165e47f5440196f2ebdb258efb8341 Mon Sep 17 00:00:00 2001 From: Satha Rao Date: Thu, 8 Jun 2023 17:12:00 +0530 Subject: [PATCH 636/934] octeontx2-af: fixed resource availability check txschq_alloc response have two different arrays to store continuous and non-continuous schedulers of each level. Requested count should be checked for each array separately. Fixes: 5d9b976d4480 ("octeontx2-af: Support fixed transmit scheduler topology") Signed-off-by: Satha Rao Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Naveen Mamindlapalli Reviewed-by: Sridhar Samudrala Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 4ad707e758b9..1e058b96cbe2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1878,7 +1878,8 @@ static int nix_check_txschq_alloc_req(struct rvu *rvu, int lvl, u16 pcifunc, free_cnt = rvu_rsrc_free_count(&txsch->schq); } - if (free_cnt < req_schq || req_schq > MAX_TXSCHQ_PER_FUNC) + if (free_cnt < req_schq || req->schq[lvl] > MAX_TXSCHQ_PER_FUNC || + req->schq_contig[lvl] > MAX_TXSCHQ_PER_FUNC) return NIX_AF_ERR_TLX_ALLOC_FAIL; /* If contiguous queues are needed, check for availability */ From 87e12a17eef476bbf768dc3a74419ad461f36fbc Mon Sep 17 00:00:00 2001 From: Nithin Dabilpuram Date: Thu, 8 Jun 2023 17:12:01 +0530 Subject: [PATCH 637/934] octeontx2-af: fix lbk link credits on cn10k Fix LBK link credits on CN10K to be same as CN9K i.e 16 * MAX_LBK_DATA_RATE instead of current scheme of calculation based on LBK buf length / FIFO size. Fixes: 6e54e1c5399a ("octeontx2-af: cn10K: Add MTU configuration") Signed-off-by: Nithin Dabilpuram Signed-off-by: Naveen Mamindlapalli Reviewed-by: Sridhar Samudrala Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 1e058b96cbe2..f01d057ad025 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4081,10 +4081,6 @@ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req, static u64 rvu_get_lbk_link_credits(struct rvu *rvu, u16 lbk_max_frs) { - /* CN10k supports 72KB FIFO size and max packet size of 64k */ - if (rvu->hw->lbk_bufsize == 0x12000) - return (rvu->hw->lbk_bufsize - lbk_max_frs) / 16; - return 1600; /* 16 * max LBK datarate = 16 * 100Gbps */ } From b403643d154d15176b060b82f7fc605210033edd Mon Sep 17 00:00:00 2001 From: Dmitry Mastykin Date: Thu, 8 Jun 2023 16:57:54 +0300 Subject: [PATCH 638/934] netlabel: fix shift wrapping bug in netlbl_catmap_setlong() There is a shift wrapping bug in this code on 32-bit architectures. NETLBL_CATMAP_MAPTYPE is u64, bitmap is unsigned long. Every second 32-bit word of catmap becomes corrupted. Signed-off-by: Dmitry Mastykin Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/netlabel/netlabel_kapi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 54c083003947..27511c90a26f 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -857,7 +857,8 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap, offset -= iter->startbit; idx = offset / NETLBL_CATMAP_MAPSIZE; - iter->bitmap[idx] |= bitmap << (offset % NETLBL_CATMAP_MAPSIZE); + iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap + << (offset % NETLBL_CATMAP_MAPSIZE); return 0; } From 7ebe4eda4265642859507d1b3ca330d8c196cfe5 Mon Sep 17 00:00:00 2001 From: David Christensen Date: Thu, 8 Jun 2023 16:01:43 -0400 Subject: [PATCH 639/934] bnx2x: fix page fault following EEH recovery In the last step of the EEH recovery process, the EEH driver calls into bnx2x_io_resume() to re-initialize the NIC hardware via the function bnx2x_nic_load(). If an error occurs during bnx2x_nic_load(), OS and hardware resources are released and an error code is returned to the caller. When called from bnx2x_io_resume(), the return code is ignored and the network interface is brought up unconditionally. Later attempts to send a packet via this interface result in a page fault due to a null pointer reference. This patch checks the return code of bnx2x_nic_load(), prints an error message if necessary, and does not enable the interface. Signed-off-by: David Christensen Reviewed-by: Sridhar Samudrala Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 637d162bbcfa..1e7a6f1d4223 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -14294,11 +14294,16 @@ static void bnx2x_io_resume(struct pci_dev *pdev) bp->fw_seq = SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK; - if (netif_running(dev)) - bnx2x_nic_load(bp, LOAD_NORMAL); + if (netif_running(dev)) { + if (bnx2x_nic_load(bp, LOAD_NORMAL)) { + netdev_err(bp->dev, "Error during driver initialization, try unloading/reloading the driver\n"); + goto done; + } + } netif_device_attach(dev); +done: rtnl_unlock(); } From ee4d269eccfea6c17b18281bef482700d898e86f Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 5 Jun 2023 13:33:17 +0300 Subject: [PATCH 640/934] RDMA/mlx5: Initiate dropless RQ for RAW Ethernet functions Delay drop data is initiated for PFs that have the capability of rq_delay_drop and are in roce profile. However, PFs with RAW ethernet profile do not initiate delay drop data on function load, causing kernel panic if delay drop struct members are accessed later on in case a dropless RQ is created. Thus, stage the delay drop initialization as part of RAW ethernet PF loading process. Fixes: b5ca15ad7e61 ("IB/mlx5: Add proper representors support") Signed-off-by: Maher Sanalla Reviewed-by: Maor Gottlieb Link: https://lore.kernel.org/r/2e9d386785043d48c38711826eb910315c1de141.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5d45de223c43..f0b394ed7452 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4275,6 +4275,9 @@ const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), + STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, + mlx5_ib_stage_delay_drop_init, + mlx5_ib_stage_delay_drop_cleanup), STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, mlx5_ib_restrack_init, NULL), From e1f4a52ac171dd863fe89055e749ef5e0a0bc5ce Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 5 Jun 2023 13:33:18 +0300 Subject: [PATCH 641/934] RDMA/mlx5: Create an indirect flow table for steering anchor A misbehaved user can create a steering anchor that points to a kernel flow table and then destroy the anchor without freeing the associated STC. This creates a problem as the kernel can't destroy the flow table since there is still a reference to it. As a result, this can exhaust all available flow table resources, preventing other users from using the RDMA device. To prevent this problem, a solution is implemented where a special flow table with two steering rules is created when a user creates a steering anchor for the first time. The rules include one that drops all traffic and another that points to the kernel flow table. If the steering anchor is destroyed, only the rule pointing to the kernel's flow table is removed. Any traffic reaching the special flow table after that is dropped. Since the special flow table is not destroyed when the steering anchor is destroyed, any issues are prevented from occurring. The remaining resources are only destroyed when the RDMA device is destroyed, which happens after all DEVX objects are freed, including the STCs, thus mitigating the issue. Fixes: 0c6ab0ca9a66 ("RDMA/mlx5: Expose steering anchor to userspace") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Link: https://lore.kernel.org/r/b4a88a871d651fa4e8f98d552553c1cfe9ba2cd6.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/fs.c | 276 ++++++++++++++++++++++++++- drivers/infiniband/hw/mlx5/fs.h | 16 ++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 11 ++ 3 files changed, 296 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 3008632a6c20..1e419e080b53 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -695,8 +695,6 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev, struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; - if (mlx5_ib_shared_ft_allowed(&dev->ib_dev)) - ft_attr.uid = MLX5_SHARED_RESOURCE_UID; ft_attr.prio = priority; ft_attr.max_fte = num_entries; ft_attr.flags = flags; @@ -2025,6 +2023,237 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, return 0; } +static int steering_anchor_create_ft(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + + if (ft_prio->anchor.ft) + return 0; + + ns = mlx5_get_flow_namespace(dev->mdev, ns_type); + if (!ns) + return -EOPNOTSUPP; + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.uid = MLX5_SHARED_RESOURCE_UID; + ft_attr.prio = 0; + ft_attr.max_fte = 2; + ft_attr.level = 1; + + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + ft_prio->anchor.ft = ft; + + return 0; +} + +static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.ft) { + mlx5_destroy_flow_table(ft_prio->anchor.ft); + ft_prio->anchor.ft = NULL; + } +} + +static int +steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + void *flow_group_in; + int err = 0; + + if (ft_prio->anchor.fg_drop) + return 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + goto out; + } + + ft_prio->anchor.fg_drop = fg; + +out: + kvfree(flow_group_in); + + return err; +} + +static void +steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.fg_drop) { + mlx5_destroy_flow_group(ft_prio->anchor.fg_drop); + ft_prio->anchor.fg_drop = NULL; + } +} + +static int +steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + void *flow_group_in; + int err = 0; + + if (ft_prio->anchor.fg_goto_table) + return 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + goto out; + } + ft_prio->anchor.fg_goto_table = fg; + +out: + kvfree(flow_group_in); + + return err; +} + +static void +steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.fg_goto_table) { + mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table); + ft_prio->anchor.fg_goto_table = NULL; + } +} + +static int +steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *handle; + + if (ft_prio->anchor.rule_drop) + return 0; + + flow_act.fg = ft_prio->anchor.fg_drop; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + + handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act, + NULL, 0); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ft_prio->anchor.rule_drop = handle; + + return 0; +} + +static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.rule_drop) { + mlx5_del_flow_rules(ft_prio->anchor.rule_drop); + ft_prio->anchor.rule_drop = NULL; + } +} + +static int +steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *handle; + + if (ft_prio->anchor.rule_goto_table) + return 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + flow_act.fg = ft_prio->anchor.fg_goto_table; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = ft_prio->flow_table; + + handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act, + &dest, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ft_prio->anchor.rule_goto_table = handle; + + return 0; +} + +static void +steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio) +{ + if (ft_prio->anchor.rule_goto_table) { + mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table); + ft_prio->anchor.rule_goto_table = NULL; + } +} + +static int steering_anchor_create_res(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_prio, + enum mlx5_flow_namespace_type ns_type) +{ + int err; + + err = steering_anchor_create_ft(dev, ft_prio, ns_type); + if (err) + return err; + + err = steering_anchor_create_fg_drop(ft_prio); + if (err) + goto destroy_ft; + + err = steering_anchor_create_fg_goto_table(ft_prio); + if (err) + goto destroy_fg_drop; + + err = steering_anchor_create_rule_drop(ft_prio); + if (err) + goto destroy_fg_goto_table; + + err = steering_anchor_create_rule_goto_table(ft_prio); + if (err) + goto destroy_rule_drop; + + return 0; + +destroy_rule_drop: + steering_anchor_destroy_rule_drop(ft_prio); +destroy_fg_goto_table: + steering_anchor_destroy_fg_goto_table(ft_prio); +destroy_fg_drop: + steering_anchor_destroy_fg_drop(ft_prio); +destroy_ft: + steering_anchor_destroy_ft(ft_prio); + + return err; +} + +static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio) +{ + steering_anchor_destroy_rule_goto_table(ft_prio); + steering_anchor_destroy_rule_drop(ft_prio); + steering_anchor_destroy_fg_goto_table(ft_prio); + steering_anchor_destroy_fg_drop(ft_prio); + steering_anchor_destroy_ft(ft_prio); +} + static int steering_anchor_cleanup(struct ib_uobject *uobject, enum rdma_remove_reason why, struct uverbs_attr_bundle *attrs) @@ -2035,6 +2264,9 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject, return -EBUSY; mutex_lock(&obj->dev->flow_db->lock); + if (!--obj->ft_prio->anchor.rule_goto_table_ref) + steering_anchor_destroy_rule_goto_table(obj->ft_prio); + put_flow_table(obj->dev, obj->ft_prio, true); mutex_unlock(&obj->dev->flow_db->lock); @@ -2042,6 +2274,24 @@ static int steering_anchor_cleanup(struct ib_uobject *uobject, return 0; } +static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio, + int count) +{ + while (count--) + mlx5_steering_anchor_destroy_res(&prio[count]); +} + +void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) +{ + fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT); + fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT); + fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS); + fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS); + fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS); + fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT); + fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT); +} + static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, struct mlx5_ib_flow_matcher *obj) { @@ -2182,21 +2432,31 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)( return -ENOMEM; mutex_lock(&dev->flow_db->lock); + ft_prio = _get_flow_table(dev, priority, ns_type, 0); if (IS_ERR(ft_prio)) { - mutex_unlock(&dev->flow_db->lock); err = PTR_ERR(ft_prio); goto free_obj; } ft_prio->refcount++; - ft_id = mlx5_flow_table_id(ft_prio->flow_table); - mutex_unlock(&dev->flow_db->lock); + + if (!ft_prio->anchor.rule_goto_table_ref) { + err = steering_anchor_create_res(dev, ft_prio, ns_type); + if (err) + goto put_flow_table; + } + + ft_prio->anchor.rule_goto_table_ref++; + + ft_id = mlx5_flow_table_id(ft_prio->anchor.ft); err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID, &ft_id, sizeof(ft_id)); if (err) - goto put_flow_table; + goto destroy_res; + + mutex_unlock(&dev->flow_db->lock); uobj->object = obj; obj->dev = dev; @@ -2205,8 +2465,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)( return 0; +destroy_res: + --ft_prio->anchor.rule_goto_table_ref; + mlx5_steering_anchor_destroy_res(ft_prio); put_flow_table: - mutex_lock(&dev->flow_db->lock); put_flow_table(dev, ft_prio, true); mutex_unlock(&dev->flow_db->lock); free_obj: diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h index ad320adaf321..b9734904f5f0 100644 --- a/drivers/infiniband/hw/mlx5/fs.h +++ b/drivers/infiniband/hw/mlx5/fs.h @@ -10,6 +10,7 @@ #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_fs_init(struct mlx5_ib_dev *dev); +void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev); #else static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) { @@ -21,9 +22,24 @@ static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev) mutex_init(&dev->flow_db->lock); return 0; } + +inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {} #endif + static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev) { + /* When a steering anchor is created, a special flow table is also + * created for the user to reference. Since the user can reference it, + * the kernel cannot trust that when the user destroys the steering + * anchor, they no longer reference the flow table. + * + * To address this issue, when a user destroys a steering anchor, only + * the flow steering rule in the table is destroyed, but the table + * itself is kept to deal with the above scenario. The remaining + * resources are only removed when the RDMA device is destroyed, which + * is a safe assumption that all references are gone. + */ + mlx5_ib_fs_cleanup_anchor(dev); kfree(dev->flow_db); } #endif /* _MLX5_IB_FS_H */ diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index efa4dc6e7dee..91fc0cdf377d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -237,8 +237,19 @@ enum { #define MLX5_IB_NUM_SNIFFER_FTS 2 #define MLX5_IB_NUM_EGRESS_FTS 1 #define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS + +struct mlx5_ib_anchor { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg_goto_table; + struct mlx5_flow_group *fg_drop; + struct mlx5_flow_handle *rule_goto_table; + struct mlx5_flow_handle *rule_drop; + unsigned int rule_goto_table_ref; +}; + struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; + struct mlx5_ib_anchor anchor; unsigned int refcount; }; From c2ea687e5e0e29802714a981a1ccdc17c2c4b2be Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 5 Jun 2023 13:33:19 +0300 Subject: [PATCH 642/934] RDMA/mlx5: Fix Q-counters per vport allocation Previously Q-counters data was being allocated over the PF for all of the available vports, however that isn't necessary. Since each VF or SF has a Q-counter allocated for itself. So we only need to allocate two counters data structures, one for the device counters, and one for all the other vports to expose the representors, since they only need to read from it in order to determine mainly counters numbers and names, so they can all share. This in turn also solves a bug we previously had where we couldn't switch the device to switchdev mode when there were more than 128 SF/VFs configured, since that is the maximum amount of Q-counters available for a single port Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/f54671df16e2227a069b229b33b62cd9ee24c475.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 1c06920505d2..3d7ef81a50b8 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -209,7 +209,8 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, !vport_qcounters_supported(dev)) || !port_num) return &dev->port[0].cnts; - return &dev->port[port_num - 1].cnts; + return is_mdev_switchdev_mode(dev->mdev) ? + &dev->port[1].cnts : &dev->port[port_num - 1].cnts; } /** @@ -262,7 +263,7 @@ static struct rdma_hw_stats * mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts; + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); return do_alloc_stats(cnts); } @@ -725,11 +726,11 @@ err: static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {}; - int num_cnt_ports; + int num_cnt_ports = dev->num_ports; int i, j; - num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) || - vport_qcounters_supported(dev)) ? dev->num_ports : 1; + if (is_mdev_switchdev_mode(dev->mdev)) + num_cnt_ports = min(2, num_cnt_ports); MLX5_SET(dealloc_q_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); @@ -761,15 +762,22 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) { u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {}; u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {}; - int num_cnt_ports; + int num_cnt_ports = dev->num_ports; int err = 0; int i; bool is_shared; MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; - num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) || - vport_qcounters_supported(dev)) ? dev->num_ports : 1; + + /* + * In switchdev we need to allocate two ports, one that is used for + * the device Q_counters and it is essentially the real Q_counters of + * this device, while the other is used as a helper for PF to be able to + * query all other vports. + */ + if (is_mdev_switchdev_mode(dev->mdev)) + num_cnt_ports = min(2, num_cnt_ports); for (i = 0; i < num_cnt_ports; i++) { err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i); From e80ef139488fb4f481c877a81f6ba54f1f0ee416 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 5 Jun 2023 13:33:20 +0300 Subject: [PATCH 643/934] RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters Previously the Q-counters initialization assumed that the vport Q-counters structures and the normal Q-counters structures are identical in size, and hence when a Q-counter was added to normal Q-counters structure but not to the vport Q-counters struct it would lead to that counter name being NULL in switchdev mode, which could cause the kernel crash below. Currently break the dependency between those two structure and always use the appropriate struct size, in order to remove the assumption that both structure sizes are equal. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 20c64a067 P4D 20c64a067 PUD 20152b067 PMD 0 Oops: 0000 [#1] SMP CPU: 19 PID: 11717 Comm: devlink Tainted: G OE 6.2.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: kernfs_name_hash+0x12/0x80 kernfs_find_ns+0x35/0xb0 kernfs_remove_by_name_ns+0x46/0xc0 remove_files.isra.1+0x30/0x70 internal_create_group+0x253/0x380 internal_create_groups.part.4+0x3e/0xa0 setup_port+0x27a/0x8c0 [ib_core] ib_setup_port_attrs+0x9d/0x300 [ib_core] ib_register_device+0x48e/0x550 [ib_core] __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5_ib_vport_rep_load+0x141/0x360 [mlx5_ib] mlx5_esw_offloads_rep_load+0x48/0xa0 [mlx5_core] esw_offloads_enable+0x41e/0xd10 [mlx5_core] mlx5_eswitch_enable_locked+0x1e3/0x340 [mlx5_core] ? __cond_resched+0x15/0x30 mlx5_devlink_eswitch_mode_set+0x204/0x3c0 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0x8d/0x100 genl_family_rcv_msg_doit.isra.19+0xea/0x110 genl_rcv_msg+0x19b/0x290 ? devlink_nl_cmd_region_read_dumpit+0x760/0x760 ? devlink_nl_cmd_port_param_get_doit+0x30/0x30 ? devlink_put+0x50/0x50 ? genl_get_cmd_both+0x60/0x60 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1be/0x2a0 netlink_sendmsg+0x361/0x4d0 sock_sendmsg+0x30/0x40 __sys_sendto+0x11a/0x150 ? handle_mm_fault+0x101/0x2b0 ? do_user_addr_fault+0x21d/0x720 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fa533611cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffdb6a898a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000daab00 RCX: 00007fa533611cba RDX: 0000000000000038 RSI: 0000000000daab00 RDI: 0000000000000003 RBP: 0000000000daa910 R08: 00007fa533822000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 Modules linked in: rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) mlx5_core(OE) mlxdevm(OE) ib_uverbs(OE) ib_core(OE) mlx_compat(OE) mlxfw(OE) memtrack(OE) pci_hyperv_intf nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_filter iptable_nat dns_resolver nf_nat br_netfilter nfs bridge stp llc lockd grace fscache netfs rfkill overlay iTCO_wdt iTCO_vendor_support kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel i2c_i801 sunrpc lpc_ich sha512_ssse3 pcspkr i2c_smbus mfd_core drm sch_fq_codel i2c_core ip_tables fuse crc32c_intel serio_raw virtio_net net_failover failover [last unloaded: mlxfw] CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:strlen+0x0/0x20 Code: 66 2e 0f 1f 84 00 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 48 89 f8 74 10 48 83 c7 01 80 3f 00 75 f7 48 29 c7 48 89 RSP: 0018:ffffc9000318b618 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000002c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888211918110 R09: ffff888211918000 R10: 000000000000001e R11: ffff888211918000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8881038ec250 FS: 00007fa53342fe80(0000) GS:ffff88885fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000002042b2003 CR4: 0000000000770ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception ]--- Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/016777b7f16eb6bb178999ff59097d0c0f91f68a.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 58 ++++++++++++++++++--------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 3d7ef81a50b8..f40d9c61e30b 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -576,43 +576,53 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, bool is_vport = is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF; const struct mlx5_ib_counter *names; - int j = 0, i; + int j = 0, i, size; names = is_vport ? vport_basic_q_cnts : basic_q_cnts; - for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) { + size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) : + ARRAY_SIZE(basic_q_cnts); + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = basic_q_cnts[i].offset; + offsets[j] = names[i].offset; } names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts; + size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) : + ARRAY_SIZE(out_of_seq_q_cnts); if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) { - for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) { + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = out_of_seq_q_cnts[i].offset; + offsets[j] = names[i].offset; } } names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts; + size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) : + ARRAY_SIZE(retrans_q_cnts); if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { - for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) { + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = retrans_q_cnts[i].offset; + offsets[j] = names[i].offset; } } names = is_vport ? vport_extended_err_cnts : extended_err_cnts; + size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) : + ARRAY_SIZE(extended_err_cnts); if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) { - for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) { + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = extended_err_cnts[i].offset; + offsets[j] = names[i].offset; } } names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts; + size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) : + ARRAY_SIZE(roce_accl_cnts); if (MLX5_CAP_GEN(dev->mdev, roce_accl)) { - for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) { + for (i = 0; i < size; i++, j++) { descs[j].name = names[i].name; - offsets[j] = roce_accl_cnts[i].offset; + offsets[j] = names[i].offset; } } @@ -662,25 +672,37 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, struct mlx5_ib_counters *cnts, u32 port_num) { - u32 num_counters, num_op_counters = 0; + bool is_vport = is_mdev_switchdev_mode(dev->mdev) && + port_num != MLX5_VPORT_PF; + u32 num_counters, num_op_counters = 0, size; - num_counters = ARRAY_SIZE(basic_q_cnts); + size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) : + ARRAY_SIZE(basic_q_cnts); + num_counters = size; + size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) : + ARRAY_SIZE(out_of_seq_q_cnts); if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) - num_counters += ARRAY_SIZE(out_of_seq_q_cnts); + num_counters += size; + size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) : + ARRAY_SIZE(retrans_q_cnts); if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) - num_counters += ARRAY_SIZE(retrans_q_cnts); + num_counters += size; + size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) : + ARRAY_SIZE(extended_err_cnts); if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) - num_counters += ARRAY_SIZE(extended_err_cnts); + num_counters += size; + size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) : + ARRAY_SIZE(roce_accl_cnts); if (MLX5_CAP_GEN(dev->mdev, roce_accl)) - num_counters += ARRAY_SIZE(roce_accl_cnts); + num_counters += size; cnts->num_q_counters = num_counters; - if (is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF) + if (is_vport) goto skip_non_qcounters; if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { From 2de43f5b5137e03ef64a2c3f064a01992830c67f Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 5 Jun 2023 13:33:21 +0300 Subject: [PATCH 644/934] RDMA/mlx5: Fix Q-counters query in LAG mode Previously we used the core device associated to the IB device in order to do the Q-counters query to the FW, but in LAG mode it is possible that the core device isn't the one that created this VF. Hence instead of using the core device to query the Q-counters we use the ESW core device which is guaranteed to be that of the VF. Fixes: d22467a71ebe ("RDMA/mlx5: Expand switchdev Q-counters to expose representor statistics") Signed-off-by: Patrisious Haddad Reviewed-by: Mark Zhang Link: https://lore.kernel.org/r/778d7d7a24892348d0bdef17d2e5f9e044717e86.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index f40d9c61e30b..93257fa5aae8 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -330,6 +330,7 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev, { u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {}; + struct mlx5_core_dev *mdev; __be32 val; int ret, i; @@ -337,12 +338,16 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev, dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK) return 0; + mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw); + if (!mdev) + return -EOPNOTSUPP; + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); MLX5_SET(query_q_counter_in, in, other_vport, 1); MLX5_SET(query_q_counter_in, in, vport_number, dev->port[port_num].rep->vport); MLX5_SET(query_q_counter_in, in, aggregate, 1); - ret = mlx5_cmd_exec_inout(dev->mdev, query_q_counter, in, out); + ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out); if (ret) return ret; From 58030c76cce473b6cfd630bbecb97215def0dff8 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 5 Jun 2023 13:33:23 +0300 Subject: [PATCH 645/934] RDMA/cma: Always set static rate to 0 for RoCE Set static rate to 0 as it should be discovered by path query and has no meaning for RoCE. This also avoid of using the rtnl lock and ethtool API, which is a bottleneck when try to setup many rdma-cm connections at the same time, especially with multiple processes. Fixes: 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/f72a4f8b667b803aee9fa794069f61afb5839ce4.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 4 ++-- include/rdma/ib_addr.h | 23 ----------------------- 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 93a1c48d0c32..6b3f4384e46a 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3295,7 +3295,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->traffic_class = tos; route->path_rec->mtu = iboe_get_mtu(ndev->mtu); route->path_rec->rate_selector = IB_SA_EQ; - route->path_rec->rate = iboe_get_rate(ndev); + route->path_rec->rate = IB_RATE_PORT_CURRENT; dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; /* In case ACK timeout is set, use this value to calculate @@ -4964,7 +4964,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (!ndev) return -ENODEV; - ib.rec.rate = iboe_get_rate(ndev); + ib.rec.rate = IB_RATE_PORT_CURRENT; ib.rec.hop_limit = 1; ib.rec.mtu = iboe_get_mtu(ndev->mtu); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index d808dc3d239e..811a0f11d0db 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -194,29 +194,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu) return 0; } -static inline int iboe_get_rate(struct net_device *dev) -{ - struct ethtool_link_ksettings cmd; - int err; - - rtnl_lock(); - err = __ethtool_get_link_ksettings(dev, &cmd); - rtnl_unlock(); - if (err) - return IB_RATE_PORT_CURRENT; - - if (cmd.base.speed >= 40000) - return IB_RATE_40_GBPS; - else if (cmd.base.speed >= 30000) - return IB_RATE_30_GBPS; - else if (cmd.base.speed >= 20000) - return IB_RATE_20_GBPS; - else if (cmd.base.speed >= 10000) - return IB_RATE_10_GBPS; - else - return IB_RATE_PORT_CURRENT; -} - static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == htonl(0xfe800000) && From 0cadb4db79e1d9eea66711c4031e435c2191907e Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Mon, 5 Jun 2023 13:33:24 +0300 Subject: [PATCH 646/934] RDMA/uverbs: Restrict usage of privileged QKEYs According to the IB specification rel-1.6, section 3.5.3: "QKEYs with the most significant bit set are considered controlled QKEYs, and a HCA does not allow a consumer to arbitrarily specify a controlled QKEY." Thus, block non-privileged users from setting such a QKEY. Cc: stable@vger.kernel.org Fixes: bc38a6abdd5a ("[PATCH] IB uverbs: core implementation") Signed-off-by: Edward Srouji Link: https://lore.kernel.org/r/c00c809ddafaaf87d6f6cb827978670989a511b3.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_cmd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4796f6a8828c..e836c9c477f6 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1850,8 +1850,13 @@ static int modify_qp(struct uverbs_attr_bundle *attrs, attr->path_mtu = cmd->base.path_mtu; if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE) attr->path_mig_state = cmd->base.path_mig_state; - if (cmd->base.attr_mask & IB_QP_QKEY) + if (cmd->base.attr_mask & IB_QP_QKEY) { + if (cmd->base.qkey & IB_QP_SET_QKEY && !capable(CAP_NET_RAW)) { + ret = -EPERM; + goto release_qp; + } attr->qkey = cmd->base.qkey; + } if (cmd->base.attr_mask & IB_QP_RQ_PSN) attr->rq_psn = cmd->base.rq_psn; if (cmd->base.attr_mask & IB_QP_SQ_PSN) From 62fab312fa1683e812e605db20d4f22de3e3fb2f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Mon, 5 Jun 2023 13:33:25 +0300 Subject: [PATCH 647/934] IB/uverbs: Fix to consider event queue closing also upon non-blocking mode Fix ib_uverbs_event_read() to consider event queue closing also upon non-blocking mode. Once the queue is closed (e.g. hot-plug flow) all the existing events are cleaned-up as part of ib_uverbs_free_event_queue(). An application that uses the non-blocking FD mode should get -EIO in that case to let it knows that the device was removed already. Otherwise, it can loose the indication that the device was removed and won't recover. As part of that, refactor the code to have a single flow with regards to 'is_closed' for both blocking and non-blocking modes. Fixes: 14e23bd6d221 ("RDMA/core: Fix locking in ib_uverbs_event_read") Reviewed-by: Maor Gottlieb Signed-off-by: Yishai Hadas Link: https://lore.kernel.org/r/97b00116a1e1e13f8dc4ec38a5ea81cf8c030210.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_main.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index fbace69672ca..7c9c79c13941 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -222,8 +222,12 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, spin_lock_irq(&ev_queue->lock); while (list_empty(&ev_queue->event_list)) { - spin_unlock_irq(&ev_queue->lock); + if (ev_queue->is_closed) { + spin_unlock_irq(&ev_queue->lock); + return -EIO; + } + spin_unlock_irq(&ev_queue->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; @@ -233,12 +237,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, return -ERESTARTSYS; spin_lock_irq(&ev_queue->lock); - - /* If device was disassociated and no event exists set an error */ - if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) { - spin_unlock_irq(&ev_queue->lock); - return -EIO; - } } event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); From 617f5db1a626f18d5cbb7c7faf7bf8f9ea12be78 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 5 Jun 2023 13:33:26 +0300 Subject: [PATCH 648/934] RDMA/mlx5: Fix affinity assignment The cited commit aimed to ensure that Virtual Functions (VFs) assign a queue affinity to a Queue Pair (QP) to distribute traffic when the LAG master creates a hardware LAG. If the affinity was set while the hardware was not in LAG, the firmware would ignore the affinity value. However, this commit unintentionally assigned an affinity to QPs on the LAG master's VPORT even if the RDMA device was not marked as LAG-enabled. In most cases, this was not an issue because when the hardware entered hardware LAG configuration, the RDMA device of the LAG master would be destroyed and a new one would be created, marked as LAG-enabled. The problem arises when a user configures Equal-Cost Multipath (ECMP). In ECMP mode, traffic can be directed to different physical ports based on the queue affinity, which is intended for use by VPORTS other than the E-Switch manager. ECMP mode is supported only if both E-Switch managers are in switchdev mode and the appropriate route is configured via IP. In this configuration, the RDMA device is not destroyed, and we retain the RDMA device that is not marked as LAG-enabled. To ensure correct behavior, Send Queues (SQs) opened by the E-Switch manager through verbs should be assigned strict affinity. This means they will only be able to communicate through the native physical port associated with the E-Switch manager. This will prevent the firmware from assigning affinity and will not allow the SQs to be remapped in case of failover. Fixes: 802dcc7fc5ec ("RDMA/mlx5: Support TX port affinity for VF drivers in LAG mode") Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Link: https://lore.kernel.org/r/425b05f4da840bc684b0f7e8ebf61aeb5cef09b0.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 12 ------------ include/linux/mlx5/driver.h | 12 ++++++++++++ 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 91fc0cdf377d..2dfa6f49a6f4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1598,6 +1598,9 @@ static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass)) return 0; + if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active) + return 0; + return dev->lag_active || (MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 && MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity)); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 70ca8ffa9256..78b96bfb4e6a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1237,6 +1237,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); MLX5_SET(tisc, tisc, transport_domain, tdn); + if (!mlx5_ib_lag_should_assign_affinity(dev) && + mlx5_lag_is_lacp_owner(dev->mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); if (qp->flags & IB_QP_CREATE_SOURCE_QPN) MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1d879374acaa..229520405d4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -276,18 +276,6 @@ static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return pci_num_vf(dev->pdev) ? true : false; } -static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) -{ - /* LACP owner conditions: - * 1) Function is physical. - * 2) LAG is supported by FW. - * 3) LAG is managed by driver (currently the only option). - */ - return MLX5_CAP_GEN(dev, vport_group_manager) && - (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && - MLX5_CAP_GEN(dev, lag_master); -} - int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev); static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a4c4f737f9c1..8ad16b779898 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1237,6 +1237,18 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev) return dev->priv.sriov.max_vfs; } +static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) +{ + /* LACP owner conditions: + * 1) Function is physical. + * 2) LAG is supported by FW. + * 3) LAG is managed by driver (currently the only option). + */ + return MLX5_CAP_GEN(dev, vport_group_manager) && + (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && + MLX5_CAP_GEN(dev, lag_master); +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { From 691b0480933f0ce88a81ed1d1a0aff340ff6293a Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Tue, 6 Jun 2023 03:25:29 -0700 Subject: [PATCH 649/934] IB/isert: Fix dead lock in ib_isert - When a iSER session is released, ib_isert module is taking a mutex lock and releasing all pending connections. As part of this, ib_isert is destroying rdma cm_id. To destroy cm_id, rdma_cm module is sending CM events to CMA handler of ib_isert. This handler is taking same mutex lock. Hence it leads to deadlock between ib_isert & rdma_cm modules. - For fix, created local list of pending connections and release the connection outside of mutex lock. Calltrace: --------- [ 1229.791410] INFO: task kworker/10:1:642 blocked for more than 120 seconds. [ 1229.791416] Tainted: G OE --------- - - 4.18.0-372.9.1.el8.x86_64 #1 [ 1229.791418] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1229.791419] task:kworker/10:1 state:D stack: 0 pid: 642 ppid: 2 flags:0x80004000 [ 1229.791424] Workqueue: ib_cm cm_work_handler [ib_cm] [ 1229.791436] Call Trace: [ 1229.791438] __schedule+0x2d1/0x830 [ 1229.791445] ? select_idle_sibling+0x23/0x6f0 [ 1229.791449] schedule+0x35/0xa0 [ 1229.791451] schedule_preempt_disabled+0xa/0x10 [ 1229.791453] __mutex_lock.isra.7+0x310/0x420 [ 1229.791456] ? select_task_rq_fair+0x351/0x990 [ 1229.791459] isert_cma_handler+0x224/0x330 [ib_isert] [ 1229.791463] ? ttwu_queue_wakelist+0x159/0x170 [ 1229.791466] cma_cm_event_handler+0x25/0xd0 [rdma_cm] [ 1229.791474] cma_ib_handler+0xa7/0x2e0 [rdma_cm] [ 1229.791478] cm_process_work+0x22/0xf0 [ib_cm] [ 1229.791483] cm_work_handler+0xf4/0xf30 [ib_cm] [ 1229.791487] ? move_linked_works+0x6e/0xa0 [ 1229.791490] process_one_work+0x1a7/0x360 [ 1229.791491] ? create_worker+0x1a0/0x1a0 [ 1229.791493] worker_thread+0x30/0x390 [ 1229.791494] ? create_worker+0x1a0/0x1a0 [ 1229.791495] kthread+0x10a/0x120 [ 1229.791497] ? set_kthread_struct+0x40/0x40 [ 1229.791499] ret_from_fork+0x1f/0x40 [ 1229.791739] INFO: task targetcli:28666 blocked for more than 120 seconds. [ 1229.791740] Tainted: G OE --------- - - 4.18.0-372.9.1.el8.x86_64 #1 [ 1229.791741] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1229.791742] task:targetcli state:D stack: 0 pid:28666 ppid: 5510 flags:0x00004080 [ 1229.791743] Call Trace: [ 1229.791744] __schedule+0x2d1/0x830 [ 1229.791746] schedule+0x35/0xa0 [ 1229.791748] schedule_preempt_disabled+0xa/0x10 [ 1229.791749] __mutex_lock.isra.7+0x310/0x420 [ 1229.791751] rdma_destroy_id+0x15/0x20 [rdma_cm] [ 1229.791755] isert_connect_release+0x115/0x130 [ib_isert] [ 1229.791757] isert_free_np+0x87/0x140 [ib_isert] [ 1229.791761] iscsit_del_np+0x74/0x120 [iscsi_target_mod] [ 1229.791776] lio_target_np_driver_store+0xe9/0x140 [iscsi_target_mod] [ 1229.791784] configfs_write_file+0xb2/0x110 [ 1229.791788] vfs_write+0xa5/0x1a0 [ 1229.791792] ksys_write+0x4f/0xb0 [ 1229.791794] do_syscall_64+0x5b/0x1a0 [ 1229.791798] entry_SYSCALL_64_after_hwframe+0x65/0xca Fixes: bd3792205aae ("iser-target: Fix pending connections handling in target stack shutdown sequnce") Reviewed-by: Sagi Grimberg Signed-off-by: Selvin Xavier Signed-off-by: Saravanan Vajravel Link: https://lore.kernel.org/r/20230606102531.162967-2-saravanan.vajravel@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/isert/ib_isert.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index f290cd49698e..b4809d237250 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2431,6 +2431,7 @@ isert_free_np(struct iscsi_np *np) { struct isert_np *isert_np = np->np_context; struct isert_conn *isert_conn, *n; + LIST_HEAD(drop_conn_list); if (isert_np->cm_id) rdma_destroy_id(isert_np->cm_id); @@ -2450,7 +2451,7 @@ isert_free_np(struct iscsi_np *np) node) { isert_info("cleaning isert_conn %p state (%d)\n", isert_conn, isert_conn->state); - isert_connect_release(isert_conn); + list_move_tail(&isert_conn->node, &drop_conn_list); } } @@ -2461,11 +2462,16 @@ isert_free_np(struct iscsi_np *np) node) { isert_info("cleaning isert_conn %p state (%d)\n", isert_conn, isert_conn->state); - isert_connect_release(isert_conn); + list_move_tail(&isert_conn->node, &drop_conn_list); } } mutex_unlock(&isert_np->mutex); + list_for_each_entry_safe(isert_conn, n, &drop_conn_list, node) { + list_del_init(&isert_conn->node); + isert_connect_release(isert_conn); + } + np->np_context = NULL; kfree(isert_np); } From 7651e2d6c5b359a28c2d4c904fec6608d1021ca8 Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Tue, 6 Jun 2023 03:25:30 -0700 Subject: [PATCH 650/934] IB/isert: Fix possible list corruption in CMA handler When ib_isert module receives connection error event, it is releasing the isert session and removes corresponding list node but it doesn't take appropriate mutex lock to remove the list node. This can lead to linked list corruption Fixes: bd3792205aae ("iser-target: Fix pending connections handling in target stack shutdown sequnce") Signed-off-by: Selvin Xavier Signed-off-by: Saravanan Vajravel Link: https://lore.kernel.org/r/20230606102531.162967-3-saravanan.vajravel@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/isert/ib_isert.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b4809d237250..00a7303c8cc6 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -657,9 +657,13 @@ static int isert_connect_error(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn = cma_id->qp->qp_context; + struct isert_np *isert_np = cma_id->context; ib_drain_qp(isert_conn->qp); + + mutex_lock(&isert_np->mutex); list_del_init(&isert_conn->node); + mutex_unlock(&isert_np->mutex); isert_conn->cm_id = NULL; isert_put_conn(isert_conn); From 699826f4e30ab76a62c238c86fbef7e826639c8d Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Tue, 6 Jun 2023 03:25:31 -0700 Subject: [PATCH 651/934] IB/isert: Fix incorrect release of isert connection The ib_isert module is releasing the isert connection both in isert_wait_conn() handler as well as isert_free_conn() handler. In isert_wait_conn() handler, it is expected to wait for iSCSI session logout operation to complete. It should free the isert connection only in isert_free_conn() handler. When a bunch of iSER target is cleared, this issue can lead to use-after-free memory issue as isert conn is twice released Fixes: b02efbfc9a05 ("iser-target: Fix implicit termination of connections") Reviewed-by: Sagi Grimberg Signed-off-by: Saravanan Vajravel Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/20230606102531.162967-4-saravanan.vajravel@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/isert/ib_isert.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 00a7303c8cc6..92e1e7587af8 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2570,8 +2570,6 @@ static void isert_wait_conn(struct iscsit_conn *conn) isert_put_unsol_pending_cmds(conn); isert_wait4cmds(conn); isert_wait4logout(isert_conn); - - queue_work(isert_release_wq, &isert_conn->release_work); } static void isert_free_conn(struct iscsit_conn *conn) From 858fd168a95c5b9669aac8db6c14a9aeab446375 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Jun 2023 14:35:30 -0700 Subject: [PATCH 652/934] Linux 6.4-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 09866a85bc2a..0d3a9d3e73c1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 20cb1c2fb7568a6054c55defe044311397e01ddb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 10 Jun 2023 07:42:49 +0800 Subject: [PATCH 653/934] blk-cgroup: Flush stats before releasing blkcg_gq As noted by Michal, the blkg_iostat_set's in the lockless list hold reference to blkg's to protect against their removal. Those blkg's hold reference to blkcg. When a cgroup is being destroyed, cgroup_rstat_flush() is only called at css_release_work_fn() which is called when the blkcg reference count reaches 0. This circular dependency will prevent blkcg and some blkgs from being freed after they are made offline. It is less a problem if the cgroup to be destroyed also has other controllers like memory that will call cgroup_rstat_flush() which will clean up the reference count. If block is the only controller that uses rstat, these offline blkcg and blkgs may never be freed leaking more and more memory over time. To prevent this potential memory leak: - flush blkcg per-cpu stats list in __blkg_release(), when no new stat can be added - add global blkg_stat_lock for covering concurrent parent blkg stat update - don't grab bio->bi_blkg reference when adding the stats into blkcg's per-cpu stat list since all stats are guaranteed to be consumed before releasing blkg instance, and grabbing blkg reference for stats was the most fragile part of original patch Based on Waiman's patch: https://lore.kernel.org/linux-block/20221215033132.230023-3-longman@redhat.com/ Fixes: 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()") Cc: stable@vger.kernel.org Reported-by: Jay Shin Acked-by: Tejun Heo Cc: Waiman Long Cc: mkoutny@suse.com Cc: Yosry Ahmed Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230609234249.1412858-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0ce64dd73cfe..f0b5c9c41cde 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -34,6 +34,8 @@ #include "blk-ioprio.h" #include "blk-throttle.h" +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu); + /* * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. * blkcg_pol_register_mutex nests outside of it and synchronizes entire @@ -56,6 +58,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ bool blkcg_debug_stats = false; +static DEFINE_RAW_SPINLOCK(blkg_stat_lock); + #define BLKG_DESTROY_BATCH_SIZE 64 /* @@ -163,10 +167,20 @@ static void blkg_free(struct blkcg_gq *blkg) static void __blkg_release(struct rcu_head *rcu) { struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); + struct blkcg *blkcg = blkg->blkcg; + int cpu; #ifdef CONFIG_BLK_CGROUP_PUNT_BIO WARN_ON(!bio_list_empty(&blkg->async_bios)); #endif + /* + * Flush all the non-empty percpu lockless lists before releasing + * us, given these stat belongs to us. + * + * blkg_stat_lock is for serializing blkg stat update + */ + for_each_possible_cpu(cpu) + __blkcg_rstat_flush(blkcg, cpu); /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); @@ -951,23 +965,26 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } -static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) { - struct blkcg *blkcg = css_to_blkcg(css); struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu); struct llist_node *lnode; struct blkg_iostat_set *bisc, *next_bisc; - /* Root-level stats are sourced from system-wide IO stats */ - if (!cgroup_parent(css->cgroup)) - return; - rcu_read_lock(); lnode = llist_del_all(lhead); if (!lnode) goto out; + /* + * For covering concurrent parent blkg update from blkg_release(). + * + * When flushing from cgroup, cgroup_rstat_lock is always held, so + * this lock won't cause contention most of time. + */ + raw_spin_lock(&blkg_stat_lock); + /* * Iterate only the iostat_cpu's queued in the lockless list. */ @@ -991,13 +1008,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) if (parent && parent->parent) blkcg_iostat_update(parent, &blkg->iostat.cur, &blkg->iostat.last); - percpu_ref_put(&blkg->refcnt); } - + raw_spin_unlock(&blkg_stat_lock); out: rcu_read_unlock(); } +static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) +{ + /* Root-level stats are sourced from system-wide IO stats */ + if (cgroup_parent(css->cgroup)) + __blkcg_rstat_flush(css_to_blkcg(css), cpu); +} + /* * We source root cgroup stats from the system-wide stats to avoid * tracking the same information twice and incurring overhead when no @@ -2075,7 +2098,6 @@ void blk_cgroup_bio_start(struct bio *bio) llist_add(&bis->lnode, lhead); WRITE_ONCE(bis->lqueued, true); - percpu_ref_get(&bis->blkg->refcnt); } u64_stats_update_end_irqrestore(&bis->sync, flags); From a5998a9ec3c03297a5b7f3df5c1979cd469a0da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=91=AB=E5=8D=8E?= Date: Tue, 6 Jun 2023 23:34:56 +0800 Subject: [PATCH 654/934] smb: remove obsolete comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because do_gettimeofday has been removed and replaced by ktime_get_real_ts64, So just remove the comment as it's not needed now. Signed-off-by: 鑫华 Signed-off-by: Steve French --- fs/smb/client/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 24bdd5f4d3bc..0474d0bba0a2 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -55,7 +55,7 @@ alloc_mid(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); - /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ + /* easier to use jiffies */ /* when mid allocated can be before when sent */ temp->when_alloc = jiffies; temp->server = server; From 91f4480c41f56f7c723323cf7f581f1d95d9ffbc Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 9 Jun 2023 17:46:54 +0000 Subject: [PATCH 655/934] cifs: fix status checks in cifs_tree_connect The ordering of status checks at the beginning of cifs_tree_connect is wrong. As a result, a tcon which is good may stay marked as needing reconnect infinitely. Fixes: 2f0e4f034220 ("cifs: check only tcon status on tcon related functions") Cc: stable@vger.kernel.org # 6.3 Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/connect.c | 9 +++++---- fs/smb/client/dfs.c | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 8e9a672320ab..1250d156619b 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4086,16 +4086,17 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); + if (tcon->status == TID_GOOD) { + spin_unlock(&tcon->tc_lock); + return 0; + } + if (tcon->status != TID_NEW && tcon->status != TID_NEED_TCON) { spin_unlock(&tcon->tc_lock); return -EHOSTDOWN; } - if (tcon->status == TID_GOOD) { - spin_unlock(&tcon->tc_lock); - return 0; - } tcon->status = TID_IN_TCON; spin_unlock(&tcon->tc_lock); diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index 2f93bf8c3325..2390b2fedd6a 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -575,16 +575,17 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru /* only send once per connect */ spin_lock(&tcon->tc_lock); + if (tcon->status == TID_GOOD) { + spin_unlock(&tcon->tc_lock); + return 0; + } + if (tcon->status != TID_NEW && tcon->status != TID_NEED_TCON) { spin_unlock(&tcon->tc_lock); return -EHOSTDOWN; } - if (tcon->status == TID_GOOD) { - spin_unlock(&tcon->tc_lock); - return 0; - } tcon->status = TID_IN_TCON; spin_unlock(&tcon->tc_lock); From 2a44b389664c193dc06cb37d9663d3c5a2a2b743 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 9 Jun 2023 17:46:55 +0000 Subject: [PATCH 656/934] cifs: print all credit counters in DebugData Output of /proc/fs/cifs/DebugData shows only the per-connection counter for the number of credits of regular type. i.e. the credits reserved for echo and oplocks are not displayed. There have been situations recently where having this info would have been useful. This change prints the credit counters of all three types: regular, echo, oplocks. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 5034b862cec2..17c884724590 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -130,12 +130,14 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) struct TCP_Server_Info *server = chan->server; seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx" - "\n\t\tNumber of credits: %d Dialect 0x%x" + "\n\t\tNumber of credits: %d,%d,%d Dialect 0x%x" "\n\t\tTCP status: %d Instance: %d" "\n\t\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d" "\n\t\tIn Send: %d In MaxReq Wait: %d", i+1, server->conn_id, server->credits, + server->echo_credits, + server->oplock_credits, server->dialect, server->tcpStatus, server->reconnect_instance, @@ -350,8 +352,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) atomic_read(&server->smbd_conn->mr_used_count)); skip_rdma: #endif - seq_printf(m, "\nNumber of credits: %d Dialect 0x%x", - server->credits, server->dialect); + seq_printf(m, "\nNumber of credits: %d,%d,%d Dialect 0x%x", + server->credits, + server->echo_credits, + server->oplock_credits, + server->dialect); if (server->compress_algorithm == SMB3_COMPRESS_LZNT1) seq_printf(m, " COMPRESS_LZNT1"); else if (server->compress_algorithm == SMB3_COMPRESS_LZ77) From 50e63d6db6fd30a6dd9a33c49aa5b0bba36e1a92 Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Fri, 9 Jun 2023 18:29:59 -0300 Subject: [PATCH 657/934] smb/client: print "Unknown" instead of bogus link speed value The virtio driver for Linux guests will not set a link speed to its paravirtualized NICs. This will be seen as -1 in the ethernet layer, and when some servers (e.g. samba) fetches it, it's converted to an unsigned value (and multiplied by 1000 * 1000), so in client side we end up with: 1) Speed: 4294967295000000 bps in DebugData. This patch introduces a helper that returns a speed string (in Mbps or Gbps) if interface speed is valid (>= SPEED_10 and <= SPEED_800000), or "Unknown" otherwise. The reason to not change the value in iface->speed is because we don't know the real speed of the HW backing the server NIC, so let's keep considering these as the fastest NICs available. Also print "Capabilities: None" when the interface doesn't support any. Signed-off-by: Enzo Matsumiya Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 47 +++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 17c884724590..b279f745466e 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -148,18 +149,62 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan) atomic_read(&server->num_waiters)); } +static inline const char *smb_speed_to_str(size_t bps) +{ + size_t mbps = bps / 1000 / 1000; + + switch (mbps) { + case SPEED_10: + return "10Mbps"; + case SPEED_100: + return "100Mbps"; + case SPEED_1000: + return "1Gbps"; + case SPEED_2500: + return "2.5Gbps"; + case SPEED_5000: + return "5Gbps"; + case SPEED_10000: + return "10Gbps"; + case SPEED_14000: + return "14Gbps"; + case SPEED_20000: + return "20Gbps"; + case SPEED_25000: + return "25Gbps"; + case SPEED_40000: + return "40Gbps"; + case SPEED_50000: + return "50Gbps"; + case SPEED_56000: + return "56Gbps"; + case SPEED_100000: + return "100Gbps"; + case SPEED_200000: + return "200Gbps"; + case SPEED_400000: + return "400Gbps"; + case SPEED_800000: + return "800Gbps"; + default: + return "Unknown"; + } +} + static void cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface) { struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr; struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&iface->sockaddr; - seq_printf(m, "\tSpeed: %zu bps\n", iface->speed); + seq_printf(m, "\tSpeed: %s\n", smb_speed_to_str(iface->speed)); seq_puts(m, "\t\tCapabilities: "); if (iface->rdma_capable) seq_puts(m, "rdma "); if (iface->rss_capable) seq_puts(m, "rss "); + if (!iface->rdma_capable && !iface->rss_capable) + seq_puts(m, "None"); seq_putc(m, '\n'); if (iface->sockaddr.ss_family == AF_INET) seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr); From 2991b77409891e14a10b96899755c004b0c07edb Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 9 Jun 2023 17:46:59 +0000 Subject: [PATCH 658/934] cifs: fix sockaddr comparison in iface_cmp iface_cmp used to simply do a memcmp of the two provided struct sockaddrs. The comparison needs to do more based on the address family. Similar logic was already present in cifs_match_ipaddr. Doing something similar now. Signed-off-by: Shyam Prasad N Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 37 ----------------------------- fs/smb/client/cifsproto.h | 1 + fs/smb/client/connect.c | 50 +++++++++++++++++++++++++++++++++++++++ fs/smb/client/smb2ops.c | 37 +++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 37 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 0d84bb1a8cd9..b212a4e16b39 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -970,43 +970,6 @@ release_iface(struct kref *ref) kfree(iface); } -/* - * compare two interfaces a and b - * return 0 if everything matches. - * return 1 if a has higher link speed, or rdma capable, or rss capable - * return -1 otherwise. - */ -static inline int -iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b) -{ - int cmp_ret = 0; - - WARN_ON(!a || !b); - if (a->speed == b->speed) { - if (a->rdma_capable == b->rdma_capable) { - if (a->rss_capable == b->rss_capable) { - cmp_ret = memcmp(&a->sockaddr, &b->sockaddr, - sizeof(a->sockaddr)); - if (!cmp_ret) - return 0; - else if (cmp_ret > 0) - return 1; - else - return -1; - } else if (a->rss_capable > b->rss_capable) - return 1; - else - return -1; - } else if (a->rdma_capable > b->rdma_capable) - return 1; - else - return -1; - } else if (a->speed > b->speed) - return 1; - else - return -1; -} - struct cifs_chan { unsigned int in_reconnect : 1; /* if session setup in progress for this channel */ struct TCP_Server_Info *server; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c1c704990b98..d127aded2f28 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -87,6 +87,7 @@ extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); extern int smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx); extern int smb3_parse_opt(const char *options, const char *key, char **val); +extern int cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs); extern bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs); extern int cifs_discard_remaining_data(struct TCP_Server_Info *server); extern int cifs_call_async(struct TCP_Server_Info *server, diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 1250d156619b..9d16626e7a66 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1288,6 +1288,56 @@ next_pdu: module_put_and_kthread_exit(0); } +int +cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs) +{ + struct sockaddr_in *saddr4 = (struct sockaddr_in *)srcaddr; + struct sockaddr_in *vaddr4 = (struct sockaddr_in *)rhs; + struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; + struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs; + + switch (srcaddr->sa_family) { + case AF_UNSPEC: + switch (rhs->sa_family) { + case AF_UNSPEC: + return 0; + case AF_INET: + case AF_INET6: + return 1; + default: + return -1; + } + case AF_INET: { + switch (rhs->sa_family) { + case AF_UNSPEC: + return -1; + case AF_INET: + return memcmp(saddr4, vaddr4, + sizeof(struct sockaddr_in)); + case AF_INET6: + return 1; + default: + return -1; + } + } + case AF_INET6: { + switch (rhs->sa_family) { + case AF_UNSPEC: + case AF_INET: + return -1; + case AF_INET6: + return memcmp(saddr6, + vaddr6, + sizeof(struct sockaddr_in6)); + default: + return -1; + } + } + default: + return -1; /* don't expect to be here */ + } +} + /* * Returns true if srcaddr isn't specified and rhs isn't specified, or * if srcaddr is specified and matches the IP address of the rhs argument diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 6e3be58cfe49..8bceb54ab061 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -510,6 +510,43 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) return rsize; } +/* + * compare two interfaces a and b + * return 0 if everything matches. + * return 1 if a is rdma capable, or rss capable, or has higher link speed + * return -1 otherwise. + */ +static int +iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b) +{ + int cmp_ret = 0; + + WARN_ON(!a || !b); + if (a->rdma_capable == b->rdma_capable) { + if (a->rss_capable == b->rss_capable) { + if (a->speed == b->speed) { + cmp_ret = cifs_ipaddr_cmp((struct sockaddr *) &a->sockaddr, + (struct sockaddr *) &b->sockaddr); + if (!cmp_ret) + return 0; + else if (cmp_ret > 0) + return 1; + else + return -1; + } else if (a->speed > b->speed) + return 1; + else + return -1; + } else if (a->rss_capable > b->rss_capable) + return 1; + else + return -1; + } else if (a->rdma_capable > b->rdma_capable) + return 1; + else + return -1; +} + static int parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, size_t buf_len, struct cifs_ses *ses, bool in_mount) From 5e90aa21eb1372736e08cee0c0bf47735c5c4b95 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 9 Jun 2023 17:46:58 +0000 Subject: [PATCH 659/934] cifs: fix max_credits implementation The current implementation of max_credits on the client does not work because the CreditRequest logic for several commands does not take max_credits into account. Still, we can end up asking the server for more credits, depending on the number of credits in flight. For this, we need to limit the credits while parsing the responses too. Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 2 ++ fs/smb/client/smb2pdu.c | 32 ++++++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 8bceb54ab061..bbb19bbb14c9 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -34,6 +34,8 @@ static int change_conf(struct TCP_Server_Info *server) { server->credits += server->echo_credits + server->oplock_credits; + if (server->credits > server->max_credits) + server->credits = server->max_credits; server->oplock_credits = server->echo_credits = 0; switch (server->credits) { case 0: diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 7063b395d22f..17fe212ab895 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -1305,7 +1305,12 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) } /* enough to enable echos and oplocks and one max size write */ - req->hdr.CreditRequest = cpu_to_le16(130); + if (server->credits >= server->max_credits) + req->hdr.CreditRequest = cpu_to_le16(0); + else + req->hdr.CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, 130)); /* only one of SMB2 signing flags may be set in SMB2 request */ if (server->sign) @@ -1899,7 +1904,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, rqst.rq_nvec = 2; /* Need 64 for max size write so ask for more in case not there yet */ - req->hdr.CreditRequest = cpu_to_le16(64); + if (server->credits >= server->max_credits) + req->hdr.CreditRequest = cpu_to_le16(0); + else + req->hdr.CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, 64)); rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -4227,6 +4237,7 @@ smb2_async_readv(struct cifs_readdata *rdata) struct TCP_Server_Info *server; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); unsigned int total_len; + int credit_request; cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", __func__, rdata->offset, rdata->bytes); @@ -4258,7 +4269,13 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); + credit_request = le16_to_cpu(shdr->CreditCharge) + 8; + if (server->credits >= server->max_credits) + shdr->CreditRequest = cpu_to_le16(0); + else + shdr->CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, credit_request)); rc = adjust_credits(server, &rdata->credits, rdata->bytes); if (rc) @@ -4468,6 +4485,7 @@ smb2_async_writev(struct cifs_writedata *wdata, unsigned int total_len; struct cifs_io_parms _io_parms; struct cifs_io_parms *io_parms = NULL; + int credit_request; if (!wdata->server) server = wdata->server = cifs_pick_channel(tcon->ses); @@ -4572,7 +4590,13 @@ smb2_async_writev(struct cifs_writedata *wdata, if (wdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); + credit_request = le16_to_cpu(shdr->CreditCharge) + 8; + if (server->credits >= server->max_credits) + shdr->CreditRequest = cpu_to_le16(0); + else + shdr->CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, credit_request)); rc = adjust_credits(server, &wdata->credits, io_parms->length); if (rc) From 52f79609c0c5b25fddb88e85f25ce08aa7e3fb42 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 8 Jun 2023 09:23:44 -0700 Subject: [PATCH 660/934] net: ethtool: correct MAX attribute value for stats When compiling YNL generated code compiler complains about array-initializer-out-of-bounds. Turns out the MAX value for STATS_GRP uses the value for STATS. This may lead to random corruptions in user space (kernel itself doesn't use this value as it never parses stats). Fixes: f09ea6fb1272 ("ethtool: add a new command for reading standard stats") Signed-off-by: Jakub Kicinski Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 1ebf8d455f07..73e2c10dc2cc 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -783,7 +783,7 @@ enum { /* add new constants above here */ __ETHTOOL_A_STATS_GRP_CNT, - ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_CNT - 1) + ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) }; enum { From d094482c9974a543851a18a1c587a7d132a81659 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Sun, 11 Jun 2023 12:14:29 +0300 Subject: [PATCH 661/934] wifi: mac80211: fragment per STA profile correctly When fragmenting the ML per STA profile, the element ID should be IEEE80211_MLE_SUBELEM_PER_STA_PROFILE rather than WLAN_EID_FRAGMENT. Change the helper function to take the to be used element ID and pass the appropriate value for each of the fragmentation levels. Fixes: 81151ce462e5 ("wifi: mac80211: support MLO authentication/association with one link") Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230611121219.9b5c793d904b.I7dad952bea8e555e2f3139fbd415d0cd2b3a08c3@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 5 +++-- net/mac80211/util.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b0372e76f373..4159fb65038b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2312,7 +2312,7 @@ ieee802_11_parse_elems(const u8 *start, size_t len, bool action, return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss); } -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos); +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id); extern const int ieee802_1d_to_ac[8]; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bd8d6f9545f5..5a4303130ef2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1372,10 +1372,11 @@ static void ieee80211_assoc_add_ml_elem(struct ieee80211_sub_if_data *sdata, ieee80211_add_non_inheritance_elem(skb, outer_present_elems, link_present_elems); - ieee80211_fragment_element(skb, subelem_len); + ieee80211_fragment_element(skb, subelem_len, + IEEE80211_MLE_SUBELEM_FRAGMENT); } - ieee80211_fragment_element(skb, ml_elem_len); + ieee80211_fragment_element(skb, ml_elem_len, WLAN_EID_FRAGMENT); } static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 4bf76150925d..3bd07a0a782f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -5049,7 +5049,7 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos, return pos; } -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos) +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id) { unsigned int elem_len; @@ -5069,7 +5069,7 @@ void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos) memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len); /* place the fragment ID */ len_pos += 255 + 1; - *len_pos = WLAN_EID_FRAGMENT; + *len_pos = frag_id; /* and point to fragment length to update later */ len_pos++; } From ce57adc222aba32431c42632b396e9213d0eb0b8 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 9 Jun 2023 17:15:02 +0800 Subject: [PATCH 662/934] ipvlan: fix bound dev checking for IPv6 l3s mode The commit 59a0b022aa24 ("ipvlan: Make skb->skb_iif track skb->dev for l3s mode") fixed ipvlan bonded dev checking by updating skb skb_iif. This fix works for IPv4, as in raw_v4_input() the dif is from inet_iif(skb), which is skb->skb_iif when there is no route. But for IPv6, the fix is not enough, because in ipv6_raw_deliver() -> raw_v6_match(), the dif is inet6_iif(skb), which is returns IP6CB(skb)->iif instead of skb->skb_iif if it's not a l3_slave. To fix the IPv6 part issue. Let's set IP6CB(skb)->iif to correct ifindex. BTW, ipvlan handles NS/NA specifically. Since it works fine, I will not reset IP6CB(skb)->iif when addr->atype is IPVL_ICMPV6. Fixes: c675e06a98a4 ("ipvlan: decouple l3s mode dependencies from other modes") Link: https://bugzilla.redhat.com/show_bug.cgi?id=2196710 Signed-off-by: Hangbin Liu Reviewed-by: Larysa Zaremba Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_l3s.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c index 71712ea25403..d5b05e803219 100644 --- a/drivers/net/ipvlan/ipvlan_l3s.c +++ b/drivers/net/ipvlan/ipvlan_l3s.c @@ -102,6 +102,10 @@ static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, skb->dev = addr->master->dev; skb->skb_iif = skb->dev->ifindex; +#if IS_ENABLED(CONFIG_IPV6) + if (addr->atype == IPVL_IPV6) + IP6CB(skb)->iif = skb->dev->ifindex; +#endif len = skb->len + ETH_HLEN; ipvlan_count_rx(addr->master, len, true, false); out: From a0067dfcd9418fd3b0632bc59210d120d038a9c6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Jun 2023 14:04:43 +0300 Subject: [PATCH 663/934] sctp: handle invalid error codes without calling BUG() The sctp_sf_eat_auth() function is supposed to return enum sctp_disposition values but if the call to sctp_ulpevent_make_authkey() fails, it returns -ENOMEM. This results in calling BUG() inside the sctp_side_effects() function. Calling BUG() is an over reaction and not helpful. Call WARN_ON_ONCE() instead. This code predates git. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 7fbeb99d8d32..23d6633966b1 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1250,7 +1250,10 @@ static int sctp_side_effects(enum sctp_event_type event_type, default: pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n", status, state, event_type, subtype.chunk); - BUG(); + error = status; + if (error >= 0) + error = -EINVAL; + WARN_ON_ONCE(1); break; } From 75e6def3b26736e7ff80639810098c9074229737 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Jun 2023 14:05:19 +0300 Subject: [PATCH 664/934] sctp: fix an error code in sctp_sf_eat_auth() The sctp_sf_eat_auth() function is supposed to enum sctp_disposition values and returning a kernel error code will cause issues in the caller. Change -ENOMEM to SCTP_DISPOSITION_NOMEM. Fixes: 65b07e5d0d09 ("[SCTP]: API updates to suport SCTP-AUTH extensions.") Signed-off-by: Dan Carpenter Acked-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 97f1155a2045..08fdf1251f46 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4482,7 +4482,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net, SCTP_AUTH_NEW_KEY, GFP_ATOMIC); if (!ev) - return -ENOMEM; + return SCTP_DISPOSITION_NOMEM; sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); From 842665a9008a53ff13ac22a4e4b8ae2f10e92aca Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Jun 2023 16:38:47 +0800 Subject: [PATCH 665/934] xfrm: Use xfrm_state selector for BEET input For BEET the inner address and therefore family is stored in the xfrm_state selector. Use that when decapsulating an input packet instead of incorrectly relying on a non-existent tunnel protocol. Fixes: 5f24f41e8ea6 ("xfrm: Remove inner/outer modes from input path") Reported-by: Steffen Klassert Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index b731687b5f3e..815b38080401 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -331,11 +331,10 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x, { switch (x->props.mode) { case XFRM_MODE_BEET: - switch (XFRM_MODE_SKB_CB(skb)->protocol) { - case IPPROTO_IPIP: - case IPPROTO_BEETPH: + switch (x->sel.family) { + case AF_INET: return xfrm4_remove_beet_encap(x, skb); - case IPPROTO_IPV6: + case AF_INET6: return xfrm6_remove_beet_encap(x, skb); } break; From e6f9e590b72e12bbb86b1b8be7e1981f357392ad Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 18 May 2023 11:39:36 +0200 Subject: [PATCH 666/934] mmc: sdhci-msm: Disable broken 64-bit DMA on MSM8916 While SDHCI claims to support 64-bit DMA on MSM8916 it does not seem to be properly functional. It is not immediately obvious because SDHCI is usually used with IOMMU bypassed on this SoC, and all physical memory has 32-bit addresses. But when trying to enable the IOMMU it quickly fails with an error such as the following: arm-smmu 1e00000.iommu: Unhandled context fault: fsr=0x402, iova=0xfffff200, fsynr=0xe0000, cbfrsynra=0x140, cb=3 mmc1: ADMA error: 0x02000000 mmc1: sdhci: ============ SDHCI REGISTER DUMP =========== mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00002e02 mmc1: sdhci: Blk size: 0x00000008 | Blk cnt: 0x00000000 mmc1: sdhci: Argument: 0x00000000 | Trn mode: 0x00000013 mmc1: sdhci: Present: 0x03f80206 | Host ctl: 0x00000019 mmc1: sdhci: Power: 0x0000000f | Blk gap: 0x00000000 mmc1: sdhci: Wake-up: 0x00000000 | Clock: 0x00000007 mmc1: sdhci: Timeout: 0x0000000a | Int stat: 0x00000001 mmc1: sdhci: Int enab: 0x03ff900b | Sig enab: 0x03ff100b mmc1: sdhci: ACmd stat: 0x00000000 | Slot int: 0x00000000 mmc1: sdhci: Caps: 0x322dc8b2 | Caps_1: 0x00008007 mmc1: sdhci: Cmd: 0x0000333a | Max curr: 0x00000000 mmc1: sdhci: Resp[0]: 0x00000920 | Resp[1]: 0x5b590000 mmc1: sdhci: Resp[2]: 0xe6487f80 | Resp[3]: 0x0a404094 mmc1: sdhci: Host ctl2: 0x00000008 mmc1: sdhci: ADMA Err: 0x00000001 | ADMA Ptr: 0x0000000ffffff224 mmc1: sdhci_msm: ----------- VENDOR REGISTER DUMP ----------- mmc1: sdhci_msm: DLL sts: 0x00000000 | DLL cfg: 0x60006400 | DLL cfg2: 0x00000000 mmc1: sdhci_msm: DLL cfg3: 0x00000000 | DLL usr ctl: 0x00000000 | DDR cfg: 0x00000000 mmc1: sdhci_msm: Vndr func: 0x00018a9c | Vndr func2 : 0xf88018a8 Vndr func3: 0x00000000 mmc1: sdhci: ============================================ mmc1: sdhci: fffffffff200: DMA 0x0000ffffffffe100, LEN 0x0008, Attr=0x21 mmc1: sdhci: fffffffff20c: DMA 0x0000000000000000, LEN 0x0000, Attr=0x03 Looking closely it's obvious that only the 32-bit part of the address (0xfffff200) arrives at the SMMU, the higher 16-bit (0xffff...) get lost somewhere. This might not be a limitation of the SDHCI itself but perhaps the bus/interconnect it is connected to, or even the connection to the SMMU. Work around this by setting SDHCI_QUIRK2_BROKEN_64_BIT_DMA to avoid using 64-bit addresses. Signed-off-by: Stephan Gerhold Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230518-msm8916-64bit-v1-1-5694b0f35211@gerhold.net Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 8ac81d57a3df..1877d583fe8c 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -2479,6 +2479,9 @@ static inline void sdhci_msm_get_of_property(struct platform_device *pdev, msm_host->ddr_config = DDR_CONFIG_POR_VAL; of_property_read_u32(node, "qcom,dll-config", &msm_host->dll_config); + + if (of_device_is_compatible(node, "qcom,msm8916-sdhci")) + host->quirks2 |= SDHCI_QUIRK2_BROKEN_64_BIT_DMA; } static int sdhci_msm_gcc_reset(struct device *dev, struct sdhci_host *host) From ad96f1c9138e0897bee7f7c5e54b3e24f8b62f57 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 8 Jun 2023 17:54:39 -0700 Subject: [PATCH 667/934] bpf: Fix a bpf_jit_dump issue for x86_64 with sysctl bpf_jit_enable. The sysctl net/core/bpf_jit_enable does not work now due to commit 1022a5498f6f ("bpf, x86_64: Use bpf_jit_binary_pack_alloc"). The commit saved the jitted insns into 'rw_image' instead of 'image' which caused bpf_jit_dump not dumping proper content. With 'echo 2 > /proc/sys/net/core/bpf_jit_enable', run './test_progs -t fentry_test'. Without this patch, one of jitted image for one particular prog is: flen=17 proglen=92 pass=4 image=0000000014c64883 from=test_progs pid=1807 00000000: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 00000010: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 00000020: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 00000030: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 00000040: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 00000050: cc cc cc cc cc cc cc cc cc cc cc cc With this patch, the jitte image for the same prog is: flen=17 proglen=92 pass=4 image=00000000b90254b7 from=test_progs pid=1809 00000000: f3 0f 1e fa 0f 1f 44 00 00 66 90 55 48 89 e5 f3 00000010: 0f 1e fa 31 f6 48 8b 57 00 48 83 fa 07 75 2b 48 00000020: 8b 57 10 83 fa 09 75 22 48 8b 57 08 48 81 e2 ff 00000030: 00 00 00 48 83 fa 08 75 11 48 8b 7f 18 be 01 00 00000040: 00 00 48 83 ff 0a 74 02 31 f6 48 bf 18 d0 14 00 00000050: 00 c9 ff ff 48 89 77 00 31 c0 c9 c3 Fixes: 1022a5498f6f ("bpf, x86_64: Use bpf_jit_binary_pack_alloc") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20230609005439.3173569-1-yhs@fb.com --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 1056bbf55b17..438adb695daa 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2570,7 +2570,7 @@ out_image: } if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, proglen, pass + 1, image); + bpf_jit_dump(prog->len, proglen, pass + 1, rw_image); if (image) { if (!prog->is_func || extra_pass) { From f0cc749254d12c78e93dae3b27b21dc9546843d0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 11 Jun 2023 22:48:12 +0900 Subject: [PATCH 668/934] cgroup,freezer: hold cpu_hotplug_lock before freezer_mutex in freezer_css_{online,offline}() syzbot is again reporting circular locking dependency between cpu_hotplug_lock and freezer_mutex. Do like what we did with commit 57dcd64c7e036299 ("cgroup,freezer: hold cpu_hotplug_lock before freezer_mutex"). Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=2ab700fe1829880a2ec6 Signed-off-by: Tetsuo Handa Tested-by: syzbot Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Tejun Heo --- kernel/cgroup/legacy_freezer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 936473203a6b..122dacb3a443 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -108,16 +108,18 @@ static int freezer_css_online(struct cgroup_subsys_state *css) struct freezer *freezer = css_freezer(css); struct freezer *parent = parent_freezer(freezer); + cpus_read_lock(); mutex_lock(&freezer_mutex); freezer->state |= CGROUP_FREEZER_ONLINE; if (parent && (parent->state & CGROUP_FREEZING)) { freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; - static_branch_inc(&freezer_active); + static_branch_inc_cpuslocked(&freezer_active); } mutex_unlock(&freezer_mutex); + cpus_read_unlock(); return 0; } @@ -132,14 +134,16 @@ static void freezer_css_offline(struct cgroup_subsys_state *css) { struct freezer *freezer = css_freezer(css); + cpus_read_lock(); mutex_lock(&freezer_mutex); if (freezer->state & CGROUP_FREEZING) - static_branch_dec(&freezer_active); + static_branch_dec_cpuslocked(&freezer_active); freezer->state = 0; mutex_unlock(&freezer_mutex); + cpus_read_unlock(); } static void freezer_css_free(struct cgroup_subsys_state *css) From 6f363f5aa845561f7ea496d8b1175e3204470486 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sat, 10 Jun 2023 17:26:43 +0800 Subject: [PATCH 669/934] cgroup: Do not corrupt task iteration when rebinding subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We found a refcount UAF bug as follows: refcount_t: addition on 0; use-after-free. WARNING: CPU: 1 PID: 342 at lib/refcount.c:25 refcount_warn_saturate+0xa0/0x148 Workqueue: events cpuset_hotplug_workfn Call trace: refcount_warn_saturate+0xa0/0x148 __refcount_add.constprop.0+0x5c/0x80 css_task_iter_advance_css_set+0xd8/0x210 css_task_iter_advance+0xa8/0x120 css_task_iter_next+0x94/0x158 update_tasks_root_domain+0x58/0x98 rebuild_root_domains+0xa0/0x1b0 rebuild_sched_domains_locked+0x144/0x188 cpuset_hotplug_workfn+0x138/0x5a0 process_one_work+0x1e8/0x448 worker_thread+0x228/0x3e0 kthread+0xe0/0xf0 ret_from_fork+0x10/0x20 then a kernel panic will be triggered as below: Unable to handle kernel paging request at virtual address 00000000c0000010 Call trace: cgroup_apply_control_disable+0xa4/0x16c rebind_subsystems+0x224/0x590 cgroup_destroy_root+0x64/0x2e0 css_free_rwork_fn+0x198/0x2a0 process_one_work+0x1d4/0x4bc worker_thread+0x158/0x410 kthread+0x108/0x13c ret_from_fork+0x10/0x18 The race that cause this bug can be shown as below: (hotplug cpu) | (umount cpuset) mutex_lock(&cpuset_mutex) | mutex_lock(&cgroup_mutex) cpuset_hotplug_workfn | rebuild_root_domains | rebind_subsystems update_tasks_root_domain | spin_lock_irq(&css_set_lock) css_task_iter_start | list_move_tail(&cset->e_cset_node[ss->id] while(css_task_iter_next) | &dcgrp->e_csets[ss->id]); css_task_iter_end | spin_unlock_irq(&css_set_lock) mutex_unlock(&cpuset_mutex) | mutex_unlock(&cgroup_mutex) Inside css_task_iter_start/next/end, css_set_lock is hold and then released, so when iterating task(left side), the css_set may be moved to another list(right side), then it->cset_head points to the old list head and it->cset_pos->next points to the head node of new list, which can't be used as struct css_set. To fix this issue, switch from all css_sets to only scgrp's css_sets to patch in-flight iterators to preserve correct iteration, and then update it->cset_head as well. Reported-by: Gaosheng Cui Link: https://www.spinics.net/lists/cgroups/msg37935.html Suggested-by: Michal Koutný Link: https://lore.kernel.org/all/20230526114139.70274-1-xiujianfeng@huaweicloud.com/ Signed-off-by: Xiu Jianfeng Fixes: 2d8f243a5e6e ("cgroup: implement cgroup->e_csets[]") Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 245cf62ce85a..4d42f0cbc11e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1798,7 +1798,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) { struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; - int ssid, i, ret; + int ssid, ret; u16 dfl_disable_ss_mask = 0; lockdep_assert_held(&cgroup_mutex); @@ -1842,7 +1842,8 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) struct cgroup_root *src_root = ss->root; struct cgroup *scgrp = &src_root->cgrp; struct cgroup_subsys_state *css = cgroup_css(scgrp, ss); - struct css_set *cset; + struct css_set *cset, *cset_pos; + struct css_task_iter *it; WARN_ON(!css || cgroup_css(dcgrp, ss)); @@ -1860,9 +1861,22 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) css->cgroup = dcgrp; spin_lock_irq(&css_set_lock); - hash_for_each(css_set_table, i, cset, hlist) + WARN_ON(!list_empty(&dcgrp->e_csets[ss->id])); + list_for_each_entry_safe(cset, cset_pos, &scgrp->e_csets[ss->id], + e_cset_node[ss->id]) { list_move_tail(&cset->e_cset_node[ss->id], &dcgrp->e_csets[ss->id]); + /* + * all css_sets of scgrp together in same order to dcgrp, + * patch in-flight iterators to preserve correct iteration. + * since the iterator is always advanced right away and + * finished when it->cset_pos meets it->cset_head, so only + * update it->cset_head is enough here. + */ + list_for_each_entry(it, &cset->task_iters, iters_node) + if (it->cset_head == &scgrp->e_csets[ss->id]) + it->cset_head = &dcgrp->e_csets[ss->id]; + } spin_unlock_irq(&css_set_lock); if (ss->css_rstat_flush) { From fd37b884003c7e46a0337b6e9212326d3ee1f40d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 12 Jun 2023 12:11:57 -0600 Subject: [PATCH 670/934] io_uring/io-wq: don't clear PF_IO_WORKER on exit A recent commit gated the core dumping task exit logic on current->flags remaining consistent in terms of PF_{IO,USER}_WORKER at task exit time. This exposed a problem with the io-wq handling of that, which explicitly clears PF_IO_WORKER before calling do_exit(). The reasons for this manual clear of PF_IO_WORKER is historical, where io-wq used to potentially trigger a sleep on exit. As the io-wq thread is exiting, it should not participate any further accounting. But these days we don't need to rely on current->flags anymore, so we can safely remove the PF_IO_WORKER clearing. Reported-by: Zorro Lang Reported-by: Dave Chinner Link: https://lore.kernel.org/all/ZIZSPyzReZkGBEFy@dread.disaster.area/ Fixes: f9010dbdce91 ("fork, vhost: Use CLONE_THREAD to fix freezer/ps regression") Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- io_uring/io-wq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index b2715988791e..fe38eb0cbc82 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -221,9 +221,6 @@ static void io_worker_exit(struct io_worker *worker) raw_spin_unlock(&wq->lock); io_wq_dec_running(worker); worker->flags = 0; - preempt_disable(); - current->flags &= ~PF_IO_WORKER; - preempt_enable(); kfree_rcu(worker, rcu); io_worker_ref_put(wq); From 3d6f6d2b584e7a629158be8a3f44f5de00b337ee Mon Sep 17 00:00:00 2001 From: Alexandre Mergnat Date: Fri, 26 May 2023 15:10:43 +0200 Subject: [PATCH 671/934] clk: mediatek: mt8365: Fix index issue Before the patch [1], the clock probe was done directly in the clk-mt8365 driver. In this probe function, the array which stores the data clocks is sized using the higher defined numbers (*_NR_CLOCK) in the clock lists [2]. Currently, with the patch [1], the specific clk-mt8365 probe function is replaced by the mtk generic one [3], which size the clock data array by adding all the clock descriptor array size provided by the clk-mt8365 driver. Actually, all clock indexes come from the header file [2], that mean, if there are more clock (then more index) in the header file [2] than the number of clock declared in the clock descriptor arrays (which is the case currently), the clock data array will be undersized and then the generic probe function will overflow when it will try to write in "clk_data[CLK_INDEX]". Actually, instead of crashing at boot, the probe function returns an error in the log which looks like: "of_clk_hw_onecell_get: invalid index 135", then this clock isn't enabled. Solve this issue by adding in the driver the missing clocks declared in the header clock file [2]. [1]: Commit ffe91cb28f6a ("clk: mediatek: mt8365: Convert to mtk_clk_simple_{probe,remove}()") [2]: include/dt-bindings/clock/mediatek,mt8365-clk.h [3]: drivers/clk/mediatek/clk-mtk.c Fixes: ffe91cb28f6a ("clk: mediatek: mt8365: Convert to mtk_clk_simple_{probe,remove}()") Signed-off-by: Alexandre Mergnat Link: https://lore.kernel.org/r/20230517-fix-clk-index-v3-1-be4df46065c4@baylibre.com Tested-by: Markus Schneider-Pargmann Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-mt8365.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c index 6b4e193f648d..c366ac1611e6 100644 --- a/drivers/clk/mediatek/clk-mt8365.c +++ b/drivers/clk/mediatek/clk-mt8365.c @@ -23,6 +23,7 @@ static DEFINE_SPINLOCK(mt8365_clk_lock); static const struct mtk_fixed_clk top_fixed_clks[] = { + FIXED_CLK(CLK_TOP_CLK_NULL, "clk_null", NULL, 0), FIXED_CLK(CLK_TOP_I2S0_BCK, "i2s0_bck", NULL, 26000000), FIXED_CLK(CLK_TOP_DSI0_LNTC_DSICK, "dsi0_lntc_dsick", "clk26m", 75000000), @@ -559,6 +560,14 @@ static const struct mtk_clk_divider top_adj_divs[] = { 0x324, 16, 8, CLK_DIVIDER_ROUND_CLOSEST), DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV3, "apll12_ck_div3", "apll_i2s3_sel", 0x324, 24, 8, CLK_DIVIDER_ROUND_CLOSEST), + DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV4, "apll12_ck_div4", "apll_tdmout_sel", + 0x328, 0, 8, CLK_DIVIDER_ROUND_CLOSEST), + DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV4B, "apll12_ck_div4b", "apll_tdmout_sel", + 0x328, 8, 8, CLK_DIVIDER_ROUND_CLOSEST), + DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV5, "apll12_ck_div5", "apll_tdmin_sel", + 0x328, 16, 8, CLK_DIVIDER_ROUND_CLOSEST), + DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV5B, "apll12_ck_div5b", "apll_tdmin_sel", + 0x328, 24, 8, CLK_DIVIDER_ROUND_CLOSEST), DIV_ADJ_F(CLK_TOP_APLL12_CK_DIV6, "apll12_ck_div6", "apll_spdif_sel", 0x32c, 0, 8, CLK_DIVIDER_ROUND_CLOSEST), }; @@ -696,6 +705,7 @@ static const struct mtk_gate ifr_clks[] = { GATE_IFR3(CLK_IFR_GCPU, "ifr_gcpu", "axi_sel", 8), GATE_IFR3(CLK_IFR_TRNG, "ifr_trng", "axi_sel", 9), GATE_IFR3(CLK_IFR_AUXADC, "ifr_auxadc", "clk26m", 10), + GATE_IFR3(CLK_IFR_CPUM, "ifr_cpum", "clk26m", 11), GATE_IFR3(CLK_IFR_AUXADC_MD, "ifr_auxadc_md", "clk26m", 14), GATE_IFR3(CLK_IFR_AP_DMA, "ifr_ap_dma", "axi_sel", 18), GATE_IFR3(CLK_IFR_DEBUGSYS, "ifr_debugsys", "axi_sel", 24), @@ -717,6 +727,8 @@ static const struct mtk_gate ifr_clks[] = { GATE_IFR5(CLK_IFR_PWRAP_TMR, "ifr_pwrap_tmr", "clk26m", 12), GATE_IFR5(CLK_IFR_PWRAP_SPI, "ifr_pwrap_spi", "clk26m", 13), GATE_IFR5(CLK_IFR_PWRAP_SYS, "ifr_pwrap_sys", "clk26m", 14), + GATE_MTK_FLAGS(CLK_IFR_MCU_PM_BK, "ifr_mcu_pm_bk", NULL, &ifr5_cg_regs, + 17, &mtk_clk_gate_ops_setclr, CLK_IGNORE_UNUSED), GATE_IFR5(CLK_IFR_IRRX_26M, "ifr_irrx_26m", "clk26m", 22), GATE_IFR5(CLK_IFR_IRRX_32K, "ifr_irrx_32k", "clk32k", 23), GATE_IFR5(CLK_IFR_I2C0_AXI, "ifr_i2c0_axi", "i2c_sel", 24), From e43516f5978d11d36511ce63d31d1da4db916510 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Mon, 15 May 2023 23:49:36 +0800 Subject: [PATCH 672/934] igc: Clean the TX buffer and TX descriptor ring There could be a race condition during link down where interrupt being generated and igc_clean_tx_irq() been called to perform the TX completion. Properly clear the TX buffer/descriptor ring and disable the TX Queue ring in igc_free_tx_resources() to avoid that. Kernel trace: [ 108.237177] Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLIFUI1.R00.4204.A00.2105270302 05/27/2021 [ 108.237178] RIP: 0010:refcount_warn_saturate+0x55/0x110 [ 108.242143] RSP: 0018:ffff9e7980003db0 EFLAGS: 00010286 [ 108.245555] Code: 84 bc 00 00 00 c3 cc cc cc cc 85 f6 74 46 80 3d 20 8c 4d 01 00 75 ee 48 c7 c7 88 f4 03 ab c6 05 10 8c 4d 01 01 e8 0b 10 96 ff <0f> 0b c3 cc cc cc cc 80 3d fc 8b 4d 01 00 75 cb 48 c7 c7 b0 f4 03 [ 108.250434] [ 108.250434] RSP: 0018:ffff9e798125f910 EFLAGS: 00010286 [ 108.254358] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 108.259325] [ 108.259325] RAX: 0000000000000000 RBX: ffff8ddb935b8000 RCX: 0000000000000027 [ 108.261868] RDX: ffff8de250a28800 RSI: ffff8de250a1c580 RDI: ffff8de250a1c580 [ 108.265538] RDX: 0000000000000027 RSI: 0000000000000002 RDI: ffff8de250a9c588 [ 108.265539] RBP: ffff8ddb935b8000 R08: ffffffffab2655a0 R09: ffff9e798125f898 [ 108.267914] RBP: ffff8ddb8a5b8d80 R08: 0000005648eba354 R09: 0000000000000000 [ 108.270196] R10: 0000000000000001 R11: 000000002d2d2d2d R12: ffff9e798125f948 [ 108.270197] R13: ffff9e798125fa1c R14: ffff8ddb8a5b8d80 R15: 7fffffffffffffff [ 108.273001] R10: 000000002d2d2d2d R11: 000000002d2d2d2d R12: ffff8ddb8a5b8ed4 [ 108.276410] FS: 00007f605851b740(0000) GS:ffff8de250a80000(0000) knlGS:0000000000000000 [ 108.280597] R13: 00000000000002ac R14: 00000000ffffff99 R15: ffff8ddb92561b80 [ 108.282966] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 108.282967] CR2: 00007f053c039248 CR3: 0000000185850003 CR4: 0000000000f70ee0 [ 108.286206] FS: 0000000000000000(0000) GS:ffff8de250a00000(0000) knlGS:0000000000000000 [ 108.289701] PKRU: 55555554 [ 108.289702] Call Trace: [ 108.289704] [ 108.293977] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 108.297562] sock_alloc_send_pskb+0x20c/0x240 [ 108.301494] CR2: 00007f053c03a168 CR3: 0000000184394002 CR4: 0000000000f70ef0 [ 108.301495] PKRU: 55555554 [ 108.306464] __ip_append_data.isra.0+0x96f/0x1040 [ 108.309441] Call Trace: [ 108.309443] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 108.314927] [ 108.314928] sock_wfree+0x1c7/0x1d0 [ 108.318078] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 108.320276] skb_release_head_state+0x32/0x90 [ 108.324812] ip_make_skb+0xf6/0x130 [ 108.327188] skb_release_all+0x16/0x40 [ 108.330775] ? udp_sendmsg+0x9f3/0xcb0 [ 108.332626] napi_consume_skb+0x48/0xf0 [ 108.334134] ? xfrm_lookup_route+0x23/0xb0 [ 108.344285] igc_poll+0x787/0x1620 [igc] [ 108.346659] udp_sendmsg+0x9f3/0xcb0 [ 108.360010] ? ttwu_do_activate+0x40/0x220 [ 108.365237] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 108.366744] ? try_to_wake_up+0x289/0x5e0 [ 108.376987] ? sock_sendmsg+0x81/0x90 [ 108.395698] ? __pfx_process_timeout+0x10/0x10 [ 108.395701] sock_sendmsg+0x81/0x90 [ 108.409052] __napi_poll+0x29/0x1c0 [ 108.414279] ____sys_sendmsg+0x284/0x310 [ 108.419507] net_rx_action+0x257/0x2d0 [ 108.438216] ___sys_sendmsg+0x7c/0xc0 [ 108.439723] __do_softirq+0xc1/0x2a8 [ 108.444950] ? finish_task_switch+0xb4/0x2f0 [ 108.452077] irq_exit_rcu+0xa9/0xd0 [ 108.453584] ? __schedule+0x372/0xd00 [ 108.460713] common_interrupt+0x84/0xa0 [ 108.467840] ? clockevents_program_event+0x95/0x100 [ 108.474968] [ 108.482096] ? do_nanosleep+0x88/0x130 [ 108.489224] [ 108.489225] asm_common_interrupt+0x26/0x40 [ 108.496353] ? __rseq_handle_notify_resume+0xa9/0x4f0 [ 108.503478] RIP: 0010:cpu_idle_poll+0x2c/0x100 [ 108.510607] __sys_sendmsg+0x5d/0xb0 [ 108.518687] Code: 05 e1 d9 c8 00 65 8b 15 de 64 85 55 85 c0 7f 57 e8 b9 ef ff ff fb 65 48 8b 1c 25 00 cc 02 00 48 8b 03 a8 08 74 0b eb 1c f3 90 <48> 8b 03 a8 08 75 13 8b 05 77 63 cd 00 85 c0 75 ed e8 ce ec ff ff [ 108.525817] do_syscall_64+0x44/0xa0 [ 108.531563] RSP: 0018:ffffffffab203e70 EFLAGS: 00000202 [ 108.538693] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 108.546775] [ 108.546777] RIP: 0033:0x7f605862b7f7 [ 108.549495] RAX: 0000000000000001 RBX: ffffffffab20c940 RCX: 000000000000003b [ 108.551955] Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 108.554068] RDX: 4000000000000000 RSI: 000000002da97f6a RDI: 00000000002b8ff4 [ 108.559816] RSP: 002b:00007ffc99264058 EFLAGS: 00000246 [ 108.564178] RBP: 0000000000000000 R08: 00000000002b8ff4 R09: ffff8ddb01554c80 [ 108.571302] ORIG_RAX: 000000000000002e [ 108.571303] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f605862b7f7 [ 108.574023] R10: 000000000000015b R11: 000000000000000f R12: ffffffffab20c940 [ 108.574024] R13: 0000000000000000 R14: ffff8de26fbeef40 R15: ffffffffab20c940 [ 108.578727] RDX: 0000000000000000 RSI: 00007ffc992640a0 RDI: 0000000000000003 [ 108.578728] RBP: 00007ffc99264110 R08: 0000000000000000 R09: 175f48ad1c3a9c00 [ 108.581187] do_idle+0x62/0x230 [ 108.585890] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc992642d8 [ 108.585891] R13: 00005577814ab2ba R14: 00005577814addf0 R15: 00007f605876d000 [ 108.587920] cpu_startup_entry+0x1d/0x20 [ 108.591422] [ 108.596127] rest_init+0xc5/0xd0 [ 108.600490] ---[ end trace 0000000000000000 ]--- Test Setup: DUT: - Change mac address on DUT Side. Ensure NIC not having same MAC Address - Running udp_tai on DUT side. Let udp_tai running throughout the test Example: ./udp_tai -i enp170s0 -P 100000 -p 90 -c 1 -t 0 -u 30004 Host: - Perform link up/down every 5 second. Result: Kernel panic will happen on DUT Side. Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 1c4676882082..f986e88be5c1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -254,6 +254,13 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) /* reset BQL for queue */ netdev_tx_reset_queue(txring_txq(tx_ring)); + /* Zero out the buffer ring */ + memset(tx_ring->tx_buffer_info, 0, + sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); + + /* Zero out the descriptor ring */ + memset(tx_ring->desc, 0, tx_ring->size); + /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; @@ -267,7 +274,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) */ void igc_free_tx_resources(struct igc_ring *tx_ring) { - igc_clean_tx_ring(tx_ring); + igc_disable_tx_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; From c080fe262f9e73a00934b70c16b1479cf40cd2bd Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 17 Apr 2023 15:18:39 -0700 Subject: [PATCH 673/934] igc: Fix possible system crash when loading module Guarantee that when probe() is run again, PTM and PCI busmaster will be in the same state as it was if the driver was never loaded. Avoid an i225/i226 hardware issue that PTM requests can be made even though PCI bus mastering is not enabled. These unexpected PTM requests can crash some systems. So, "force" disable PTM and busmastering before removing the driver, so they can be re-enabled in the right order during probe(). This is more like a workaround and should be applicable for i225 and i226, in any platform. Fixes: 1b5d73fb8624 ("igc: Enable PCIe PTM") Signed-off-by: Vinicius Costa Gomes Reviewed-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f986e88be5c1..fa764190f270 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6730,6 +6730,9 @@ static void igc_remove(struct pci_dev *pdev) igc_ptp_stop(adapter); + pci_disable_ptm(pdev); + pci_clear_master(pdev); + set_bit(__IGC_DOWN, &adapter->state); del_timer_sync(&adapter->watchdog_timer); From 48a821fd58837800750ec1b3962f0f799630a844 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Tue, 25 Apr 2023 17:44:14 +0200 Subject: [PATCH 674/934] igb: fix nvm.ops.read() error handling Add error handling into igb_set_eeprom() function, in case nvm.ops.read() fails just quit with error code asap. Fixes: 9d5c824399de ("igb: PCI-Express 82575 Gigabit Ethernet driver") Signed-off-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 7d60da1b7bf4..319ed601eaa1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -822,6 +822,8 @@ static int igb_set_eeprom(struct net_device *netdev, */ ret_val = hw->nvm.ops.read(hw, last_word, 1, &eeprom_buff[last_word - first_word]); + if (ret_val) + goto out; } /* Device's eeprom is always little-endian, word addressable */ @@ -841,6 +843,7 @@ static int igb_set_eeprom(struct net_device *netdev, hw->nvm.ops.update(hw); igb_set_fw_version(adapter); +out: kfree(eeprom_buff); return ret_val; } From 7833b865953c8e62abc76a3261c04132b2fb69de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jun 2023 11:10:25 +0200 Subject: [PATCH 675/934] btrfs: fix iomap_begin length for nocow writes can_nocow_extent can reduce the len passed in, which needs to be propagated to btrfs_dio_iomap_begin so that iomap does not submit more data then is mapped. This problems exists since the btrfs_get_blocks_direct helper was added in commit c5794e51784a ("btrfs: Factor out write portion of btrfs_get_blocks_direct"), but the ordered_extent splitting added in commit b73a6fd1b1ef ("btrfs: split partial dio bios before submit") added a WARN_ON that made a syzkaller test fail. Reported-by: syzbot+ee90502d5c8fd1d0dd93@syzkaller.appspotmail.com Fixes: c5794e51784a ("btrfs: Factor out write portion of btrfs_get_blocks_direct") CC: stable@vger.kernel.org # 6.1+ Tested-by: syzbot+ee90502d5c8fd1d0dd93@syzkaller.appspotmail.com Reviewed-by: Filipe Manana Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/inode.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 19c707bc8801..3f99f02dc1fe 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7264,7 +7264,7 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, static int btrfs_get_blocks_direct_write(struct extent_map **map, struct inode *inode, struct btrfs_dio_data *dio_data, - u64 start, u64 len, + u64 start, u64 *lenp, unsigned int iomap_flags) { const bool nowait = (iomap_flags & IOMAP_NOWAIT); @@ -7275,6 +7275,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, struct btrfs_block_group *bg; bool can_nocow = false; bool space_reserved = false; + u64 len = *lenp; u64 prev_len; int ret = 0; @@ -7345,15 +7346,19 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, free_extent_map(em); *map = NULL; - if (nowait) - return -EAGAIN; + if (nowait) { + ret = -EAGAIN; + goto out; + } /* * If we could not allocate data space before locking the file * range and we can't do a NOCOW write, then we have to fail. */ - if (!dio_data->data_space_reserved) - return -ENOSPC; + if (!dio_data->data_space_reserved) { + ret = -ENOSPC; + goto out; + } /* * We have to COW and we have already reserved data space before, @@ -7394,6 +7399,7 @@ out: btrfs_delalloc_release_extents(BTRFS_I(inode), len); btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true); } + *lenp = len; return ret; } @@ -7570,7 +7576,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, if (write) { ret = btrfs_get_blocks_direct_write(&em, inode, dio_data, - start, len, flags); + start, &len, flags); if (ret < 0) goto unlock_err; unlock_extents = true; From deccae40e4b30f98837e44225194d80c8baf2233 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 9 Jun 2023 10:53:41 -0700 Subject: [PATCH 676/934] btrfs: can_nocow_file_extent should pass down args->strict from callers Commit 619104ba453ad0 ("btrfs: move common NOCOW checks against a file extent into a helper") changed our call to btrfs_cross_ref_exist() to always pass false for the 'strict' parameter. We're passing this down through the stack so that we can do a full check for cross references during swapfile activation. With strict always false, this test fails: btrfs subvol create swappy chattr +C swappy fallocate -l1G swappy/swapfile chmod 600 swappy/swapfile mkswap swappy/swapfile btrfs subvol snap swappy swapsnap btrfs subvol del -C swapsnap btrfs fi sync / sync;sync;sync swapon swappy/swapfile The fix is to just use args->strict, and everyone except swapfile activation is passing false. Fixes: 619104ba453ad0 ("btrfs: move common NOCOW checks against a file extent into a helper") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3f99f02dc1fe..7fcafcc5292c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1864,7 +1864,7 @@ static int can_nocow_file_extent(struct btrfs_path *path, ret = btrfs_cross_ref_exist(root, btrfs_ino(inode), key->offset - args->extent_offset, - args->disk_bytenr, false, path); + args->disk_bytenr, args->strict, path); WARN_ON_ONCE(ret > 0 && is_freespace_inode); if (ret != 0) goto out; From 745806fb4554f334e6406fa82b328562aa48f08f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 11 Jun 2023 08:09:13 +0800 Subject: [PATCH 677/934] btrfs: do not ASSERT() on duplicated global roots [BUG] Syzbot reports a reproducible ASSERT() when using rescue=usebackuproot mount option on a corrupted fs. The full report can be found here: https://syzkaller.appspot.com/bug?extid=c4614eae20a166c25bf0 BTRFS error (device loop0: state C): failed to load root csum assertion failed: !tmp, in fs/btrfs/disk-io.c:1103 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ctree.h:3664! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 3608 Comm: syz-executor356 Not tainted 6.0.0-rc7-syzkaller-00029-g3800a713b607 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 RIP: 0010:assertfail+0x1a/0x1c fs/btrfs/ctree.h:3663 RSP: 0018:ffffc90003aaf250 EFLAGS: 00010246 RAX: 0000000000000032 RBX: 0000000000000000 RCX: f21c13f886638400 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffff888021c640a0 R08: ffffffff816bd38d R09: ffffed10173667f1 R10: ffffed10173667f1 R11: 1ffff110173667f0 R12: dffffc0000000000 R13: ffff8880229c21f7 R14: ffff888021c64060 R15: ffff8880226c0000 FS: 0000555556a73300(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a2637d7a00 CR3: 00000000709c4000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_global_root_insert+0x1a7/0x1b0 fs/btrfs/disk-io.c:1103 load_global_roots_objectid+0x482/0x8c0 fs/btrfs/disk-io.c:2467 load_global_roots fs/btrfs/disk-io.c:2501 [inline] btrfs_read_roots fs/btrfs/disk-io.c:2528 [inline] init_tree_roots+0xccb/0x203c fs/btrfs/disk-io.c:2939 open_ctree+0x1e53/0x33df fs/btrfs/disk-io.c:3574 btrfs_fill_super+0x1c6/0x2d0 fs/btrfs/super.c:1456 btrfs_mount_root+0x885/0x9a0 fs/btrfs/super.c:1824 legacy_get_tree+0xea/0x180 fs/fs_context.c:610 vfs_get_tree+0x88/0x270 fs/super.c:1530 fc_mount fs/namespace.c:1043 [inline] vfs_kern_mount+0xc9/0x160 fs/namespace.c:1073 btrfs_mount+0x3d3/0xbb0 fs/btrfs/super.c:1884 [CAUSE] Since the introduction of global roots, we handle csum/extent/free-space-tree roots as global roots, even if no extent-tree-v2 feature is enabled. So for regular csum/extent/fst roots, we load them into fs_info::global_root_tree rb tree. And we should not expect any conflicts in that rb tree, thus we have an ASSERT() inside btrfs_global_root_insert(). But rescue=usebackuproot can break the assumption, as we will try to load those trees again and again as long as we have bad roots and have backup roots slot remaining. So in that case we can have conflicting roots in the rb tree, and triggering the ASSERT() crash. [FIX] We can safely remove that ASSERT(), as the caller will properly put the offending root. To make further debugging easier, also add two explicit error messages: - Error message for conflicting global roots - Error message when using backup roots slot Reported-by: syzbot+a694851c6ab28cbcfb9c@syzkaller.appspotmail.com Fixes: abed4aaae4f7 ("btrfs: track the csum, extent, and free space trees in a rb tree") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 88e6d1072a35..dabc79c1af1b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -996,13 +996,18 @@ int btrfs_global_root_insert(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *tmp; + int ret = 0; write_lock(&fs_info->global_root_lock); tmp = rb_find_add(&root->rb_node, &fs_info->global_root_tree, global_root_cmp); write_unlock(&fs_info->global_root_lock); - ASSERT(!tmp); - return tmp ? -EEXIST : 0; + if (tmp) { + ret = -EEXIST; + btrfs_warn(fs_info, "global root %llu %llu already exists", + root->root_key.objectid, root->root_key.offset); + } + return ret; } void btrfs_global_root_delete(struct btrfs_root *root) @@ -2842,6 +2847,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) /* We can't trust the free space cache either */ btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE); + btrfs_warn(fs_info, "try to load backup roots slot %d", i); ret = read_backup_root(fs_info, i); backup_index = ret; if (ret < 0) From b9dc1046edfeb7d9dbc2272c8d9ad5a8c47f3199 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 9 Jun 2023 10:03:04 +0200 Subject: [PATCH 678/934] net: phylink: report correct max speed for QUSGMII Q-USGMII is the quad port version of USGMII, and supports a max speed of 1Gbps on each line. Make so that phylink_interface_max_speed() reports this information correctly. Fixes: ae0e4bb2a0e0 ("net: phylink: Adjust link settings based on rate matching") Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index b4831110003c..809e6d5216dc 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -188,6 +188,7 @@ static int phylink_interface_max_speed(phy_interface_t interface) case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_QUSGMII: case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_GMII: return SPEED_1000; @@ -204,7 +205,6 @@ static int phylink_interface_max_speed(phy_interface_t interface) case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_10GKR: case PHY_INTERFACE_MODE_USXGMII: - case PHY_INTERFACE_MODE_QUSGMII: return SPEED_10000; case PHY_INTERFACE_MODE_25GBASER: From 923454c0368b8092e9d05c020f50abca577e7290 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 9 Jun 2023 10:03:05 +0200 Subject: [PATCH 679/934] net: phylink: use a dedicated helper to parse usgmii control word Q-USGMII is a derivative of USGMII, that uses a specific formatting for the control word. The layout is close to the USXGMII control word, but doesn't support speeds over 1Gbps. Use a dedicated decoding logic for the USGMII control word, re-using USXGMII definitions but only considering 10/100/1000Mbps speeds Fixes: 5e61fe157a27 ("net: phy: Introduce QUSGMII PHY mode") Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 809e6d5216dc..5efdeb59f4b2 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -3298,6 +3298,41 @@ void phylink_decode_usxgmii_word(struct phylink_link_state *state, } EXPORT_SYMBOL_GPL(phylink_decode_usxgmii_word); +/** + * phylink_decode_usgmii_word() - decode the USGMII word from a MAC PCS + * @state: a pointer to a struct phylink_link_state. + * @lpa: a 16 bit value which stores the USGMII auto-negotiation word + * + * Helper for MAC PCS supporting the USGMII protocol and the auto-negotiation + * code word. Decode the USGMII code word and populate the corresponding fields + * (speed, duplex) into the phylink_link_state structure. The structure for this + * word is the same as the USXGMII word, except it only supports speeds up to + * 1Gbps. + */ +static void phylink_decode_usgmii_word(struct phylink_link_state *state, + uint16_t lpa) +{ + switch (lpa & MDIO_USXGMII_SPD_MASK) { + case MDIO_USXGMII_10: + state->speed = SPEED_10; + break; + case MDIO_USXGMII_100: + state->speed = SPEED_100; + break; + case MDIO_USXGMII_1000: + state->speed = SPEED_1000; + break; + default: + state->link = false; + return; + } + + if (lpa & MDIO_USXGMII_FULL_DUPLEX) + state->duplex = DUPLEX_FULL; + else + state->duplex = DUPLEX_HALF; +} + /** * phylink_mii_c22_pcs_decode_state() - Decode MAC PCS state from MII registers * @state: a pointer to a &struct phylink_link_state. @@ -3335,9 +3370,11 @@ void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_QUSGMII: phylink_decode_sgmii_word(state, lpa); break; + case PHY_INTERFACE_MODE_QUSGMII: + phylink_decode_usgmii_word(state, lpa); + break; default: state->link = false; From b1a6a38ab8a633546cefae890da842f19e006c74 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:36 +0200 Subject: [PATCH 680/934] selftests: mptcp: lib: skip if not below kernel version Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. A new function is now available to easily detect if a feature is missing by looking at the kernel version. That's clearly not ideal and this kind of check should be avoided as soon as possible. But sometimes, there are no external sign that a "feature" is available or not: internal behaviours can change without modifying the uAPI and these selftests are verifying the internal behaviours. Sometimes, the only (easy) way to verify if the feature is present is to run the test but then the validation cannot determine if there is a failure with the feature or if the feature is missing. Then it looks better to check the kernel version instead of having tests that can never fail. In any case, we need a solution not to have a whole selftest being marked as failed just because one sub-test has failed. Note that this env var car be set to 1 not to do such check and run the linked sub-test: SELFTESTS_MPTCP_LIB_NO_KVERSION_CHECK. This new helper is going to be used in the following commits. In order to ease the backport of such future patches, it would be good if this patch is backported up to the introduction of MPTCP selftests, hence the Fixes tag below: this type of check was supposed to be done from the beginning. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 048d19d444be ("mptcp: add basic kselftest for mptcp") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_lib.sh | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 29b65f4b73b2..f32045b23b89 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -76,3 +76,29 @@ mptcp_lib_kallsyms_doesnt_have() { mptcp_lib_fail_if_expected_feature "${sym} symbol has been found" } + +# !!!AVOID USING THIS!!! +# Features might not land in the expected version and features can be backported +# +# $1: kernel version, e.g. 6.3 +mptcp_lib_kversion_ge() { + local exp_maj="${1%.*}" + local exp_min="${1#*.}" + local v maj min + + # If the kernel has backported features, set this env var to 1: + if [ "${SELFTESTS_MPTCP_LIB_NO_KVERSION_CHECK:-}" = "1" ]; then + return 0 + fi + + v=$(uname -r | cut -d'.' -f1,2) + maj=${v%.*} + min=${v#*.} + + if [ "${maj}" -gt "${exp_maj}" ] || + { [ "${maj}" -eq "${exp_maj}" ] && [ "${min}" -ge "${exp_min}" ]; }; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" +} From 0c4cd3f86a40028845ad6f8af5b37165666404cd Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:37 +0200 Subject: [PATCH 681/934] selftests: mptcp: join: use 'iptables-legacy' if available IPTables commands using 'iptables-nft' fail on old kernels, at least 5.15 because it doesn't see the default IPTables chains: $ iptables -L iptables/1.8.2 Failed to initialize nft: Protocol not supported As a first step before switching to NFTables, we can use iptables-legacy if available. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 8d014eaa9254 ("selftests: mptcp: add ADD_ADDR timeout test case") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 29f0c99d9a46..74cc8a74a9d6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -25,6 +25,8 @@ capout="" ns1="" ns2="" ksft_skip=4 +iptables="iptables" +ip6tables="ip6tables" timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) capture=0 @@ -146,7 +148,11 @@ check_tools() exit $ksft_skip fi - if ! iptables -V &> /dev/null; then + # Use the legacy version if available to support old kernel versions + if iptables-legacy -V &> /dev/null; then + iptables="iptables-legacy" + ip6tables="ip6tables-legacy" + elif ! iptables -V &> /dev/null; then echo "SKIP: Could not run all tests without iptables tool" exit $ksft_skip fi @@ -247,9 +253,9 @@ reset_with_add_addr_timeout() reset "${1}" || return 1 - tables="iptables" + tables="${iptables}" if [ $ip -eq 6 ]; then - tables="ip6tables" + tables="${ip6tables}" fi ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 @@ -314,9 +320,9 @@ reset_with_fail() local ip="${3:-4}" local tables - tables="iptables" + tables="${iptables}" if [ $ip -eq 6 ]; then - tables="ip6tables" + tables="${ip6tables}" fi ip netns exec $ns2 $tables \ @@ -704,7 +710,7 @@ filter_tcp_from() local src="${2}" local target="${3}" - ip netns exec "${ns}" iptables -A INPUT -s "${src}" -p tcp -j "${target}" + ip netns exec "${ns}" ${iptables} -A INPUT -s "${src}" -p tcp -j "${target}" } do_transfer() From cdb50525345cf5a8359ee391032ef606a7826f08 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:38 +0200 Subject: [PATCH 682/934] selftests: mptcp: join: helpers to skip tests Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. Here are some helpers that will be used to mark subtests as skipped if a feature is not supported. Marking as a fix for the commit introducing this selftest to help with the backports. While at it, also check if kallsyms feature is available as it will also be used in the following commits to check if MPTCP features are available before starting a test. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: b08fbf241064 ("selftests: add test-cases for MPTCP MP_JOIN") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 74cc8a74a9d6..a63aed145393 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -142,6 +142,7 @@ cleanup_partial() check_tools() { mptcp_lib_check_mptcp + mptcp_lib_check_kallsyms if ! ip -Version &> /dev/null; then echo "SKIP: Could not run test without ip tool" @@ -191,6 +192,32 @@ cleanup() cleanup_partial } +# $1: msg +print_title() +{ + printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${1}" +} + +# [ $1: fail msg ] +mark_as_skipped() +{ + local msg="${1:-"Feature not supported"}" + + mptcp_lib_fail_if_expected_feature "${msg}" + + print_title "[ skip ] ${msg}" + printf "\n" +} + +# $@: condition +continue_if() +{ + if ! "${@}"; then + mark_as_skipped + return 1 + fi +} + skip_test() { if [ "${#only_tests_ids[@]}" -eq 0 ] && [ "${#only_tests_names[@]}" -eq 0 ]; then From 47867f0a7e831e24e5eab3330667ce9682d50fb1 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:39 +0200 Subject: [PATCH 683/934] selftests: mptcp: join: skip check if MIB counter not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the MPTCP MIB counters introduced in commit fc518953bc9c ("mptcp: add and use MIB counter infrastructure") and more later. The MPTCP Join selftest heavily relies on these counters. If a counter is not supported by the kernel, it is not displayed when using 'nstat -z'. We can then detect that and skip the verification. A new helper (get_counter()) has been added to do the required checks and return an error if the counter is not available. Note that if we expect to have these features available and if SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var is set to 1, the tests will be marked as failed instead of skipped. This new helper also makes sure we get the exact counter we want to avoid issues we had in the past, e.g. with MPTcpExtRmAddr and MPTcpExtRmAddrDrop sharing the same prefix. While at it, we uniform the way we fetch a MIB counter. Note for the backports: we rarely change these modified blocks so if there is are conflicts, it is very likely because a counter is not used in the older kernels and we don't need that chunk. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: b08fbf241064 ("selftests: add test-cases for MPTCP MP_JOIN") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 230 ++++++++++-------- 1 file changed, 130 insertions(+), 100 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a63aed145393..276396cbe60c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -500,11 +500,25 @@ wait_local_port_listen() done } +# $1: ns ; $2: counter +get_counter() +{ + local ns="${1}" + local counter="${2}" + local count + + count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}') + if [ -z "${count}" ]; then + mptcp_lib_fail_if_expected_feature "${counter} counter" + return 1 + fi + + echo "${count}" +} + rm_addr_count() { - local ns=${1} - - ip netns exec ${ns} nstat -as | grep MPTcpExtRmAddr | awk '{print $2}' + get_counter "${1}" "MPTcpExtRmAddr" } # $1: ns, $2: old rm_addr counter in $ns @@ -527,11 +541,11 @@ wait_mpj() local ns="${1}" local cnt old_cnt - old_cnt=$(ip netns exec ${ns} nstat -as | grep MPJoinAckRx | awk '{print $2}') + old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") local i for i in $(seq 10); do - cnt=$(ip netns exec ${ns} nstat -as | grep MPJoinAckRx | awk '{print $2}') + cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 done @@ -1190,12 +1204,13 @@ chk_csum_nr() fi printf "%-${nr_blank}s %s" " " "sum" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}') - [ -z "$count" ] && count=0 + count=$(get_counter ${ns1} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns1" ]; then extra_msg="$extra_msg ns1=$count" fi - if { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || + if [ -z "$count" ]; then + echo -n "[skip]" + elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then echo "[fail] got $count data checksum error[s] expected $csum_ns1" fail_test @@ -1204,12 +1219,13 @@ chk_csum_nr() echo -n "[ ok ]" fi echo -n " - csum " - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}') - [ -z "$count" ] && count=0 + count=$(get_counter ${ns2} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns2" ]; then extra_msg="$extra_msg ns2=$count" fi - if { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || + if [ -z "$count" ]; then + echo -n "[skip]" + elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then echo "[fail] got $count data checksum error[s] expected $csum_ns2" fail_test @@ -1251,12 +1267,13 @@ chk_fail_nr() fi printf "%-${nr_blank}s %s" " " "ftx" - count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}') - [ -z "$count" ] && count=0 + count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx") if [ "$count" != "$fail_tx" ]; then extra_msg="$extra_msg,tx=$count" fi - if { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || + if [ -z "$count" ]; then + echo -n "[skip]" + elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx" fail_test @@ -1266,12 +1283,13 @@ chk_fail_nr() fi echo -n " - failrx" - count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}') - [ -z "$count" ] && count=0 + count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx") if [ "$count" != "$fail_rx" ]; then extra_msg="$extra_msg,rx=$count" fi - if { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || + if [ -z "$count" ]; then + echo -n "[skip]" + elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx" fail_test @@ -1303,10 +1321,11 @@ chk_fclose_nr() fi printf "%-${nr_blank}s %s" " " "ctx" - count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFastcloseTx | awk '{print $2}') - [ -z "$count" ] && count=0 - [ "$count" != "$fclose_tx" ] && extra_msg="$extra_msg,tx=$count" - if [ "$count" != "$fclose_tx" ]; then + count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$fclose_tx" ]; then + extra_msg="$extra_msg,tx=$count" echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx" fail_test dump_stats=1 @@ -1315,10 +1334,11 @@ chk_fclose_nr() fi echo -n " - fclzrx" - count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFastcloseRx | awk '{print $2}') - [ -z "$count" ] && count=0 - [ "$count" != "$fclose_rx" ] && extra_msg="$extra_msg,rx=$count" - if [ "$count" != "$fclose_rx" ]; then + count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$fclose_rx" ]; then + extra_msg="$extra_msg,rx=$count" echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx" fail_test dump_stats=1 @@ -1349,9 +1369,10 @@ chk_rst_nr() fi printf "%-${nr_blank}s %s" " " "rtx" - count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPRstTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count -lt $rst_tx ]; then + count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ $count -lt $rst_tx ]; then echo "[fail] got $count MP_RST[s] TX expected $rst_tx" fail_test dump_stats=1 @@ -1360,9 +1381,10 @@ chk_rst_nr() fi echo -n " - rstrx " - count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPRstRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" -lt "$rst_rx" ]; then + count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" -lt "$rst_rx" ]; then echo "[fail] got $count MP_RST[s] RX expected $rst_rx" fail_test dump_stats=1 @@ -1383,9 +1405,10 @@ chk_infi_nr() local dump_stats printf "%-${nr_blank}s %s" " " "itx" - count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$infi_tx" ]; then + count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$infi_tx" ]; then echo "[fail] got $count infinite map[s] TX expected $infi_tx" fail_test dump_stats=1 @@ -1394,9 +1417,10 @@ chk_infi_nr() fi echo -n " - infirx" - count=$(ip netns exec $ns1 nstat -as | grep InfiniteMapRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$infi_rx" ]; then + count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$infi_rx" ]; then echo "[fail] got $count infinite map[s] RX expected $infi_rx" fail_test dump_stats=1 @@ -1428,9 +1452,10 @@ chk_join_nr() fi printf "%03u %-36s %s" "${TEST_COUNT}" "${title}" "syn" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$syn_nr" ]; then echo "[fail] got $count JOIN[s] syn expected $syn_nr" fail_test dump_stats=1 @@ -1440,9 +1465,10 @@ chk_join_nr() echo -n " - synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_ack_nr" ]; then + count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$syn_ack_nr" ]; then # simult connections exceeding the limit with cookie enabled could go up to # synack validation as the conn limit can be enforced reliably only after # the subflow creation @@ -1458,9 +1484,10 @@ chk_join_nr() fi echo -n " - ack" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$ack_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$ack_nr" ]; then echo "[fail] got $count JOIN[s] ack expected $ack_nr" fail_test dump_stats=1 @@ -1492,12 +1519,12 @@ chk_stale_nr() local recover_nr printf "%-${nr_blank}s %-18s" " " "stale" - stale_nr=$(ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}') - [ -z "$stale_nr" ] && stale_nr=0 - recover_nr=$(ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}') - [ -z "$recover_nr" ] && recover_nr=0 - if [ $stale_nr -lt $stale_min ] || + stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale") + recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover") + if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then + echo "[skip]" + elif [ $stale_nr -lt $stale_min ] || { [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } || [ $((stale_nr - recover_nr)) -ne $stale_delta ]; then echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \ @@ -1533,12 +1560,12 @@ chk_add_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) printf "%-${nr_blank}s %s" " " "add" - count=$(ip netns exec $ns2 nstat -as MPTcpExtAddAddr | grep MPTcpExtAddAddr | awk '{print $2}') - [ -z "$count" ] && count=0 - + count=$(get_counter ${ns2} "MPTcpExtAddAddr") + if [ -z "$count" ]; then + echo -n "[skip]" # if the test configured a short timeout tolerate greater then expected # add addrs options, due to retransmissions - if [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then + elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then echo "[fail] got $count ADD_ADDR[s] expected $add_nr" fail_test dump_stats=1 @@ -1547,9 +1574,10 @@ chk_add_nr() fi echo -n " - echo " - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$echo_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtEchoAdd") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$echo_nr" ]; then echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr" fail_test dump_stats=1 @@ -1559,9 +1587,10 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then echo -n " - pt " - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtPortAdd | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$port_nr" ]; then + count=$(get_counter ${ns2} "MPTcpExtPortAdd") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$port_nr" ]; then echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr" fail_test dump_stats=1 @@ -1570,10 +1599,10 @@ chk_add_nr() fi printf "%-${nr_blank}s %s" " " "syn" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortSynRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$syn_nr" ]; then echo "[fail] got $count JOIN[s] syn with a different \ port-number expected $syn_nr" fail_test @@ -1583,10 +1612,10 @@ chk_add_nr() fi echo -n " - synack" - count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinPortSynAckRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_ack_nr" ]; then + count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$syn_ack_nr" ]; then echo "[fail] got $count JOIN[s] synack with a different \ port-number expected $syn_ack_nr" fail_test @@ -1596,10 +1625,10 @@ chk_add_nr() fi echo -n " - ack" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortAckRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$ack_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$ack_nr" ]; then echo "[fail] got $count JOIN[s] ack with a different \ port-number expected $ack_nr" fail_test @@ -1609,10 +1638,10 @@ chk_add_nr() fi printf "%-${nr_blank}s %s" " " "syn" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortSynRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$mis_syn_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$mis_syn_nr" ]; then echo "[fail] got $count JOIN[s] syn with a mismatched \ port-number expected $mis_syn_nr" fail_test @@ -1622,10 +1651,10 @@ chk_add_nr() fi echo -n " - ack " - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortAckRx | - awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$mis_ack_nr" ]; then + count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$mis_ack_nr" ]; then echo "[fail] got $count JOIN[s] ack with a mismatched \ port-number expected $mis_ack_nr" fail_test @@ -1669,9 +1698,10 @@ chk_rm_nr() fi printf "%-${nr_blank}s %s" " " "rm " - count=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$rm_addr_nr" ]; then + count=$(get_counter ${addr_ns} "MPTcpExtRmAddr") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$rm_addr_nr" ]; then echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" fail_test dump_stats=1 @@ -1680,29 +1710,27 @@ chk_rm_nr() fi echo -n " - rmsf " - count=$(ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ -n "$simult" ]; then + count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ -n "$simult" ]; then local cnt suffix - cnt=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}') + cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow") # in case of simult flush, the subflow removal count on each side is # unreliable - [ -z "$cnt" ] && cnt=0 count=$((count + cnt)) [ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then - echo "[ ok ] $suffix" + echo -n "[ ok ] $suffix" else echo "[fail] got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]" fail_test dump_stats=1 fi - return - fi - if [ "$count" != "$rm_subflow_nr" ]; then + elif [ "$count" != "$rm_subflow_nr" ]; then echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" fail_test dump_stats=1 @@ -1723,9 +1751,10 @@ chk_prio_nr() local dump_stats printf "%-${nr_blank}s %s" " " "ptx" - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$mp_prio_nr_tx" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$mp_prio_nr_tx" ]; then echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx" fail_test dump_stats=1 @@ -1734,9 +1763,10 @@ chk_prio_nr() fi echo -n " - prx " - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$mp_prio_nr_rx" ]; then + count=$(get_counter ${ns1} "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$mp_prio_nr_rx" ]; then echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx" fail_test dump_stats=1 @@ -1852,7 +1882,7 @@ wait_attempt_fail() while [ $time -lt $timeout_ms ]; do local cnt - cnt=$(ip netns exec $ns nstat -as TcpAttemptFails | grep TcpAttemptFails | awk '{print $2}') + cnt=$(get_counter ${ns} "TcpAttemptFails") [ "$cnt" = 1 ] && return 1 time=$((time + 100)) From 4a0b866a3f7d3c22033f40e93e94befc6fe51bce Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:40 +0200 Subject: [PATCH 684/934] selftests: mptcp: join: skip test if iptables/tc cmds fail Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. Some tests are using IPTables and/or TC commands to force some behaviours. If one of these commands fails -- likely because some features are not available due to missing kernel config -- we should intercept the error and skip the tests requiring these features. Note that if we expect to have these features available and if SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var is set to 1, the tests will be marked as failed instead of skipped. This patch also replaces the 'exit 1' by 'return 1' not to stop the selftest in the middle without the conclusion if there is an issue with NF or TC. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 8d014eaa9254 ("selftests: mptcp: add ADD_ADDR timeout test case") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 88 ++++++++++++------- 1 file changed, 57 insertions(+), 31 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 276396cbe60c..c471934ad5e0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -286,11 +286,15 @@ reset_with_add_addr_timeout() fi ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 - ip netns exec $ns2 $tables -A OUTPUT -p tcp \ - -m tcp --tcp-option 30 \ - -m bpf --bytecode \ - "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \ - -j DROP + + if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \ + -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \ + -j DROP; then + mark_as_skipped "unable to set the 'add addr' rule" + return 1 + fi } # $1: test name @@ -334,17 +338,12 @@ reset_with_allow_join_id0() # tc action pedit offset 162 out of bounds # # Netfilter is used to mark packets with enough data. -reset_with_fail() +setup_fail_rules() { - reset "${1}" || return 1 - - ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1 - ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1 - check_invert=1 validate_checksum=1 - local i="$2" - local ip="${3:-4}" + local i="$1" + local ip="${2:-4}" local tables tables="${iptables}" @@ -359,15 +358,32 @@ reset_with_fail() -p tcp \ -m length --length 150:9999 \ -m statistic --mode nth --packet 1 --every 99999 \ - -j MARK --set-mark 42 || exit 1 + -j MARK --set-mark 42 || return ${ksft_skip} - tc -n $ns2 qdisc add dev ns2eth$i clsact || exit 1 + tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${ksft_skip} tc -n $ns2 filter add dev ns2eth$i egress \ protocol ip prio 1000 \ handle 42 fw \ action pedit munge offset 148 u8 invert \ pipe csum tcp \ - index 100 || exit 1 + index 100 || return ${ksft_skip} +} + +reset_with_fail() +{ + reset "${1}" || return 1 + shift + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1 + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1 + + local rc=0 + setup_fail_rules "${@}" || rc=$? + + if [ ${rc} -eq ${ksft_skip} ]; then + mark_as_skipped "unable to set the 'fail' rules" + return 1 + fi } reset_with_events() @@ -382,6 +398,25 @@ reset_with_events() evts_ns2_pid=$! } +reset_with_tcp_filter() +{ + reset "${1}" || return 1 + shift + + local ns="${!1}" + local src="${2}" + local target="${3}" + + if ! ip netns exec "${ns}" ${iptables} \ + -A INPUT \ + -s "${src}" \ + -p tcp \ + -j "${target}"; then + mark_as_skipped "unable to set the filter rules" + return 1 + fi +} + fail_test() { ret=1 @@ -745,15 +780,6 @@ pm_nl_check_endpoint() fi } -filter_tcp_from() -{ - local ns="${1}" - local src="${2}" - local target="${3}" - - ip netns exec "${ns}" ${iptables} -A INPUT -s "${src}" -p tcp -j "${target}" -} - do_transfer() { local listener_ns="$1" @@ -1975,23 +2001,23 @@ subflows_error_tests() fi # multiple subflows, with subflow creation error - if reset "multi subflows, with failing subflow"; then + if reset_with_tcp_filter "multi subflows, with failing subflow" ns1 10.0.3.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - filter_tcp_from $ns1 10.0.3.2 REJECT run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr 1 1 1 fi # multiple subflows, with subflow timeout on MPJ - if reset "multi subflows, with subflow timeout"; then + if reset_with_tcp_filter "multi subflows, with subflow timeout" ns1 10.0.3.2 DROP && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - filter_tcp_from $ns1 10.0.3.2 DROP run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr 1 1 1 fi @@ -1999,11 +2025,11 @@ subflows_error_tests() # multiple subflows, check that the endpoint corresponding to # closed subflow (due to reset) is not reused if additional # subflows are added later - if reset "multi subflows, fair usage on close"; then + if reset_with_tcp_filter "multi subflows, fair usage on close" ns1 10.0.3.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - filter_tcp_from $ns1 10.0.3.2 REJECT run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & # mpj subflow will be in TW after the reset From d4c81bbb8600257fd3076d0196cb08bd2e5bdf24 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:41 +0200 Subject: [PATCH 685/934] selftests: mptcp: join: support local endpoint being tracked or not Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. At some points, a new feature caused internal behaviour changes we are verifying in the selftests, see the Fixes tag below. It was not a uAPI change but because in these selftests, we check some internal behaviours, it is normal we have to adapt them from time to time after having added some features. It is possible to look for "mptcp_pm_subflow_check_next" in kallsyms because it was needed to introduce the mentioned feature. So we can know in advance what the behaviour we are expecting here instead of supporting the two behaviours. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c471934ad5e0..3da39febb09e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2129,11 +2129,18 @@ signal_address_tests() # the peer could possibly miss some addr notification, allow retransmission ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow - chk_join_nr 3 3 3 - # the server will not signal the address terminating - # the MPC subflow - chk_add_nr 3 3 + # It is not directly linked to the commit introducing this + # symbol but for the parent one which is linked anyway. + if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + chk_join_nr 3 3 2 + chk_add_nr 4 4 + else + chk_join_nr 3 3 3 + # the server will not signal the address terminating + # the MPC subflow + chk_add_nr 3 3 + fi fi } From ae947bb2c253ff5f395bb70cb9db8700543bf398 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:42 +0200 Subject: [PATCH 686/934] selftests: mptcp: join: skip Fastclose tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of MP_FASTCLOSE introduced in commit f284c0c77321 ("mptcp: implement fastclose xmit path"). If the MIB counter is not available, the test cannot be verified and the behaviour will not be the expected one. So we can skip the test if the counter is missing. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 01542c9bf9ab ("selftests: mptcp: add fastclose testcase") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3da39febb09e..cfd43037c6d5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -261,6 +261,19 @@ reset() return 0 } +# $1: test name ; $2: counter to check +reset_check_counter() +{ + reset "${1}" || return 1 + + local counter="${2}" + + if ! nstat -asz "${counter}" | grep -wq "${counter}"; then + mark_as_skipped "counter '${counter}' is not available" + return 1 + fi +} + # $1: test name reset_with_cookies() { @@ -3121,14 +3134,14 @@ fullmesh_tests() fastclose_tests() { - if reset "fastclose test"; then + if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_client chk_join_nr 0 0 0 chk_fclose_nr 1 1 chk_rst_nr 1 1 invert fi - if reset "fastclose server test"; then + if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_server chk_join_nr 0 0 0 chk_fclose_nr 1 1 invert From 425ba803124b90cb9124d99f13b372a89dc151d9 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:43 +0200 Subject: [PATCH 687/934] selftests: mptcp: join: support RM_ADDR for used endpoints or not Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. At some points, a new feature caused internal behaviour changes we are verifying in the selftests, see the Fixes tag below. It was not a UAPI change but because in these selftests, we check some internal behaviours, it is normal we have to adapt them from time to time after having added some features. It looks like there is no external sign we can use to predict the expected behaviour. Instead of accepting different behaviours and thus not really checking for the expected behaviour, we are looking here for a specific kernel version. That's not ideal but it looks better than removing the test because it cannot support older kernel versions. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 6fa0174a7c86 ("mptcp: more careful RM_ADDR generation") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index cfd43037c6d5..3d4f22fe8f8c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2394,7 +2394,12 @@ remove_tests() pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr 3 3 3 - chk_rm_nr 0 3 simult + + if mptcp_lib_kversion_ge 5.18; then + chk_rm_nr 0 3 simult + else + chk_rm_nr 3 3 + fi fi # addresses flush From 36c4127ae8dd0ebac6d56d8a1b272dd483471c40 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:44 +0200 Subject: [PATCH 688/934] selftests: mptcp: join: skip implicit tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of the implicit endpoints introduced by commit d045b9eb95a9 ("mptcp: introduce implicit endpoints"). It is possible to look for "mptcp_subflow_send_ack" in kallsyms because it was needed to introduce the mentioned feature. So we can know in advance if the feature is supported instead of trying and accepting any results. Note that here and in the following commits, we re-do the same check for each sub-test of the same function for a few reasons. The main one is not to break the ID assign to each test in order to be able to easily compare results between different kernel versions. Also, we can still run a specific test even if it is skipped. Another reason is that it makes it clear during the review that a specific subtest will be skipped or not under certain conditions. At the end, it looks OK to call the exact same helper multiple times: it is not a critical path and it is the same code that is executed, not really more cases to maintain. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3d4f22fe8f8c..7f860a93527f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3272,8 +3272,10 @@ userspace_tests() endpoint_tests() { + # subflow_rebuild_header is needed to support the implicit flag # userspace pm type prevents add_addr - if reset "implicit EP"; then + if reset "implicit EP" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -3293,7 +3295,8 @@ endpoint_tests() kill_tests_wait fi - if reset "delete and re-add"; then + if reset "delete and re-add" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow From 07216a3c5d926bf1b6b360a0073747228a1f9b7f Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:45 +0200 Subject: [PATCH 689/934] selftests: mptcp: join: skip backup if set flag on ID not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. Commit bccefb762439 ("selftests: mptcp: simplify pm_nl_change_endpoint") has simplified the way the backup flag is set on an endpoint. Instead of doing: ./pm_nl_ctl set 10.0.2.1 flags backup Now we do: ./pm_nl_ctl set id 1 flags backup The new way is easier to maintain but it is also incompatible with older kernels not supporting the implicit endpoints putting in place the infrastructure to set flags per ID, hence the second Fixes tag. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: bccefb762439 ("selftests: mptcp: simplify pm_nl_change_endpoint") Cc: stable@vger.kernel.org Fixes: 4cf86ae84c71 ("mptcp: strict local address ID selection") Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 7f860a93527f..adbe297a95cf 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2679,7 +2679,8 @@ mixed_tests() backup_tests() { # single subflow, backup - if reset "single subflow, backup"; then + if reset "single subflow, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup @@ -2689,7 +2690,8 @@ backup_tests() fi # single address, backup - if reset "single address, backup"; then + if reset "single address, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 @@ -2700,7 +2702,8 @@ backup_tests() fi # single address with port, backup - if reset "single address with port, backup"; then + if reset "single address with port, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 1 From 9db34c4294af9999edc773d96744e2d2d4eb5060 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:46 +0200 Subject: [PATCH 690/934] selftests: mptcp: join: skip fullmesh flag tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of the fullmesh flag for the in-kernel PM introduced by commit 2843ff6f36db ("mptcp: remote addresses fullmesh") and commit 1a0d6136c5f0 ("mptcp: local addresses fullmesh"). It looks like there is no easy external sign we can use to predict the expected behaviour. We could add the flag and then check if it has been added but for that, and for each fullmesh test, we would need to setup a new environment, do the checks, clean it and then only start the test from yet another clean environment. To keep it simple and avoid introducing new issues, we look for a specific kernel version. That's not ideal but an acceptable solution for this case. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 6a0653b96f5d ("selftests: mptcp: add fullmesh setting tests") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index adbe297a95cf..f8e58ebcdd54 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3098,7 +3098,8 @@ fullmesh_tests() fi # set fullmesh flag - if reset "set fullmesh flag test"; then + if reset "set fullmesh flag test" && + continue_if mptcp_lib_kversion_ge 5.18; then pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow pm_nl_set_limits $ns2 4 4 @@ -3108,7 +3109,8 @@ fullmesh_tests() fi # set nofullmesh flag - if reset "set nofullmesh flag test"; then + if reset "set nofullmesh flag test" && + continue_if mptcp_lib_kversion_ge 5.18; then pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh pm_nl_set_limits $ns2 4 4 @@ -3118,7 +3120,8 @@ fullmesh_tests() fi # set backup,fullmesh flags - if reset "set backup,fullmesh flags test"; then + if reset "set backup,fullmesh flags test" && + continue_if mptcp_lib_kversion_ge 5.18; then pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow pm_nl_set_limits $ns2 4 4 @@ -3129,7 +3132,8 @@ fullmesh_tests() fi # set nobackup,nofullmesh flags - if reset "set nobackup,nofullmesh flags test"; then + if reset "set nobackup,nofullmesh flags test" && + continue_if mptcp_lib_kversion_ge 5.18; then pm_nl_set_limits $ns1 4 4 pm_nl_set_limits $ns2 4 4 pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh From f2b492b04a167261e1c38eb76f78fb4294473a49 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:47 +0200 Subject: [PATCH 691/934] selftests: mptcp: join: skip userspace PM tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of the userspace PM introduced by commit 4638de5aefe5 ("mptcp: handle local addrs announced by userspace PMs") and the following ones. It is possible to look for the MPTCP pm_type's sysctl knob to know in advance if the userspace PM is available. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 5ac1d2d63451 ("selftests: mptcp: Add tests for userspace PM type") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f8e58ebcdd54..f9161ed69b86 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -84,7 +84,7 @@ init_partial() ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 - ip netns exec $netns sysctl -q net.mptcp.pm_type=0 + ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 if [ $checksum -eq 1 ]; then @@ -3191,7 +3191,8 @@ fail_tests() userspace_tests() { # userspace pm type prevents add_addr - if reset "userspace pm type prevents add_addr"; then + if reset "userspace pm type prevents add_addr" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 @@ -3202,7 +3203,8 @@ userspace_tests() fi # userspace pm type does not echo add_addr without daemon - if reset "userspace pm no echo w/o daemon"; then + if reset "userspace pm no echo w/o daemon" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 @@ -3213,7 +3215,8 @@ userspace_tests() fi # userspace pm type rejects join - if reset "userspace pm type rejects join"; then + if reset "userspace pm type rejects join" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 @@ -3223,7 +3226,8 @@ userspace_tests() fi # userspace pm type does not send join - if reset "userspace pm type does not send join"; then + if reset "userspace pm type does not send join" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 @@ -3233,7 +3237,8 @@ userspace_tests() fi # userspace pm type prevents mp_prio - if reset "userspace pm type prevents mp_prio"; then + if reset "userspace pm type prevents mp_prio" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 @@ -3244,7 +3249,8 @@ userspace_tests() fi # userspace pm type prevents rm_addr - if reset "userspace pm type prevents rm_addr"; then + if reset "userspace pm type prevents rm_addr" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 @@ -3256,7 +3262,8 @@ userspace_tests() fi # userspace pm add & remove address - if reset_with_events "userspace pm add & remove address"; then + if reset_with_events "userspace pm add & remove address" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow @@ -3267,7 +3274,8 @@ userspace_tests() fi # userspace pm create destroy subflow - if reset_with_events "userspace pm create destroy subflow"; then + if reset_with_events "userspace pm create destroy subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow From ff8897b5189495b47895ca247b860a29dc04b36b Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:48 +0200 Subject: [PATCH 692/934] selftests: mptcp: join: skip fail tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of the MP_FAIL / infinite mapping introduced by commit 1e39e5a32ad7 ("mptcp: infinite mapping sending") and the following ones. It is possible to look for one of the infinite mapping counters to know in advance if the this feature is available. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase") Cc: stable@vger.kernel.org Fixes: 2ba18161d407 ("selftests: mptcp: add MP_FAIL reset testcase") Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f9161ed69b86..7867bad59253 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -384,7 +384,7 @@ setup_fail_rules() reset_with_fail() { - reset "${1}" || return 1 + reset_check_counter "${1}" "MPTcpExtInfiniteMapTx" || return 1 shift ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1 From 632978f0a961b4591a05ba9e39eab24541d83e84 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:49 +0200 Subject: [PATCH 693/934] selftests: mptcp: join: skip MPC backups tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of sending an MP_PRIO signal for the initial subflow, introduced by commit c157bbe776b7 ("mptcp: allow the in kernel PM to set MPC subflow priority"). It is possible to look for "mptcp_subflow_send_ack" in kallsyms because it was needed to introduce the mentioned feature. So we can know in advance if the feature is supported instead of trying and accepting any results. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 914f6a59b10f ("selftests: mptcp: add MPC backup tests") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 7867bad59253..554fcafd6e8a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2713,14 +2713,16 @@ backup_tests() chk_prio_nr 1 1 fi - if reset "mpc backup"; then + if reset "mpc backup" && + continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr 0 0 0 chk_prio_nr 0 1 fi - if reset "mpc backup both sides"; then + if reset "mpc backup both sides" && + continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow @@ -2728,14 +2730,16 @@ backup_tests() chk_prio_nr 1 1 fi - if reset "mpc switch to backup"; then + if reset "mpc switch to backup" && + continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup chk_join_nr 0 0 0 chk_prio_nr 0 1 fi - if reset "mpc switch to backup both sides"; then + if reset "mpc switch to backup both sides" && + continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup From 0471bb479af03874b09350fcfe51d3743a5608de Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:50 +0200 Subject: [PATCH 694/934] selftests: mptcp: join: skip PM listener tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of PM listener events introduced by commit f8c9dfbd875b ("mptcp: add pm listener events"). It is possible to look for "mptcp_event_pm_listener" in kallsyms to know in advance if the kernel supports this feature. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 178d023208eb ("selftests: mptcp: listener test for in-kernel PM") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 554fcafd6e8a..0c22efeba675 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2774,6 +2774,11 @@ verify_listener_events() $e_saddr $e_sport fi + if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + printf "[skip]: event not supported\n" + return + fi + type=$(grep "type:$e_type," $evt | sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') family=$(grep "type:$e_type," $evt | From 96b84195df61d374d8028cf426a115ae085031ec Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:51 +0200 Subject: [PATCH 695/934] selftests: mptcp: join: uniform listener tests The alignment was different from the other tests because tabs were used instead of spaces. While at it, also use 'echo' instead of 'printf' to print the result to keep the same style as done in the other sub-tests. And, even if it should be better with, also remove 'stdbuf' and sed's '--unbuffered' option because they are not used in the other subtests and they are not available when using a minimal environment with busybox. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 178d023208eb ("selftests: mptcp: listener test for in-kernel PM") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 0c22efeba675..281581d3c8eb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2765,43 +2765,41 @@ verify_listener_events() local family local saddr local sport + local name if [ $e_type = $LISTENER_CREATED ]; then - stdbuf -o0 -e0 printf "\t\t\t\t\t CREATE_LISTENER %s:%s"\ - $e_saddr $e_sport + name="LISTENER_CREATED" elif [ $e_type = $LISTENER_CLOSED ]; then - stdbuf -o0 -e0 printf "\t\t\t\t\t CLOSE_LISTENER %s:%s "\ - $e_saddr $e_sport + name="LISTENER_CLOSED" + else + name="$e_type" fi + printf "%-${nr_blank}s %s %s:%s " " " "$name" "$e_saddr" "$e_sport" + if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then printf "[skip]: event not supported\n" return fi - type=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') - family=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') - sport=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + type=$(grep "type:$e_type," $evt | sed -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') + family=$(grep "type:$e_type," $evt | sed -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') + sport=$(grep "type:$e_type," $evt | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') else - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') fi if [ $type ] && [ $type = $e_type ] && [ $family ] && [ $family = $e_family ] && [ $saddr ] && [ $saddr = $e_saddr ] && [ $sport ] && [ $sport = $e_sport ]; then - stdbuf -o0 -e0 printf "[ ok ]\n" + echo "[ ok ]" return 0 fi fail_test - stdbuf -o0 -e0 printf "[fail]\n" + echo "[fail]" } add_addr_ports_tests() From 6673851be0fc1bfc3353ffb52ff26ae5468f12c9 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Sat, 10 Jun 2023 18:11:52 +0200 Subject: [PATCH 696/934] selftests: mptcp: join: skip mixed tests if not supported Selftests are supposed to run on any kernels, including the old ones not supporting all MPTCP features. One of them is the support of a mix of subflows in v4 and v6 by the in-kernel PM introduced by commit b9d69db87fb7 ("mptcp: let the in-kernel PM use mixed IPv4 and IPv6 addresses"). It looks like there is no external sign we can use to predict the expected behaviour. Instead of accepting different behaviours and thus not really checking for the expected behaviour, we are looking here for a specific kernel version. That's not ideal but it looks better than removing the test because it cannot support older kernel versions. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: ad3493746ebe ("selftests: mptcp: add test-cases for mixed v4/v6 subflows") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 281581d3c8eb..0ae8cafde439 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2637,7 +2637,8 @@ v4mapped_tests() mixed_tests() { - if reset "IPv4 sockets do not use IPv6 addresses"; then + if reset "IPv4 sockets do not use IPv6 addresses" && + continue_if mptcp_lib_kversion_ge 6.3; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal @@ -2646,7 +2647,8 @@ mixed_tests() fi # Need an IPv6 mptcp socket to allow subflows of both families - if reset "simult IPv4 and IPv6 subflows"; then + if reset "simult IPv4 and IPv6 subflows" && + continue_if mptcp_lib_kversion_ge 6.3; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 10.0.1.1 flags signal @@ -2655,7 +2657,8 @@ mixed_tests() fi # cross families subflows will not be created even in fullmesh mode - if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1"; then + if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1" && + continue_if mptcp_lib_kversion_ge 6.3; then pm_nl_set_limits $ns1 0 4 pm_nl_set_limits $ns2 1 4 pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh @@ -2666,7 +2669,8 @@ mixed_tests() # fullmesh still tries to create all the possibly subflows with # matching family - if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2"; then + if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2" && + continue_if mptcp_lib_kversion_ge 6.3; then pm_nl_set_limits $ns1 0 4 pm_nl_set_limits $ns2 2 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal From d54fb4b25a0261bf2f2bb7093fdf11a36718bf25 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Fri, 26 May 2023 19:10:56 +0200 Subject: [PATCH 697/934] clk: composite: Fix handling of high clock rates ULONG_MAX is used by a few drivers to figure out the highest available clock rate via clk_round_rate(clk, ULONG_MAX). Since abs() takes a signed value as input, the current logic effectively calculates with ULONG_MAX = -1, which results in the worst parent clock being chosen instead of the best one. For example on Rockchip RK3588 the eMMC driver tries to figure out the highest available clock rate. There are three parent clocks available resulting in the following rate diffs with the existing logic: GPLL: abs(18446744073709551615 - 1188000000) = 1188000001 CPLL: abs(18446744073709551615 - 1500000000) = 1500000001 XIN24M: abs(18446744073709551615 - 24000000) = 24000001 As a result the clock framework will promote a maximum supported clock rate of 24 MHz, even though 1.5GHz are possible. With the updated logic any casting between signed and unsigned is avoided and the numbers look like this instead: GPLL: 18446744073709551615 - 1188000000 = 18446744072521551615 CPLL: 18446744073709551615 - 1500000000 = 18446744072209551615 XIN24M: 18446744073709551615 - 24000000 = 18446744073685551615 As a result the parent with the highest acceptable rate is chosen instead of the parent clock with the lowest one. Cc: stable@vger.kernel.org Fixes: 49502408007b ("mmc: sdhci-of-dwcmshc: properly determine max clock on Rockchip") Tested-by: Christopher Obbard Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230526171057.66876-2-sebastian.reichel@collabora.com Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/clk-composite.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c index edfa94641bbf..66759fe28fad 100644 --- a/drivers/clk/clk-composite.c +++ b/drivers/clk/clk-composite.c @@ -119,7 +119,10 @@ static int clk_composite_determine_rate(struct clk_hw *hw, if (ret) continue; - rate_diff = abs(req->rate - tmp_req.rate); + if (req->rate >= tmp_req.rate) + rate_diff = req->rate - tmp_req.rate; + else + rate_diff = tmp_req.rate - req->rate; if (!rate_diff || !req->best_parent_hw || best_rate_diff > rate_diff) { From a1043fbc8f99c7df2d70993eecad3baee07dc180 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Thu, 11 May 2023 15:32:26 +0200 Subject: [PATCH 698/934] clk: mediatek: mt8365: Fix inverted topclk operations The given operations are inverted for the wrong registers which makes multiple of the mt8365 hardware units unusable. In my setup at least usb did not work. Fixed by swapping the operations with the inverted ones. Reported-by: Alexandre Mergnat Fixes: 905b7430d3cc ("clk: mediatek: mt8365: Convert simple_gate to mtk_gate clocks") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/r/20230511133226.913600-1-msp@baylibre.com Tested-by: Alexandre Mergnat Reviewed-by: Alexandre Mergnat Reviewed-by: Matthias Brugger Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-mt8365.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8365.c b/drivers/clk/mediatek/clk-mt8365.c index c366ac1611e6..c87a6c4a7967 100644 --- a/drivers/clk/mediatek/clk-mt8365.c +++ b/drivers/clk/mediatek/clk-mt8365.c @@ -592,15 +592,15 @@ static const struct mtk_gate_regs top2_cg_regs = { #define GATE_TOP0(_id, _name, _parent, _shift) \ GATE_MTK(_id, _name, _parent, &top0_cg_regs, \ - _shift, &mtk_clk_gate_ops_no_setclr_inv) + _shift, &mtk_clk_gate_ops_no_setclr) #define GATE_TOP1(_id, _name, _parent, _shift) \ GATE_MTK(_id, _name, _parent, &top1_cg_regs, \ - _shift, &mtk_clk_gate_ops_no_setclr) + _shift, &mtk_clk_gate_ops_no_setclr_inv) #define GATE_TOP2(_id, _name, _parent, _shift) \ GATE_MTK(_id, _name, _parent, &top2_cg_regs, \ - _shift, &mtk_clk_gate_ops_no_setclr) + _shift, &mtk_clk_gate_ops_no_setclr_inv) static const struct mtk_gate top_clk_gates[] = { GATE_TOP0(CLK_TOP_CONN_32K, "conn_32k", "clk32k", 10), From 2a809ddca08d0d1b9ac961815ce04305814e179b Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Wed, 24 May 2023 09:49:24 +0800 Subject: [PATCH 699/934] clk: clk-loongson2: Zero init clk_init_data As clk_core_populate_parent_map() checks clk_init_data.num_parents first, and checks clk_init_data.parent_names[] before clk_init_data.parent_data[] and clk_init_data.parent_hws[]. Therefore the clk_init_data structure needs to be explicitly initialised to prevent an unexpected crash if clk_init_data.parent_names[] is a random value. CPU 0 Unable to handle kernel paging request at virtual address 0000000000000dc0, era == 9000000002986290, ra == 900000000298624c Oops[#1]: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.4.0-rc2+ #4582 pc 9000000002986290 ra 900000000298624c tp 9000000100094000 sp 9000000100097a60 a0 9000000104541e00 a1 0000000000000000 a2 0000000000000dc0 a3 0000000000000001 a4 90000001000979f0 a5 90000001800977d7 a6 0000000000000000 a7 900000000362a000 t0 90000000034f3548 t1 6f8c2a9cb5ab5f64 t2 0000000000011340 t3 90000000031cf5b0 t4 0000000000000dc0 t5 0000000000000004 t6 0000000000011300 t7 9000000104541e40 t8 000000000005a4f8 u0 9000000104541e00 s9 9000000104541e00 s0 9000000104bc4700 s1 9000000104541da8 s2 0000000000000001 s3 900000000356f9d8 s4 ffffffffffffffff s5 0000000000000000 s6 0000000000000dc0 s7 90000000030d0a88 s8 0000000000000000 ra: 900000000298624c __clk_register+0x228/0x84c ERA: 9000000002986290 __clk_register+0x26c/0x84c CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1c (LIE=2-4,10-12 VS=7) ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) BADV: 0000000000000dc0 PRID: 0014a000 (Loongson-64bit, ) Modules linked in: Process swapper/0 (pid: 1, threadinfo=(____ptrval____), task=(____ptrval____)) Stack : 90000000031c1810 90000000030d0a88 900000000325bac0 90000000034f3548 90000001002ab410 9000000104541e00 0000000000000dc0 9000000003150098 90000000031c1810 90000000031a0460 900000000362a000 90000001002ab410 900000000362a000 9000000104541da8 9000000104541de8 90000001002ab410 900000000362a000 9000000002986a68 90000000034f3ed8 90000000030d0aa8 9000000104541da8 900000000298d3b8 90000000031c1810 0000000000000000 90000000034f3ed8 90000000030d0aa8 0000000000000dc0 90000000030d0a88 90000001002ab410 900000000298d401 0000000000000000 6f8c2a9cb5ab5f64 90000000034f4000 90000000030d0a88 9000000003a48a58 90000001002ab410 9000000104bd81a8 900000000298d484 9000000100020260 0000000000000000 ... Call Trace: [<9000000002986290>] __clk_register+0x26c/0x84c [<9000000002986a68>] devm_clk_hw_register+0x5c/0xe0 [<900000000298d3b8>] loongson2_clk_register.constprop.0+0xdc/0x10c [<900000000298d484>] loongson2_clk_probe+0x9c/0x4ac [<9000000002a4eba4>] platform_probe+0x68/0xc8 [<9000000002a4bf80>] really_probe+0xbc/0x2f0 [<9000000002a4c23c>] __driver_probe_device+0x88/0x128 [<9000000002a4c318>] driver_probe_device+0x3c/0x11c [<9000000002a4c5dc>] __driver_attach+0x98/0x18c [<9000000002a49ca0>] bus_for_each_dev+0x80/0xe0 [<9000000002a4b0dc>] bus_add_driver+0xfc/0x1ec [<9000000002a4d4a8>] driver_register+0x68/0x134 [<90000000020f0110>] do_one_initcall+0x50/0x188 [<9000000003150f00>] kernel_init_freeable+0x224/0x294 [<90000000030240fc>] kernel_init+0x20/0x110 [<90000000020f1568>] ret_from_kernel_thread+0xc/0xa4 Fixes: acc0ccffec50 ("clk: clk-loongson2: add clock controller driver support") Cc: stable@vger.kernel.org Cc: Yinbo Zhu Signed-off-by: Binbin Zhou Link: https://lore.kernel.org/r/20230524014924.2869051-1-zhoubinbin@loongson.cn Signed-off-by: Stephen Boyd --- drivers/clk/clk-loongson2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-loongson2.c b/drivers/clk/clk-loongson2.c index 70ae1dd2e474..bacdcbb287ac 100644 --- a/drivers/clk/clk-loongson2.c +++ b/drivers/clk/clk-loongson2.c @@ -40,7 +40,7 @@ static struct clk_hw *loongson2_clk_register(struct device *dev, { int ret; struct clk_hw *hw; - struct clk_init_data init; + struct clk_init_data init = { }; hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL); if (!hw) From 3e6ac852fbc71a234de24b5455086f6b98d3d958 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 30 May 2023 17:17:20 +0100 Subject: [PATCH 700/934] usb: gadget: udc: renesas_usb3: Fix RZ/V2M {modprobe,bind} error Currently {modprobe, bind} after {rmmod, unbind} results in probe failure. genirq: Flags mismatch irq 22. 00000004 (85070400.usb3drd) vs. 00000004 (85070400.usb3drd) renesas_usb3: probe of 85070000.usb3peri failed with error -16 The reason is, it is trying to register an interrupt handler for the same IRQ twice. The devm_request_irq() was called with the parent device. So the interrupt handler won't be unregistered when the usb3-peri device is unbound. Fix this issue by replacing "parent dev"->"dev" as the irq resource is managed by this driver. Fixes: 9cad72dfc556 ("usb: gadget: Add support for RZ/V2M USB3DRD driver") Cc: stable Signed-off-by: Biju Das Reviewed-by: Geert Uytterhoeven Message-ID: <20230530161720.179927-1-biju.das.jz@bp.renesas.com> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/renesas_usb3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index aac8bc185afa..eb008e873361 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2877,9 +2877,9 @@ static int renesas_usb3_probe(struct platform_device *pdev) struct rzv2m_usb3drd *ddata = dev_get_drvdata(pdev->dev.parent); usb3->drd_reg = ddata->reg; - ret = devm_request_irq(ddata->dev, ddata->drd_irq, + ret = devm_request_irq(&pdev->dev, ddata->drd_irq, renesas_usb3_otg_irq, 0, - dev_name(ddata->dev), usb3); + dev_name(&pdev->dev), usb3); if (ret < 0) return ret; } From 00f8205ffcf112dcef14f8151d78075d38d22c08 Mon Sep 17 00:00:00 2001 From: Elson Roy Serrao Date: Thu, 1 Jun 2023 14:27:30 -0700 Subject: [PATCH 701/934] usb: dwc3: gadget: Reset num TRBs before giving back the request Consider a scenario where cable disconnect happens when there is an active usb reqest queued to the UDC. As part of the disconnect we would issue an end transfer with no interrupt-on-completion before giving back this request. Since we are giving back the request without skipping TRBs the num_trbs field of dwc3_request still holds the stale value previously used. Function drivers re-use same request for a given bind-unbind session and hence their dwc3_request context gets preserved across cable disconnect/connect. When such a request gets re-queued after cable connect, we would increase the num_trbs field on top of the previous stale value thus incorrectly representing the number of TRBs used. Fix this by resetting num_trbs field before giving back the request. Fixes: 09fe1f8d7e2f ("usb: dwc3: gadget: track number of TRBs per request") Cc: stable Signed-off-by: Elson Roy Serrao Acked-by: Thinh Nguyen Message-ID: <1685654850-8468-1-git-send-email-quic_eserrao@quicinc.com> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d831f5acf7b5..b78599dd705c 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -198,6 +198,7 @@ static void dwc3_gadget_del_and_unmap_request(struct dwc3_ep *dep, list_del(&req->list); req->remaining = 0; req->needs_extra_trb = false; + req->num_trbs = 0; if (req->request.status == -EINPROGRESS) req->request.status = status; From d2d69354226de0b333d4405981f3d9c41ba8430a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 7 Jun 2023 12:05:39 +0200 Subject: [PATCH 702/934] USB: dwc3: qcom: fix NULL-deref on suspend The Qualcomm dwc3 glue driver is currently accessing the driver data of the child core device during suspend and on wakeup interrupts. This is clearly a bad idea as the child may not have probed yet or could have been unbound from its driver. The first such layering violation was part of the initial version of the driver, but this was later made worse when the hack that accesses the driver data of the grand child xhci device to configure the wakeup interrupts was added. Fixing this properly is not that easily done, so add a sanity check to make sure that the child driver data is non-NULL before dereferencing it for now. Note that this relies on subtleties like the fact that driver core is making sure that the parent is not suspended while the child is probing. Reported-by: Manivannan Sadhasivam Link: https://lore.kernel.org/all/20230325165217.31069-4-manivannan.sadhasivam@linaro.org/ Fixes: d9152161b4bf ("usb: dwc3: Add Qualcomm DWC3 glue layer driver") Fixes: 6895ea55c385 ("usb: dwc3: qcom: Configure wakeup interrupts during suspend") Cc: stable@vger.kernel.org # 3.18: a872ab303d5d: "usb: dwc3: qcom: fix use-after-free on runtime-PM wakeup" Cc: Sandeep Maheswaram Cc: Krishna Kurapati Signed-off-by: Johan Hovold Acked-by: Thinh Nguyen Reviewed-by: Manivannan Sadhasivam Message-ID: <20230607100540.31045-2-johan+linaro@kernel.org> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-qcom.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 959fc925ca7c..79b22abf9727 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -308,7 +308,16 @@ static void dwc3_qcom_interconnect_exit(struct dwc3_qcom *qcom) /* Only usable in contexts where the role can not change. */ static bool dwc3_qcom_is_host(struct dwc3_qcom *qcom) { - struct dwc3 *dwc = platform_get_drvdata(qcom->dwc3); + struct dwc3 *dwc; + + /* + * FIXME: Fix this layering violation. + */ + dwc = platform_get_drvdata(qcom->dwc3); + + /* Core driver may not have probed yet. */ + if (!dwc) + return false; return dwc->xhci; } From e3dbb657571509044be15184a13134fa7c1fdca1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 7 Jun 2023 12:05:40 +0200 Subject: [PATCH 703/934] USB: dwc3: fix use-after-free on core driver unbind Some dwc3 glue drivers are currently accessing the driver data of the child core device directly, which is clearly a bad idea as the child may not have probed yet or may have been unbound from its driver. As a workaround until the glue drivers have been fixed, clear the driver data pointer before allowing the glue parent device to runtime suspend to prevent its driver from accessing data that has been freed during unbind. Fixes: 6dd2565989b4 ("usb: dwc3: add imx8mp dwc3 glue layer driver") Fixes: 6895ea55c385 ("usb: dwc3: qcom: Configure wakeup interrupts during suspend") Cc: stable@vger.kernel.org # 5.12 Cc: Li Jun Cc: Sandeep Maheswaram Cc: Krishna Kurapati Signed-off-by: Johan Hovold Acked-by: Thinh Nguyen Reviewed-by: Manivannan Sadhasivam Message-ID: <20230607100540.31045-3-johan+linaro@kernel.org> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 7b2ce013cc5b..d68958e151a7 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1929,6 +1929,11 @@ static int dwc3_remove(struct platform_device *pdev) pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); + /* + * HACK: Clear the driver data, which is currently accessed by parent + * glue drivers, before allowing the parent to suspend. + */ + platform_set_drvdata(pdev, NULL); pm_runtime_set_suspended(&pdev->dev); dwc3_free_event_buffers(dwc); From c4a8bfabefed706bb9150867db528ceefd5cb5fe Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 6 Jun 2023 14:58:02 +0300 Subject: [PATCH 704/934] usb: typec: ucsi: Fix command cancellation The Cancel command was passed to the write callback as the offset instead of as the actual command which caused NULL pointer dereference. Reported-by: Stephan Bolten Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217517 Fixes: 094902bc6a3c ("usb: typec: ucsi: Always cancel the command if PPM reports BUSY condition") Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Message-ID: <20230606115802.79339-1-heikki.krogerus@linux.intel.com> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 2b472ec01dc4..b664ecbb798b 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -132,10 +132,8 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) if (ret) return ret; - if (cci & UCSI_CCI_BUSY) { - ucsi->ops->async_write(ucsi, UCSI_CANCEL, NULL, 0); - return -EBUSY; - } + if (cmd != UCSI_CANCEL && cci & UCSI_CCI_BUSY) + return ucsi_exec_command(ucsi, UCSI_CANCEL); if (!(cci & UCSI_CCI_COMMAND_COMPLETE)) return -EIO; @@ -149,6 +147,11 @@ static int ucsi_exec_command(struct ucsi *ucsi, u64 cmd) return ucsi_read_error(ucsi); } + if (cmd == UCSI_CANCEL && cci & UCSI_CCI_CANCEL_COMPLETE) { + ret = ucsi_acknowledge_command(ucsi); + return ret ? ret : -EBUSY; + } + return UCSI_CCI_LENGTH(cci); } From 92c9c3baad6b1fd584fbabeaa4756f9b77926cb5 Mon Sep 17 00:00:00 2001 From: Pavan Holla Date: Wed, 7 Jun 2023 19:33:26 +0000 Subject: [PATCH 705/934] usb: typec: Fix fast_role_swap_current show function The current implementation mistakenly performs a & operation on the output of sysfs_emit. This patch performs the & operation before calling sysfs_emit. Fixes: 662a60102c12 ("usb: typec: Separate USB Power Delivery from USB Type-C") Cc: stable Reported-by: Benson Leung Signed-off-by: Pavan Holla Reviewed-by: Heikki Krogerus Reviewed-by: Benson Leung Message-ID: <20230607193328.3359487-1-pholla@chromium.org> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/pd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/pd.c b/drivers/usb/typec/pd.c index 0bcde1ff4d39..8cc66e4467c4 100644 --- a/drivers/usb/typec/pd.c +++ b/drivers/usb/typec/pd.c @@ -95,7 +95,7 @@ peak_current_show(struct device *dev, struct device_attribute *attr, char *buf) static ssize_t fast_role_swap_current_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%u\n", to_pdo(dev)->pdo >> PDO_FIXED_FRS_CURR_SHIFT) & 3; + return sysfs_emit(buf, "%u\n", (to_pdo(dev)->pdo >> PDO_FIXED_FRS_CURR_SHIFT) & 3); } static DEVICE_ATTR_RO(fast_role_swap_current); From 50966da807c81c5eb3bdfd392990fe0bba94d1ee Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 9 Jun 2023 01:02:26 +0000 Subject: [PATCH 706/934] usb: gadget: udc: core: Offload usb_udc_vbus_handler processing usb_udc_vbus_handler() can be invoked from interrupt context by irq handlers of the gadget drivers, however, usb_udc_connect_control() has to run in non-atomic context due to the following: a. Some of the gadget driver implementations expect the ->pullup callback to be invoked in non-atomic context. b. usb_gadget_disconnect() acquires udc_lock which is a mutex. Hence offload invocation of usb_udc_connect_control() to workqueue. UDC should not be pulled up unless gadget driver is bound. The new flag "allow_connect" is now set by gadget_bind_driver() and cleared by gadget_unbind_driver(). This prevents work item to pull up the gadget even if queued when the gadget driver is already unbound. Cc: stable@vger.kernel.org Fixes: 1016fc0c096c ("USB: gadget: Fix obscure lockdep violation for udc_mutex") Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Alan Stern Message-ID: <20230609010227.978661-1-badhri@google.com> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 52e6d2e84e35..d2e4f78c53e3 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -37,6 +37,9 @@ static const struct bus_type gadget_bus_type; * @vbus: for udcs who care about vbus status, this value is real vbus status; * for udcs who do not care about vbus status, this value is always true * @started: the UDC's started state. True if the UDC had started. + * @allow_connect: Indicates whether UDC is allowed to be pulled up. + * Set/cleared by gadget_(un)bind_driver() after gadget driver is bound or + * unbound. * * This represents the internal data structure which is used by the UDC-class * to hold information about udc driver and gadget together. @@ -48,6 +51,8 @@ struct usb_udc { struct list_head list; bool vbus; bool started; + bool allow_connect; + struct work_struct vbus_work; }; static struct class *udc_class; @@ -706,7 +711,7 @@ int usb_gadget_connect(struct usb_gadget *gadget) goto out; } - if (gadget->deactivated) { + if (gadget->deactivated || !gadget->udc->allow_connect) { /* * If gadget is deactivated we only save new state. * Gadget will be connected automatically after activation. @@ -1086,6 +1091,13 @@ static void usb_udc_connect_control(struct usb_udc *udc) usb_gadget_disconnect(udc->gadget); } +static void vbus_event_work(struct work_struct *work) +{ + struct usb_udc *udc = container_of(work, struct usb_udc, vbus_work); + + usb_udc_connect_control(udc); +} + /** * usb_udc_vbus_handler - updates the udc core vbus status, and try to * connect or disconnect gadget @@ -1094,6 +1106,14 @@ static void usb_udc_connect_control(struct usb_udc *udc) * * The udc driver calls it when it wants to connect or disconnect gadget * according to vbus status. + * + * This function can be invoked from interrupt context by irq handlers of + * the gadget drivers, however, usb_udc_connect_control() has to run in + * non-atomic context due to the following: + * a. Some of the gadget driver implementations expect the ->pullup + * callback to be invoked in non-atomic context. + * b. usb_gadget_disconnect() acquires udc_lock which is a mutex. + * Hence offload invocation of usb_udc_connect_control() to workqueue. */ void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status) { @@ -1101,7 +1121,7 @@ void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status) if (udc) { udc->vbus = status; - usb_udc_connect_control(udc); + schedule_work(&udc->vbus_work); } } EXPORT_SYMBOL_GPL(usb_udc_vbus_handler); @@ -1328,6 +1348,7 @@ int usb_add_gadget(struct usb_gadget *gadget) mutex_lock(&udc_lock); list_add_tail(&udc->list, &udc_list); mutex_unlock(&udc_lock); + INIT_WORK(&udc->vbus_work, vbus_event_work); ret = device_add(&udc->dev); if (ret) @@ -1459,6 +1480,7 @@ void usb_del_gadget(struct usb_gadget *gadget) flush_work(&gadget->work); device_del(&gadget->dev); ida_free(&gadget_id_numbers, gadget->id_number); + cancel_work_sync(&udc->vbus_work); device_unregister(&udc->dev); } EXPORT_SYMBOL_GPL(usb_del_gadget); @@ -1527,6 +1549,7 @@ static int gadget_bind_driver(struct device *dev) if (ret) goto err_start; usb_gadget_enable_async_callbacks(udc); + udc->allow_connect = true; usb_udc_connect_control(udc); kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); @@ -1558,6 +1581,8 @@ static void gadget_unbind_driver(struct device *dev) kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); + udc->allow_connect = false; + cancel_work_sync(&udc->vbus_work); usb_gadget_disconnect(gadget); usb_gadget_disable_async_callbacks(udc); if (gadget->irq) From 286d9975a838d0a54da049765fa1d1fb96b89682 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Fri, 9 Jun 2023 01:02:27 +0000 Subject: [PATCH 707/934] usb: gadget: udc: core: Prevent soft_connect_store() race usb_udc_connect_control(), soft_connect_store() and usb_gadget_deactivate() can potentially race against each other to invoke usb_gadget_connect()/usb_gadget_disconnect(). To prevent this, guard udc->started, gadget->allow_connect, gadget->deactivate and gadget->connect with connect_lock so that ->pullup() is only invoked when the gadget is bound, started and not deactivated. The routines usb_gadget_connect_locked(), usb_gadget_disconnect_locked(), usb_udc_connect_control_locked(), usb_gadget_udc_start_locked(), usb_gadget_udc_stop_locked() are called with this lock held. An earlier version of this commit was reverted due to the crash reported in https://lore.kernel.org/all/ZF4BvgsOyoKxdPFF@francesco-nb.int.toradex.com/. commit 16737e78d190 ("usb: gadget: udc: core: Offload usb_udc_vbus_handler processing") addresses the crash reported. Cc: stable@vger.kernel.org Fixes: 628ef0d273a6 ("usb: udc: add usb_udc_vbus_handler") Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Alan Stern Message-ID: <20230609010227.978661-2-badhri@google.com> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 155 +++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 49 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index d2e4f78c53e3..83fd1de14784 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -40,6 +40,11 @@ static const struct bus_type gadget_bus_type; * @allow_connect: Indicates whether UDC is allowed to be pulled up. * Set/cleared by gadget_(un)bind_driver() after gadget driver is bound or * unbound. + * @connect_lock: protects udc->started, gadget->connect, + * gadget->allow_connect and gadget->deactivate. The routines + * usb_gadget_connect_locked(), usb_gadget_disconnect_locked(), + * usb_udc_connect_control_locked(), usb_gadget_udc_start_locked() and + * usb_gadget_udc_stop_locked() are called with this lock held. * * This represents the internal data structure which is used by the UDC-class * to hold information about udc driver and gadget together. @@ -53,6 +58,7 @@ struct usb_udc { bool started; bool allow_connect; struct work_struct vbus_work; + struct mutex connect_lock; }; static struct class *udc_class; @@ -692,17 +698,8 @@ out: } EXPORT_SYMBOL_GPL(usb_gadget_vbus_disconnect); -/** - * usb_gadget_connect - software-controlled connect to USB host - * @gadget:the peripheral being connected - * - * Enables the D+ (or potentially D-) pullup. The host will start - * enumerating this gadget when the pullup is active and a VBUS session - * is active (the link is powered). - * - * Returns zero on success, else negative errno. - */ -int usb_gadget_connect(struct usb_gadget *gadget) +static int usb_gadget_connect_locked(struct usb_gadget *gadget) + __must_hold(&gadget->udc->connect_lock) { int ret = 0; @@ -711,10 +708,12 @@ int usb_gadget_connect(struct usb_gadget *gadget) goto out; } - if (gadget->deactivated || !gadget->udc->allow_connect) { + if (gadget->deactivated || !gadget->udc->allow_connect || !gadget->udc->started) { /* - * If gadget is deactivated we only save new state. - * Gadget will be connected automatically after activation. + * If the gadget isn't usable (because it is deactivated, + * unbound, or not yet started), we only save the new state. + * The gadget will be connected automatically when it is + * activated/bound/started. */ gadget->connected = true; goto out; @@ -729,22 +728,31 @@ out: return ret; } -EXPORT_SYMBOL_GPL(usb_gadget_connect); /** - * usb_gadget_disconnect - software-controlled disconnect from USB host - * @gadget:the peripheral being disconnected + * usb_gadget_connect - software-controlled connect to USB host + * @gadget:the peripheral being connected * - * Disables the D+ (or potentially D-) pullup, which the host may see - * as a disconnect (when a VBUS session is active). Not all systems - * support software pullup controls. - * - * Following a successful disconnect, invoke the ->disconnect() callback - * for the current gadget driver so that UDC drivers don't need to. + * Enables the D+ (or potentially D-) pullup. The host will start + * enumerating this gadget when the pullup is active and a VBUS session + * is active (the link is powered). * * Returns zero on success, else negative errno. */ -int usb_gadget_disconnect(struct usb_gadget *gadget) +int usb_gadget_connect(struct usb_gadget *gadget) +{ + int ret; + + mutex_lock(&gadget->udc->connect_lock); + ret = usb_gadget_connect_locked(gadget); + mutex_unlock(&gadget->udc->connect_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(usb_gadget_connect); + +static int usb_gadget_disconnect_locked(struct usb_gadget *gadget) + __must_hold(&gadget->udc->connect_lock) { int ret = 0; @@ -756,7 +764,7 @@ int usb_gadget_disconnect(struct usb_gadget *gadget) if (!gadget->connected) goto out; - if (gadget->deactivated) { + if (gadget->deactivated || !gadget->udc->started) { /* * If gadget is deactivated we only save new state. * Gadget will stay disconnected after activation. @@ -779,6 +787,30 @@ out: return ret; } + +/** + * usb_gadget_disconnect - software-controlled disconnect from USB host + * @gadget:the peripheral being disconnected + * + * Disables the D+ (or potentially D-) pullup, which the host may see + * as a disconnect (when a VBUS session is active). Not all systems + * support software pullup controls. + * + * Following a successful disconnect, invoke the ->disconnect() callback + * for the current gadget driver so that UDC drivers don't need to. + * + * Returns zero on success, else negative errno. + */ +int usb_gadget_disconnect(struct usb_gadget *gadget) +{ + int ret; + + mutex_lock(&gadget->udc->connect_lock); + ret = usb_gadget_disconnect_locked(gadget); + mutex_unlock(&gadget->udc->connect_lock); + + return ret; +} EXPORT_SYMBOL_GPL(usb_gadget_disconnect); /** @@ -796,13 +828,14 @@ int usb_gadget_deactivate(struct usb_gadget *gadget) { int ret = 0; + mutex_lock(&gadget->udc->connect_lock); if (gadget->deactivated) - goto out; + goto unlock; if (gadget->connected) { - ret = usb_gadget_disconnect(gadget); + ret = usb_gadget_disconnect_locked(gadget); if (ret) - goto out; + goto unlock; /* * If gadget was being connected before deactivation, we want @@ -812,7 +845,8 @@ int usb_gadget_deactivate(struct usb_gadget *gadget) } gadget->deactivated = true; -out: +unlock: + mutex_unlock(&gadget->udc->connect_lock); trace_usb_gadget_deactivate(gadget, ret); return ret; @@ -832,8 +866,9 @@ int usb_gadget_activate(struct usb_gadget *gadget) { int ret = 0; + mutex_lock(&gadget->udc->connect_lock); if (!gadget->deactivated) - goto out; + goto unlock; gadget->deactivated = false; @@ -842,9 +877,11 @@ int usb_gadget_activate(struct usb_gadget *gadget) * while it was being deactivated, we call usb_gadget_connect(). */ if (gadget->connected) - ret = usb_gadget_connect(gadget); + ret = usb_gadget_connect_locked(gadget); + mutex_unlock(&gadget->udc->connect_lock); -out: +unlock: + mutex_unlock(&gadget->udc->connect_lock); trace_usb_gadget_activate(gadget, ret); return ret; @@ -1083,19 +1120,22 @@ EXPORT_SYMBOL_GPL(usb_gadget_set_state); /* ------------------------------------------------------------------------- */ -static void usb_udc_connect_control(struct usb_udc *udc) +/* Acquire connect_lock before calling this function. */ +static void usb_udc_connect_control_locked(struct usb_udc *udc) __must_hold(&udc->connect_lock) { if (udc->vbus) - usb_gadget_connect(udc->gadget); + usb_gadget_connect_locked(udc->gadget); else - usb_gadget_disconnect(udc->gadget); + usb_gadget_disconnect_locked(udc->gadget); } static void vbus_event_work(struct work_struct *work) { struct usb_udc *udc = container_of(work, struct usb_udc, vbus_work); - usb_udc_connect_control(udc); + mutex_lock(&udc->connect_lock); + usb_udc_connect_control_locked(udc); + mutex_unlock(&udc->connect_lock); } /** @@ -1144,7 +1184,7 @@ void usb_gadget_udc_reset(struct usb_gadget *gadget, EXPORT_SYMBOL_GPL(usb_gadget_udc_reset); /** - * usb_gadget_udc_start - tells usb device controller to start up + * usb_gadget_udc_start_locked - tells usb device controller to start up * @udc: The UDC to be started * * This call is issued by the UDC Class driver when it's about @@ -1155,8 +1195,11 @@ EXPORT_SYMBOL_GPL(usb_gadget_udc_reset); * necessary to have it powered on. * * Returns zero on success, else negative errno. + * + * Caller should acquire connect_lock before invoking this function. */ -static inline int usb_gadget_udc_start(struct usb_udc *udc) +static inline int usb_gadget_udc_start_locked(struct usb_udc *udc) + __must_hold(&udc->connect_lock) { int ret; @@ -1173,7 +1216,7 @@ static inline int usb_gadget_udc_start(struct usb_udc *udc) } /** - * usb_gadget_udc_stop - tells usb device controller we don't need it anymore + * usb_gadget_udc_stop_locked - tells usb device controller we don't need it anymore * @udc: The UDC to be stopped * * This call is issued by the UDC Class driver after calling @@ -1182,8 +1225,11 @@ static inline int usb_gadget_udc_start(struct usb_udc *udc) * The details are implementation specific, but it can go as * far as powering off UDC completely and disable its data * line pullups. + * + * Caller should acquire connect lock before invoking this function. */ -static inline void usb_gadget_udc_stop(struct usb_udc *udc) +static inline void usb_gadget_udc_stop_locked(struct usb_udc *udc) + __must_hold(&udc->connect_lock) { if (!udc->started) { dev_err(&udc->dev, "UDC had already stopped\n"); @@ -1342,6 +1388,7 @@ int usb_add_gadget(struct usb_gadget *gadget) udc->gadget = gadget; gadget->udc = udc; + mutex_init(&udc->connect_lock); udc->started = false; @@ -1545,12 +1592,16 @@ static int gadget_bind_driver(struct device *dev) if (ret) goto err_bind; - ret = usb_gadget_udc_start(udc); - if (ret) + mutex_lock(&udc->connect_lock); + ret = usb_gadget_udc_start_locked(udc); + if (ret) { + mutex_unlock(&udc->connect_lock); goto err_start; + } usb_gadget_enable_async_callbacks(udc); udc->allow_connect = true; - usb_udc_connect_control(udc); + usb_udc_connect_control_locked(udc); + mutex_unlock(&udc->connect_lock); kobject_uevent(&udc->dev.kobj, KOBJ_CHANGE); return 0; @@ -1583,12 +1634,14 @@ static void gadget_unbind_driver(struct device *dev) udc->allow_connect = false; cancel_work_sync(&udc->vbus_work); - usb_gadget_disconnect(gadget); + mutex_lock(&udc->connect_lock); + usb_gadget_disconnect_locked(gadget); usb_gadget_disable_async_callbacks(udc); if (gadget->irq) synchronize_irq(gadget->irq); udc->driver->unbind(gadget); - usb_gadget_udc_stop(udc); + usb_gadget_udc_stop_locked(udc); + mutex_unlock(&udc->connect_lock); mutex_lock(&udc_lock); driver->is_bound = false; @@ -1674,11 +1727,15 @@ static ssize_t soft_connect_store(struct device *dev, } if (sysfs_streq(buf, "connect")) { - usb_gadget_udc_start(udc); - usb_gadget_connect(udc->gadget); + mutex_lock(&udc->connect_lock); + usb_gadget_udc_start_locked(udc); + usb_gadget_connect_locked(udc->gadget); + mutex_unlock(&udc->connect_lock); } else if (sysfs_streq(buf, "disconnect")) { - usb_gadget_disconnect(udc->gadget); - usb_gadget_udc_stop(udc); + mutex_lock(&udc->connect_lock); + usb_gadget_disconnect_locked(udc->gadget); + usb_gadget_udc_stop_locked(udc); + mutex_unlock(&udc->connect_lock); } else { dev_err(dev, "unsupported command '%s'\n", buf); ret = -EINVAL; From 11d24327c2d7ad7f24fcc44fb00e1fa91ebf6525 Mon Sep 17 00:00:00 2001 From: Ratchanan Srirattanamet Date: Wed, 24 May 2023 04:11:56 +0700 Subject: [PATCH 708/934] drm/nouveau: don't detect DSM for non-NVIDIA device The call site of nouveau_dsm_pci_probe() uses single set of output variables for all invocations. So, we must not write anything to them unless it's an NVIDIA device. Otherwise, if we are called with another device after the NVIDIA device, we'll clober the result of the NVIDIA device. For example, if the other device doesn't have _PR3 resources, the detection later would miss the presence of power resource support, and the rest of the code will keep using Optimus DSM, breaking power management for that machine. Also, because we're detecting NVIDIA's DSM, it doesn't make sense to run this detection on a non-NVIDIA device anyway. Thus, check at the beginning of the detection code if this is an NVIDIA card, and just return if it isn't. This, together with commit d22915d22ded ("drm/nouveau/devinit/tu102-: wait for GFW_BOOT_PROGRESS == COMPLETED") developed independently and landed earlier, fixes runtime power management of the NVIDIA card in Lenovo Legion 5-15ARH05. Without this patch, the GPU resumption code will "timeout", sometimes hanging userspace. As a bonus, we'll also stop preventing _PR3 usage from the bridge for unrelated devices, which is always nice, I guess. Fixes: ccfc2d5cdb02 ("drm/nouveau: Use generic helper to check _PR3 presence") Signed-off-by: Ratchanan Srirattanamet Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/79 Reviewed-by: Karol Herbst Signed-off-by: Karol Herbst Link: https://patchwork.freedesktop.org/patch/msgid/DM6PR19MB2780805D4BE1E3F9B3AC96D0BC409@DM6PR19MB2780.namprd19.prod.outlook.com --- drivers/gpu/drm/nouveau/nouveau_acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 8cf096f841a9..a2ae8c21e4dc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -220,6 +220,9 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out int optimus_funcs; struct pci_dev *parent_pdev; + if (pdev->vendor != PCI_VENDOR_ID_NVIDIA) + return; + *has_pr3 = false; parent_pdev = pci_upstream_bridge(pdev); if (parent_pdev) { From a82c3df955f8c1c726e4976527aa6ae924a67dd9 Mon Sep 17 00:00:00 2001 From: Robert Hodaszi Date: Fri, 9 Jun 2023 14:13:34 +0200 Subject: [PATCH 709/934] tty: serial: fsl_lpuart: reduce RX watermark to 0 on LS1028A LS1028A is using DMA with LPUART. Having RX watermark set to 1, means DMA transactions are started only after receiving the second character. On other platforms with newer LPUART IP, Receiver Idle Empty function initiates the DMA request after the receiver is idling for 4 characters. But this feature is missing on LS1028A, which is causing a 1-character delay in the RX direction on this platform. Set RX watermark to 0 to initiate RX DMA after each character. Link: https://lore.kernel.org/linux-serial/20230607103459.1222426-1-robert.hodaszi@digi.com/ Fixes: 9ad9df844754 ("tty: serial: fsl_lpuart: Fix the wrong RXWATER setting for rx dma case") Cc: stable Signed-off-by: Robert Hodaszi Message-ID: <20230609121334.1878626-1-robert.hodaszi@digi.com> Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 7486a2b8556c..7fd30fcc10c6 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -310,7 +310,7 @@ static const struct lpuart_soc_data ls1021a_data = { static const struct lpuart_soc_data ls1028a_data = { .devtype = LS1028A_LPUART, .iotype = UPIO_MEM32, - .rx_watermark = 1, + .rx_watermark = 0, }; static struct lpuart_soc_data imx7ulp_data = { From f9fd804aa0a36f15a35ca070ec4c52650876cc29 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 13 Jun 2023 10:34:53 +0100 Subject: [PATCH 710/934] ASoC: tegra: Fix Master Volume Control Commit 3ed2b549b39f ("ALSA: pcm: fix wait_time calculations") corrected the PCM wait_time calculations and in doing so reduced the calculated wait_time. This exposed an issue with the Tegra Master Volume Control (MVC) device where the reduced wait_time caused the MVC to fail. For now fix this by setting the default wait_time for Tegra to be 500ms. Fixes: 3ed2b549b39f ("ALSA: pcm: fix wait_time calculations") Signed-off-by: Jon Hunter Link: https://lore.kernel.org/r/20230613093453.13927-1-jonathanh@nvidia.com Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_pcm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 468c8e77de21..0b69cebc9a33 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -117,6 +117,9 @@ int tegra_pcm_open(struct snd_soc_component *component, return ret; } + /* Set wait time to 500ms by default */ + substream->wait_time = 500; + return 0; } EXPORT_SYMBOL_GPL(tegra_pcm_open); From 44e46572f0bae431a6092e3cfd2f47bff8b8d18c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 13 Jun 2023 13:22:40 +0200 Subject: [PATCH 711/934] regmap: regcache: Don't sync read-only registers regcache_maple_sync() tries to sync all cached values no matter whether it's writable or not. OTOH, regache_sync_val() does care the wrtability and returns -EIO for a read-only register. This results in an error message like: snd_hda_codec_realtek hdaudioC0D0: Unable to sync register 0x2f0009. -5 and the sync loop is aborted incompletely. This patch adds the writable register check to regcache_sync_val() for addressing the bug above. Note that, although we may add the check in the caller side (regcache_maple_sync()), here we put in regcache_sync_val(), so that a similar case like this can be avoided in future. Fixes: f033c26de5a5 ("regmap: Add maple tree based register cache") Link: https://lore.kernel.org/r/877cs7g6f1.wl-tiwai@suse.de Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20230613112240.3361-1-tiwai@suse.de Signed-off-by: Mark Brown --- drivers/base/regmap/regcache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index 029564695dbb..97c681fcf9f6 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -284,6 +284,9 @@ static bool regcache_reg_needs_sync(struct regmap *map, unsigned int reg, { int ret; + if (!regmap_writeable(map, reg)) + return false; + /* If we don't know the chip just got reset, then sync everything. */ if (!map->no_sync_defaults) return true; From 8c00914e5438e3636f26b4f814b3297ae2a1b9ee Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Wed, 7 Jun 2023 16:18:03 +0800 Subject: [PATCH 712/934] gpiolib: Fix GPIO chip IRQ initialization restriction In case of gpio-regmap, IRQ chip is added by regmap-irq and associated with GPIO chip by gpiochip_irqchip_add_domain(). The initialization flag was not added in gpiochip_irqchip_add_domain(), causing gpiochip_to_irq() to return -EPROBE_DEFER. Fixes: 5467801f1fcb ("gpio: Restrict usage of GPIO chip irq members before initialization") Signed-off-by: Jiawen Wu Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index a7220e04a93e..9ecf93cbd801 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1792,6 +1792,14 @@ int gpiochip_irqchip_add_domain(struct gpio_chip *gc, gc->to_irq = gpiochip_to_irq; gc->irq.domain = domain; + /* + * Using barrier() here to prevent compiler from reordering + * gc->irq.initialized before adding irqdomain. + */ + barrier(); + + gc->irq.initialized = true; + return 0; } EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_domain); From c7753ed71c160f75f92ff5679e9fc22526e56fc5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Apr 2023 10:06:06 -0500 Subject: [PATCH 713/934] dt-bindings: pinctrl: qcom,pmic-mpp: Fix schema for "qcom,paired" The "qcom,paired" schema is all wrong. First, it's a list rather than an object(dictionary). Second, it is missing a required type. The meta-schema normally catches this, but schemas under "$defs" was not getting checked. A fix for that is pending. Fixes: f9a06b810951 ("dt-bindings: pinctrl: qcom,pmic-mpp: Convert qcom pmic mpp bindings to YAML") Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230418150606.1528107-1-robh@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml index c91d3e3a094b..80f960671857 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.yaml @@ -144,8 +144,9 @@ $defs: enum: [0, 1, 2, 3, 4, 5, 6, 7] qcom,paired: - - description: - Indicates that the pin should be operating in paired mode. + type: boolean + description: + Indicates that the pin should be operating in paired mode. required: - pins From 47b3ad6b7842f49d374a01b054a4b1461a621bdc Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Tue, 13 Jun 2023 15:41:46 +0200 Subject: [PATCH 714/934] mmc: mmci: stm32: fix max busy timeout calculation The way that the timeout is currently calculated could lead to a u64 timeout value in mmci_start_command(). This value is then cast in a u32 register that leads to mmc erase failed issue with some SD cards. Fixes: 8266c585f489 ("mmc: mmci: add hardware busy timeout feature") Signed-off-by: Yann Gautier Signed-off-by: Christophe Kerello Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230613134146.418016-1-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index f2b2e8b0574e..696cbef3ff7d 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1735,7 +1735,8 @@ static void mmci_set_max_busy_timeout(struct mmc_host *mmc) return; if (host->variant->busy_timeout && mmc->actual_clock) - max_busy_timeout = ~0UL / (mmc->actual_clock / MSEC_PER_SEC); + max_busy_timeout = U32_MAX / DIV_ROUND_UP(mmc->actual_clock, + MSEC_PER_SEC); mmc->max_busy_timeout = max_busy_timeout; } From c1bcb976d8feb107ff2c12caaf12ac5e70f44d5f Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 6 Jun 2023 11:11:59 +0800 Subject: [PATCH 715/934] gpio: sifive: add missing check for platform_get_irq Add the missing check for platform_get_irq() and return error code if it fails. The returned error code will be dealed with in builtin_platform_driver(sifive_gpio_driver) and the driver will not be registered. Fixes: f52d6d8b43e5 ("gpio: sifive: To get gpio irq offset from device tree data") Signed-off-by: Jiasheng Jiang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sifive.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c index 98939cd4a71e..745e5f67254e 100644 --- a/drivers/gpio/gpio-sifive.c +++ b/drivers/gpio/gpio-sifive.c @@ -221,8 +221,12 @@ static int sifive_gpio_probe(struct platform_device *pdev) return -ENODEV; } - for (i = 0; i < ngpio; i++) - chip->irq_number[i] = platform_get_irq(pdev, i); + for (i = 0; i < ngpio; i++) { + ret = platform_get_irq(pdev, i); + if (ret < 0) + return ret; + chip->irq_number[i] = ret; + } ret = bgpio_init(&chip->gc, dev, 4, chip->base + SIFIVE_GPIO_INPUT_VAL, From 95011f267c44a4d1f9ca1769e8a29ab2c559e004 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 8 Jun 2023 09:24:43 +0800 Subject: [PATCH 716/934] drm/bridge: ti-sn65dsi86: Avoid possible buffer overflow Smatch error:buffer overflow 'ti_sn_bridge_refclk_lut' 5 <= 5. Fixes: cea86c5bb442 ("drm/bridge: ti-sn65dsi86: Implement the pwm_chip") Signed-off-by: Su Hui Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20230608012443.839372-1-suhui@nfschina.com --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 7a748785c545..4676cf2900df 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -298,6 +298,10 @@ static void ti_sn_bridge_set_refclk_freq(struct ti_sn65dsi86 *pdata) if (refclk_lut[i] == refclk_rate) break; + /* avoid buffer overflow and "1" is the default rate in the datasheet. */ + if (i >= refclk_lut_size) + i = 1; + regmap_update_bits(pdata->regmap, SN_DPPLL_SRC_REG, REFCLK_FREQ_MASK, REFCLK_FREQ(i)); From eee43699217504ba69cadefc85c6992df555e33f Mon Sep 17 00:00:00 2001 From: Abe Kohandel Date: Tue, 13 Jun 2023 09:21:03 -0700 Subject: [PATCH 717/934] spi: dw: Replace incorrect spi_get_chipselect with set Commit 445164e8c136 ("spi: dw: Replace spi->chip_select references with function calls") replaced direct access to spi.chip_select with spi_*_chipselect calls but incorrectly replaced a set instance with a get instance, replace the incorrect instance. Fixes: 445164e8c136 ("spi: dw: Replace spi->chip_select references with function calls") Signed-off-by: Abe Kohandel Reviewed-by: Andy Shevchenko Acked-by: Serge Semin Link: https://lore.kernel.org/r/20230613162103.569812-1-abe.kohandel@intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-dw-mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c index 5f2aee69c1c1..15f5e9cb54ad 100644 --- a/drivers/spi/spi-dw-mmio.c +++ b/drivers/spi/spi-dw-mmio.c @@ -274,7 +274,7 @@ static void dw_spi_elba_set_cs(struct spi_device *spi, bool enable) */ spi_set_chipselect(spi, 0, 0); dw_spi_set_cs(spi, enable); - spi_get_chipselect(spi, cs); + spi_set_chipselect(spi, 0, cs); } static int dw_spi_elba_init(struct platform_device *pdev, From 20a2ce87fbaf81e4c3dcb631d738e423959eb320 Mon Sep 17 00:00:00 2001 From: Natalia Petrova Date: Fri, 12 May 2023 14:15:26 +0300 Subject: [PATCH 718/934] drm/nouveau/dp: check for NULL nv_connector->native_mode Add checking for NULL before calling nouveau_connector_detect_depth() in nouveau_connector_get_modes() function because nv_connector->native_mode could be dereferenced there since connector pointer passed to nouveau_connector_detect_depth() and the same value of nv_connector->native_mode is used there. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d4c2c99bdc83 ("drm/nouveau/dp: remove broken display depth function, use the improved one") Signed-off-by: Natalia Petrova Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230512111526.82408-1-n.petrova@fintech.ru --- drivers/gpu/drm/nouveau/nouveau_connector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 086b66b60d91..5dbf025e6873 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -966,7 +966,7 @@ nouveau_connector_get_modes(struct drm_connector *connector) /* Determine display colour depth for everything except LVDS now, * DP requires this before mode_valid() is called. */ - if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS) + if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode) nouveau_connector_detect_depth(connector); /* Find the native mode if this is a digital panel, if we didn't @@ -987,7 +987,7 @@ nouveau_connector_get_modes(struct drm_connector *connector) * "native" mode as some VBIOS tables require us to use the * pixel clock as part of the lookup... */ - if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS) + if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode) nouveau_connector_detect_depth(connector); if (nv_encoder->dcb->type == DCB_OUTPUT_TV) From 1dbcf770cc2d15baf8a1e8174d6fd014a68b45ca Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Wed, 24 May 2023 11:42:19 +0800 Subject: [PATCH 719/934] drm/amdgpu: Reset CP_VMID_PREEMPT after trailing fence signaled When MEC executes unmap_queue for mid command buffer preemption, it will kick the write pointer of the gfx ring, set CP_VMID_PREEMPT to trigger the preemption and wait for CP_VMID_PREEMPT becomes zero after the preemption done. There is a race condition that PFP may excute the resetting command before MEC set CP_VMID_PREEMPT. As a result, hang happens as CP_VMID_PREEMPT is always 0xffff. To avoid this, we send resetting CP_VMID_PREEMPT command after the trailing fence is siganled and update gfx write pointer explicitly. Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.3.x Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2535 --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e7f2b7bf0ff5..6c1e923bc7ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5369,10 +5369,6 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) amdgpu_ring_alloc(ring, 13); gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); - /*reset the CP_VMID_PREEMPT after trailing fence*/ - amdgpu_ring_emit_wreg(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), - 0x0); /* assert IB preemption, emit the trailing fence */ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, @@ -5395,6 +5391,10 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); } + /*reset the CP_VMID_PREEMPT after trailing fence*/ + amdgpu_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), + 0x0); amdgpu_ring_commit(ring); /* deassert preemption condition */ From 55b94bb8c42464bad3d2217f6874aa1a85664eac Mon Sep 17 00:00:00 2001 From: Natalia Petrova Date: Fri, 12 May 2023 13:33:20 +0300 Subject: [PATCH 720/934] drm/nouveau: add nv_encoder pointer check for NULL Pointer nv_encoder could be dereferenced at nouveau_connector.c in case it's equal to NULL by jumping to goto label. This patch adds a NULL-check to avoid it. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 3195c5f9784a ("drm/nouveau: set encoder for lvds") Signed-off-by: Natalia Petrova Reviewed-by: Lyude Paul [Fixed patch title] Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230512103320.82234-1-n.petrova@fintech.ru --- drivers/gpu/drm/nouveau/nouveau_connector.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 5dbf025e6873..f75c6f09dd2a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -730,7 +730,8 @@ out: #endif nouveau_connector_set_edid(nv_connector, edid); - nouveau_connector_set_encoder(connector, nv_encoder); + if (nv_encoder) + nouveau_connector_set_encoder(connector, nv_encoder); return status; } From 94034b306ddde4a4a9c1a597ae7f61f04b710dc7 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Wed, 24 May 2023 16:51:32 +0800 Subject: [PATCH 721/934] drm/amdgpu: Program gds backup address as zero if no gds allocated It is firmware requirement to set gds_backup_addrlo and gds_backup_addrhi of DE meta both zero if no gds partition is allocated for the frame. Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.3.x --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6c1e923bc7ce..bb7dd39b6d1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -755,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); @@ -5127,7 +5127,8 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, gfx_v9_0_ring_emit_de_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? - true : false); + true : false, + job->gds_size > 0 && job->gds_base != 0); } amdgpu_ring_write(ring, header); @@ -5402,7 +5403,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) return r; } -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) { struct amdgpu_device *adev = ring->adev; struct v9_de_ib_state de_payload = {0}; @@ -5433,8 +5434,10 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) PAGE_SIZE); } - de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); - de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + if (usegds) { + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + } cnt = (sizeof(de_payload) >> 2) + 4 - 2; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); From 87af86ae89963c227a3beb4d914f3dc7959a690e Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 25 May 2023 16:52:55 +0800 Subject: [PATCH 722/934] drm/amdgpu: Modify indirect buffer packages for resubmission When the preempted IB frame resubmitted to cp, we need to modify the frame data including: 1. set PRE_RESUME 1 in CONTEXT_CONTROL. 2. use meta data(DE and CE) read from CSA in WRITE_DATA. Add functions to save the location the first time IBs emitted and callback to patch the package when resubmission happens. Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.3.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 18 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 9 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c | 60 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h | 15 +++++ 4 files changed, 102 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index dc474b809604..49de3a3eebc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -581,3 +581,21 @@ void amdgpu_ring_ib_end(struct amdgpu_ring *ring) if (ring->is_sw_ring) amdgpu_sw_ring_ib_end(ring); } + +void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CONTROL); +} + +void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_CE); +} + +void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring) +{ + if (ring->is_sw_ring) + amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index d8749444b689..2474cb71e476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -227,6 +227,9 @@ struct amdgpu_ring_funcs { int (*preempt_ib)(struct amdgpu_ring *ring); void (*emit_mem_sync)(struct amdgpu_ring *ring); void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable); + void (*patch_cntl)(struct amdgpu_ring *ring, unsigned offset); + void (*patch_ce)(struct amdgpu_ring *ring, unsigned offset); + void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); }; struct amdgpu_ring { @@ -318,10 +321,16 @@ struct amdgpu_ring { #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) +#define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) +#define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) +#define amdgpu_ring_patch_de(r, o) ((r)->funcs->patch_de((r), (o))) int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); void amdgpu_ring_ib_begin(struct amdgpu_ring *ring); void amdgpu_ring_ib_end(struct amdgpu_ring *ring); +void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring); +void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring); +void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index 62079f0e3ee8..73516abef662 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -105,6 +105,16 @@ static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux) amdgpu_fence_update_start_timestamp(e->ring, chunk->sync_seq, ktime_get()); + if (chunk->sync_seq == + le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) { + if (chunk->cntl_offset <= e->ring->buf_mask) + amdgpu_ring_patch_cntl(e->ring, + chunk->cntl_offset); + if (chunk->ce_offset <= e->ring->buf_mask) + amdgpu_ring_patch_ce(e->ring, chunk->ce_offset); + if (chunk->de_offset <= e->ring->buf_mask) + amdgpu_ring_patch_de(e->ring, chunk->de_offset); + } amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring, chunk->start, chunk->end); @@ -407,6 +417,17 @@ void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) amdgpu_ring_mux_end_ib(mux, ring); } +void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + unsigned offset; + + offset = ring->wptr & ring->buf_mask; + + amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type); +} + void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) { struct amdgpu_mux_entry *e; @@ -429,6 +450,10 @@ void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *r } chunk->start = ring->wptr; + /* the initialized value used to check if they are set by the ib submission*/ + chunk->cntl_offset = ring->buf_mask + 1; + chunk->de_offset = ring->buf_mask + 1; + chunk->ce_offset = ring->buf_mask + 1; list_add_tail(&chunk->entry, &e->list); } @@ -454,6 +479,41 @@ static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct a } } +void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, + struct amdgpu_ring *ring, u64 offset, + enum amdgpu_ring_mux_offset_type type) +{ + struct amdgpu_mux_entry *e; + struct amdgpu_mux_chunk *chunk; + + e = amdgpu_ring_mux_sw_entry(mux, ring); + if (!e) { + DRM_ERROR("cannot find entry!\n"); + return; + } + + chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry); + if (!chunk) { + DRM_ERROR("cannot find chunk!\n"); + return; + } + + switch (type) { + case AMDGPU_MUX_OFFSET_TYPE_CONTROL: + chunk->cntl_offset = offset; + break; + case AMDGPU_MUX_OFFSET_TYPE_DE: + chunk->de_offset = offset; + break; + case AMDGPU_MUX_OFFSET_TYPE_CE: + chunk->ce_offset = offset; + break; + default: + DRM_ERROR("invalid type (%d)\n", type); + break; + } +} + void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) { struct amdgpu_mux_entry *e; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h index 4be45fc14954..b22d4fb2a847 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h @@ -50,6 +50,12 @@ struct amdgpu_mux_entry { struct list_head list; }; +enum amdgpu_ring_mux_offset_type { + AMDGPU_MUX_OFFSET_TYPE_CONTROL, + AMDGPU_MUX_OFFSET_TYPE_DE, + AMDGPU_MUX_OFFSET_TYPE_CE, +}; + struct amdgpu_ring_mux { struct amdgpu_ring *real_ring; @@ -72,12 +78,18 @@ struct amdgpu_ring_mux { * @sync_seq: the fence seqno related with the saved IB. * @start:- start location on the software ring. * @end:- end location on the software ring. + * @control_offset:- the PRE_RESUME bit position used for resubmission. + * @de_offset:- the anchor in write_data for de meta of resubmission. + * @ce_offset:- the anchor in write_data for ce meta of resubmission. */ struct amdgpu_mux_chunk { struct list_head entry; uint32_t sync_seq; u64 start; u64 end; + u64 cntl_offset; + u64 de_offset; + u64 ce_offset; }; int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, @@ -89,6 +101,8 @@ u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ri u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, + u64 offset, enum amdgpu_ring_mux_offset_type type); bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux); u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring); @@ -97,6 +111,7 @@ void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); +void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type); const char *amdgpu_sw_ring_name(int idx); unsigned int amdgpu_sw_ring_priority(int idx); From 5b711e7f9c73e5ff44d6ac865711d9a05c2a0360 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 25 May 2023 18:42:15 +0800 Subject: [PATCH 723/934] drm/amdgpu: Implement gfx9 patch functions for resubmission Patch the packages including CONTEXT_CONTROL and WRITE_DATA for gfx9 during the resubmission scenario. Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.3.x --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 80 +++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index bb7dd39b6d1a..a674c8a58dc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5139,9 +5139,83 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, #endif lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_ib_on_emit_cntl(ring); amdgpu_ring_write(ring, control); } +static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, + unsigned offset) +{ + u32 control = ring->ring[offset]; + + control |= INDIRECT_BUFFER_PRE_RESUME(1); + ring->ring[offset] = control; +} + +static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, + unsigned offset) +{ + struct amdgpu_device *adev = ring->adev; + void *ce_payload_cpu_addr; + uint64_t payload_offset, payload_size; + + payload_size = sizeof(struct v9_ce_ib_state); + + if (ring->is_mes_queue) { + payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); + } else { + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; + } + + if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { + memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); + } else { + memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, + (ring->buf_mask + 1 - offset) << 2); + payload_size -= (ring->buf_mask + 1 - offset) << 2; + memcpy((void *)&ring->ring[0], + ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), + payload_size); + } +} + +static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, + unsigned offset) +{ + struct amdgpu_device *adev = ring->adev; + void *de_payload_cpu_addr; + uint64_t payload_offset, payload_size; + + payload_size = sizeof(struct v9_de_ib_state); + + if (ring->is_mes_queue) { + payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); + } else { + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; + } + + if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { + memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); + } else { + memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, + (ring->buf_mask + 1 - offset) << 2); + payload_size -= (ring->buf_mask + 1 - offset) << 2; + memcpy((void *)&ring->ring[0], + de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), + payload_size); + } +} + static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, @@ -5337,6 +5411,8 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); + amdgpu_ring_ib_on_emit_ce(ring); + if (resume) amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, sizeof(ce_payload) >> 2); @@ -5448,6 +5524,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); + amdgpu_ring_ib_on_emit_de(ring); if (resume) amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, sizeof(de_payload) >> 2); @@ -6858,6 +6935,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, + .patch_cntl = gfx_v9_0_ring_patch_cntl, + .patch_de = gfx_v9_0_ring_patch_de_meta, + .patch_ce = gfx_v9_0_ring_patch_ce_meta, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { From e61f67749b351c19455ce3085af2ae9af80023bc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 6 Jun 2023 11:14:04 -0400 Subject: [PATCH 724/934] drm/amdgpu: add missing radeon secondary PCI ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0x5b70 is a missing RV370 secondary id. Add it so we don't try and probe it with amdgpu. Cc: michel@daenzer.net Reviewed-by: Michel Dänzer Tested-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b1ca1ab6d6ad..393b6fb7a71d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1615,6 +1615,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = { 0x5874, 0x5940, 0x5941, + 0x5b70, 0x5b72, 0x5b73, 0x5b74, From 3eb1a3a04056ba3df3205e169b8acc9da0c65a94 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 7 Jun 2023 01:41:22 -0500 Subject: [PATCH 725/934] drm/amd: Make sure image is written to trigger VBIOS image update flow The VBIOS image update flow requires userspace to: 1) Write the image to `psp_vbflash` 2) Read `psp_vbflash` 3) Poll `psp_vbflash_status` to check for completion If userspace reads `psp_vbflash` before writing an image, it's possible that it causes problems that can put the dGPU into an invalid state. Explicitly check that an image has been written before letting a read succeed. Cc: stable@vger.kernel.org Fixes: 8424f2ccb3c0 ("drm/amdgpu/psp: Add vbflash sysfs interface support") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9d7e6e0e73ed..cf2ffe6fe4a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3548,6 +3548,9 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, void *fw_pri_cpu_addr; int ret; + if (adev->psp.vbflash_image_size == 0) + return -EINVAL; + dev_info(adev->dev, "VBIOS flash to PSP started"); ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size, From 7ab1a4913d0051cf5196ef7987b5fa42c25e13b6 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 7 Jun 2023 01:45:20 -0500 Subject: [PATCH 726/934] drm/amd: Tighten permissions on VBIOS flashing attributes Non-root users shouldn't be able to try to trigger a VBIOS flash or query the flashing status. This should be reserved for users with the appropriate permissions. Cc: stable@vger.kernel.org Fixes: 8424f2ccb3c0 ("drm/amdgpu/psp: Add vbflash sysfs interface support") Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index cf2ffe6fe4a2..a150b7a4b4aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3602,13 +3602,13 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev, } static const struct bin_attribute psp_vbflash_bin_attr = { - .attr = {.name = "psp_vbflash", .mode = 0664}, + .attr = {.name = "psp_vbflash", .mode = 0660}, .size = 0, .write = amdgpu_psp_vbflash_write, .read = amdgpu_psp_vbflash_read, }; -static DEVICE_ATTR(psp_vbflash_status, 0444, amdgpu_psp_vbflash_status, NULL); +static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); int amdgpu_psp_sysfs_init(struct amdgpu_device *adev) { From 7ca302d488f80cf4529620acc1c545f9022d8bb8 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 8 Jun 2023 22:07:11 +0800 Subject: [PATCH 727/934] drm/amd/pm: workaround for compute workload type on some skus On smu 13.0.0, the compute workload type cannot be set on all the skus due to some other problems. This workaround is to make sure compute workload type can also run on some specific skus. v2: keep the variable consistent Signed-off-by: Kenneth Feng Acked-by: Lijo Lazar Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 09405ef1e3c8..08577d1b84ec 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1696,10 +1696,39 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, } } - /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ - workload_type = smu_cmn_to_asic_specific_index(smu, + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE && + (((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xC8)) || + ((smu->adev->pdev->device == 0x744C) && (smu->adev->pdev->revision == 0xCC)))) { + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_COMPUTE_BIT, + (void *)(&activity_monitor_external), + false); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__); + return ret; + } + + ret = smu_cmn_update_table(smu, + SMU_TABLE_ACTIVITY_MONITOR_COEFF, + WORKLOAD_PPLIB_CUSTOM_BIT, + (void *)(&activity_monitor_external), + true); + if (ret) { + dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__); + return ret; + } + + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + PP_SMC_POWER_PROFILE_CUSTOM); + } else { + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_WORKLOAD, smu->power_profile_mode); + } + if (workload_type < 0) return -EINVAL; From 7ac9be96b0113a34c33110b32912642bdc8ff33d Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 9 Jun 2023 16:03:56 +0200 Subject: [PATCH 728/934] drm/radeon: Disable outputs when releasing fbdev client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disable the modesetting pipeline before release the radeon's fbdev client. Fixes the following error: [ 17.217408] WARNING: CPU: 5 PID: 1464 at drivers/gpu/drm/ttm/ttm_bo.c:326 ttm_bo_release+0x27e/0x2d0 [ttm] [ 17.217418] Modules linked in: edac_mce_amd radeon(+) drm_ttm_helper ttm video drm_suballoc_helper drm_display_helper kvm irqbypass drm_kms_helper syscopyarea crc32_pclmul sysfillrect sha512_ssse3 sysimgblt sha512_generic cfbfillrect cfbimgblt wmi_bmof aesni_intel cfbcopyarea crypto_simd cryptd k10temp acpi_cpufreq wmi dm_mod [ 17.217432] CPU: 5 PID: 1464 Comm: systemd-udevd Not tainted 6.4.0-rc4+ #1 [ 17.217436] Hardware name: Micro-Star International Co., Ltd. MS-7A38/B450M PRO-VDH MAX (MS-7A38), BIOS B.G0 07/26/2022 [ 17.217438] RIP: 0010:ttm_bo_release+0x27e/0x2d0 [ttm] [ 17.217444] Code: 48 89 43 38 48 89 43 40 48 8b 5c 24 30 48 8b b5 40 08 00 00 48 8b 6c 24 38 48 83 c4 58 e9 7a 49 f7 e0 48 89 ef e9 6c fe ff ff <0f> 0b 48 83 7b 20 00 0f 84 b7 fd ff ff 0f 0b 0f 1f 00 e9 ad fd ff [ 17.217448] RSP: 0018:ffffc9000095fbb0 EFLAGS: 00010202 [ 17.217451] RAX: 0000000000000001 RBX: ffff8881052c8de0 RCX: 0000000000000000 [ 17.217453] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8881052c8de0 [ 17.217455] RBP: ffff888104a66e00 R08: ffff8881052c8de0 R09: ffff888104a7cf08 [ 17.217457] R10: ffffc9000095fbe0 R11: ffffc9000095fbe8 R12: ffff8881052c8c78 [ 17.217458] R13: ffff8881052c8c78 R14: dead000000000100 R15: ffff88810528b108 [ 17.217460] FS: 00007f319fcbb8c0(0000) GS:ffff88881a540000(0000) knlGS:0000000000000000 [ 17.217463] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 17.217464] CR2: 000055dc8b0224a0 CR3: 000000010373d000 CR4: 0000000000750ee0 [ 17.217466] PKRU: 55555554 [ 17.217468] Call Trace: [ 17.217470] [ 17.217472] ? __warn+0x97/0x160 [ 17.217476] ? ttm_bo_release+0x27e/0x2d0 [ttm] [ 17.217481] ? report_bug+0x1ec/0x200 [ 17.217487] ? handle_bug+0x3c/0x70 [ 17.217490] ? exc_invalid_op+0x1f/0x90 [ 17.217493] ? preempt_count_sub+0xb5/0x100 [ 17.217496] ? asm_exc_invalid_op+0x16/0x20 [ 17.217500] ? ttm_bo_release+0x27e/0x2d0 [ttm] [ 17.217505] ? ttm_resource_move_to_lru_tail+0x1ab/0x1d0 [ttm] [ 17.217511] radeon_bo_unref+0x1a/0x30 [radeon] [ 17.217547] radeon_gem_object_free+0x20/0x30 [radeon] [ 17.217579] radeon_fbdev_fb_destroy+0x57/0x90 [radeon] [ 17.217616] unregister_framebuffer+0x72/0x110 [ 17.217620] drm_client_dev_unregister+0x6d/0xe0 [ 17.217623] drm_dev_unregister+0x2e/0x90 [ 17.217626] drm_put_dev+0x26/0x90 [ 17.217628] pci_device_remove+0x44/0xc0 [ 17.217631] really_probe+0x257/0x340 [ 17.217635] __driver_probe_device+0x73/0x120 [ 17.217638] driver_probe_device+0x2c/0xb0 [ 17.217641] __driver_attach+0xa0/0x150 [ 17.217643] ? __pfx___driver_attach+0x10/0x10 [ 17.217646] bus_for_each_dev+0x67/0xa0 [ 17.217649] bus_add_driver+0x10e/0x210 [ 17.217651] driver_register+0x5c/0x120 [ 17.217653] ? __pfx_radeon_module_init+0x10/0x10 [radeon] [ 17.217681] do_one_initcall+0x44/0x220 [ 17.217684] ? kmalloc_trace+0x37/0xc0 [ 17.217688] do_init_module+0x64/0x240 [ 17.217691] __do_sys_finit_module+0xb2/0x100 [ 17.217694] do_syscall_64+0x3b/0x90 [ 17.217697] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 17.217700] RIP: 0033:0x7f319feaa5a9 [ 17.217702] Code: 08 89 e8 5b 5d c3 66 2e 0f 1f 84 00 00 00 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 27 08 0d 00 f7 d8 64 89 01 48 [ 17.217706] RSP: 002b:00007ffc6bf3e7f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 17.217709] RAX: ffffffffffffffda RBX: 00005607204f3170 RCX: 00007f319feaa5a9 [ 17.217710] RDX: 0000000000000000 RSI: 00007f31a002eefd RDI: 0000000000000018 [ 17.217712] RBP: 00007f31a002eefd R08: 0000000000000000 R09: 00005607204f1860 [ 17.217714] R10: 0000000000000018 R11: 0000000000000246 R12: 0000000000020000 [ 17.217716] R13: 0000000000000000 R14: 0000560720522450 R15: 0000560720255899 [ 17.217718] [ 17.217719] ---[ end trace 0000000000000000 ]--- The buffer object backing the fbdev emulation got pinned twice: by the fb_probe helper radeon_fbdev_create_pinned_object() and the modesetting code when the framebuffer got displayed. It only got unpinned once by the fbdev helper radeon_fbdev_destroy_pinned_object(). Hence TTM's BO- release function complains about the pin counter. Forcing the outputs off also undoes the modesettings pin increment. Tested-by: Borislav Petkov (AMD) Reported-by: Borislav Petkov Closes: https://lore.kernel.org/dri-devel/20230603174814.GCZHt83pN+wNjf63sC@fat_crate.local/ Signed-off-by: Thomas Zimmermann Fixes: e317a69fe891 ("drm/radeon: Implement client-based fbdev emulation") Cc: Alex Deucher Cc: Thomas Zimmermann Cc: "Christian König" Cc: "Pan, Xinhui" Cc: amd-gfx@lists.freedesktop.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_fbdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index fe76e29910ef..8f6c3aef0962 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -307,6 +307,7 @@ static void radeon_fbdev_client_unregister(struct drm_client_dev *client) if (fb_helper->info) { vga_switcheroo_client_fb_set(rdev->pdev, NULL); + drm_helper_force_disable_all(dev); drm_fb_helper_unregister_info(fb_helper); } else { drm_client_release(&fb_helper->client); From 9db5ec1ceb5303398ec4f899d691073d531257c3 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Tue, 6 Jun 2023 17:18:52 -0400 Subject: [PATCH 729/934] drm/amdgpu: vcn_4_0 set instance 0 init sched score to 1 Only vcn0 can process AV1 codecx. In order to use both vcn0 and vcn1 in h264/265 transcode to AV1 cases, set vcn0 sched score to 1 at initialization time. Signed-off-by: Sonny Jiang Reviewed-by: Leo Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index e5fd1e00914d..da126ff8bcbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -129,7 +129,11 @@ static int vcn_v4_0_sw_init(void *handle) if (adev->vcn.harvest_config & (1 << i)) continue; - atomic_set(&adev->vcn.inst[i].sched_score, 0); + /* Init instance 0 sched_score to 1, so it's scheduled after other instances */ + if (i == 0) + atomic_set(&adev->vcn.inst[i].sched_score, 1); + else + atomic_set(&adev->vcn.inst[i].sched_score, 0); /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], From 34e5a54327dce5033582f3609eb54812a8c61b90 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 9 Jun 2023 06:18:41 -0700 Subject: [PATCH 730/934] Revert "drm/amdgpu: remove TOPDOWN flags when allocating VRAM in large bar system" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c105518679b6e87232874ffc989ec403bee59664. This patch disables the TOPDOWN flag for APU and few dGPU cards which has the VRAM size equal to the BAR size. When we enable the TOPDOWN flag, we get the free blocks at the highest available memory region and we don't split the lower order blocks. This change is required to keep off the fragmentation related issues particularly in ASIC which has VRAM space <= 500MiB Hence, we are reverting this patch. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2270 Signed-off-by: Arunpravin Paneer Selvam Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 3b225be89cb7..a70103ac0026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -140,7 +140,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) places[c].lpfn = visible_pfn; - else if (adev->gmc.real_vram_size != adev->gmc.visible_vram_size) + else places[c].flags |= TTM_PL_FLAG_TOPDOWN; if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) From e749dd10e5f292061ad63d2b030194bf7d7d452c Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Thu, 25 May 2023 08:37:40 -0400 Subject: [PATCH 731/934] drm/amd/display: edp do not add non-edid timings [Why] most edp support only timings from edid. applying non-edid timings, especially those timings out of edp bandwidth, may damage edp. [How] do not add non-edid timings for edp. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Stylon Wang Signed-off-by: Hersen Wu Reviewed-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d5cec03eaa8d..2b8d42de1f73 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7196,7 +7196,13 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) drm_add_modes_noedid(connector, 1920, 1080); } else { amdgpu_dm_connector_ddc_get_modes(connector, edid); - amdgpu_dm_connector_add_common_modes(encoder, connector); + /* most eDP supports only timings from its edid, + * usually only detailed timings are available + * from eDP edid. timings which are not from edid + * may damage eDP + */ + if (connector->connector_type != DRM_MODE_CONNECTOR_eDP) + amdgpu_dm_connector_add_common_modes(encoder, connector); amdgpu_dm_connector_add_freesync_modes(connector, edid); } amdgpu_dm_fbc_init(connector); From ea2062dd1f0384ae1b136d333ee4ced15bedae38 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Mon, 29 May 2023 18:00:09 +0800 Subject: [PATCH 732/934] drm/amd/display: fix the system hang while disable PSR [Why] When the PSR enabled. If you try to adjust the timing parameters, it may cause system hang. Because the timing mismatch with the DMCUB settings. [How] Disable the PSR before adjusting timing parameters. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Stylon Wang Signed-off-by: Tom Chung Reviewed-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2b8d42de1f73..7acd73e5004f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8204,6 +8204,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, if (acrtc_state->abm_level != dm_old_crtc_state->abm_level) bundle->stream_update.abm_level = &acrtc_state->abm_level; + mutex_lock(&dm->dc_lock); + if ((acrtc_state->update_type > UPDATE_TYPE_FAST) && + acrtc_state->stream->link->psr_settings.psr_allow_active) + amdgpu_dm_psr_disable(acrtc_state->stream); + mutex_unlock(&dm->dc_lock); + /* * If FreeSync state on the stream has changed then we need to * re-adjust the min/max bounds now that DC doesn't handle this @@ -8217,10 +8223,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } mutex_lock(&dm->dc_lock); - if ((acrtc_state->update_type > UPDATE_TYPE_FAST) && - acrtc_state->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(acrtc_state->stream); - update_planes_and_stream_adapter(dm->dc, acrtc_state->update_type, planes_count, From 7c5835bcb9176df94683396f1c0e5df6bf5094b3 Mon Sep 17 00:00:00 2001 From: Peichen Huang Date: Wed, 31 May 2023 13:36:14 +0800 Subject: [PATCH 733/934] drm/amd/display: limit DPIA link rate to HBR3 [Why] DPIA doesn't support UHBR, driver should not enable UHBR for dp tunneling [How] limit DPIA link rate to HBR3 Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Stylon Wang Signed-off-by: Peichen Huang Reviewed-by: Mustapha Ghaddar Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/link/link_detection.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index a131e30fd7d6..d471d58aba92 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -980,6 +980,11 @@ static bool detect_link_and_local_sink(struct dc_link *link, (link->dpcd_caps.dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER)) converter_disable_audio = true; + + /* limited link rate to HBR3 for DPIA until we implement USB4 V2 */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + link->reported_link_cap.link_rate > LINK_RATE_HIGH3) + link->reported_link_cap.link_rate = LINK_RATE_HIGH3; break; } From 85041e12418fd0c08ff972b7729f7971afb361f8 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Sun, 12 Feb 2023 13:16:32 +0100 Subject: [PATCH 734/934] nios2: dts: Fix tse_mac "max-frame-size" property The given value of 1518 seems to refer to the layer 2 ethernet frame size without 802.1Q tag. Actual use of the "max-frame-size" including in the consumer of the "altr,tse-1.0" compatible is the MTU. Fixes: 95acd4c7b69c ("nios2: Device tree support") Fixes: 61c610ec61bb ("nios2: Add Max10 device tree") Cc: Signed-off-by: Janne Grunau Signed-off-by: Dinh Nguyen --- arch/nios2/boot/dts/10m50_devboard.dts | 2 +- arch/nios2/boot/dts/3c120_devboard.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/nios2/boot/dts/10m50_devboard.dts b/arch/nios2/boot/dts/10m50_devboard.dts index 56339bef3247..0e7e5b0dd685 100644 --- a/arch/nios2/boot/dts/10m50_devboard.dts +++ b/arch/nios2/boot/dts/10m50_devboard.dts @@ -97,7 +97,7 @@ rx-fifo-depth = <8192>; tx-fifo-depth = <8192>; address-bits = <48>; - max-frame-size = <1518>; + max-frame-size = <1500>; local-mac-address = [00 00 00 00 00 00]; altr,has-supplementary-unicast; altr,enable-sup-addr = <1>; diff --git a/arch/nios2/boot/dts/3c120_devboard.dts b/arch/nios2/boot/dts/3c120_devboard.dts index d10fb81686c7..3ee316906379 100644 --- a/arch/nios2/boot/dts/3c120_devboard.dts +++ b/arch/nios2/boot/dts/3c120_devboard.dts @@ -106,7 +106,7 @@ interrupt-names = "rx_irq", "tx_irq"; rx-fifo-depth = <8192>; tx-fifo-depth = <8192>; - max-frame-size = <1518>; + max-frame-size = <1500>; local-mac-address = [ 00 00 00 00 00 00 ]; phy-mode = "rgmii-id"; phy-handle = <&phy0>; From 0108a4e9f3584a7a2c026d1601b0682ff7335d95 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Mon, 12 Jun 2023 17:44:40 -0700 Subject: [PATCH 735/934] bpf: ensure main program has an extable When subprograms are in use, the main program is not jit'd after the subprograms because jit_subprogs sets a value for prog->bpf_func upon success. Subsequent calls to the JIT are bypassed when this value is non-NULL. This leads to a situation where the main program and its func[0] counterpart are both in the bpf kallsyms tree, but only func[0] has an extable. Extables are only created during JIT. Now there are two nearly identical program ksym entries in the tree, but only one has an extable. Depending upon how the entries are placed, there's a chance that a fault will call search_extable on the aux with the NULL entry. Since jit_subprogs already copies state from func[0] to the main program, include the extable pointer in this state duplication. Additionally, ensure that the copy of the main program in func[0] is not added to the bpf_prog_kallsyms table. Instead, let the main program get added later in bpf_prog_load(). This ensures there is only a single copy of the main program in the kallsyms table, and that its tag matches the tag observed by tooling like bpftool. Cc: stable@vger.kernel.org Fixes: 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs") Signed-off-by: Krister Johansen Acked-by: Yonghong Song Acked-by: Ilya Leoshkevich Tested-by: Ilya Leoshkevich Link: https://lore.kernel.org/r/6de9b2f4b4724ef56efbb0339daaa66c8b68b1e7.1686616663.git.kjlx@templeofstupid.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0dd8adc7a159..cf5f230360f5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17217,9 +17217,10 @@ static int jit_subprogs(struct bpf_verifier_env *env) } /* finally lock prog and jit images for all functions and - * populate kallsysm + * populate kallsysm. Begin at the first subprogram, since + * bpf_prog_load will add the kallsyms for the main program. */ - for (i = 0; i < env->subprog_cnt; i++) { + for (i = 1; i < env->subprog_cnt; i++) { bpf_prog_lock_ro(func[i]); bpf_prog_kallsyms_add(func[i]); } @@ -17245,6 +17246,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) prog->jited = 1; prog->bpf_func = func[0]->bpf_func; prog->jited_len = func[0]->jited_len; + prog->aux->extable = func[0]->aux->extable; + prog->aux->num_exentries = func[0]->aux->num_exentries; prog->aux->func = func; prog->aux->func_cnt = env->subprog_cnt; bpf_prog_jit_attempt_done(prog); From 84a62b445c86d0f2c9831290ffecee7269f18254 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Mon, 12 Jun 2023 17:44:54 -0700 Subject: [PATCH 736/934] selftests/bpf: add a test for subprogram extables In certain situations a program with subprograms may have a NULL extable entry. This should not happen, and when it does, it turns a single trap into multiple. Add a test case for further debugging and to prevent regressions. The test-case contains three essentially identical versions of the same test because just one program may not be sufficient to trigger the oops. This is due to the fact that the items are stored in a binary tree and have identical values so it's possible to sometimes find the ksym with the extable. With 3 copies, this has been reliable on this author's test systems. When triggered out of this test case, the oops looks like this: BUG: kernel NULL pointer dereference, address: 000000000000000c #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 1132 Comm: test_progs Tainted: G OE 6.4.0-rc3+ #2 RIP: 0010:cmp_ex_search+0xb/0x30 Code: cc cc cc cc e8 36 cb 03 00 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 55 48 89 e5 48 8b 07 <48> 63 0e 48 01 f1 31 d2 48 39 c8 19 d2 48 39 c8 b8 01 00 00 00 0f RSP: 0018:ffffb30c4291f998 EFLAGS: 00010006 RAX: ffffffffc00b49da RBX: 0000000000000002 RCX: 000000000000000c RDX: 0000000000000002 RSI: 000000000000000c RDI: ffffb30c4291f9e8 RBP: ffffb30c4291f998 R08: ffffffffab1a42d0 R09: 0000000000000001 R10: 0000000000000000 R11: ffffffffab1a42d0 R12: ffffb30c4291f9e8 R13: 000000000000000c R14: 000000000000000c R15: 0000000000000000 FS: 00007fb5d9e044c0(0000) GS:ffff92e95ee00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000000c CR3: 000000010c3a2005 CR4: 00000000007706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: bsearch+0x41/0x90 ? __pfx_cmp_ex_search+0x10/0x10 ? bpf_prog_45a7907e7114d0ff_handle_fexit_ret_subprogs3+0x2a/0x6c search_extable+0x3b/0x60 ? bpf_prog_45a7907e7114d0ff_handle_fexit_ret_subprogs3+0x2a/0x6c search_bpf_extables+0x10d/0x190 ? bpf_prog_45a7907e7114d0ff_handle_fexit_ret_subprogs3+0x2a/0x6c search_exception_tables+0x5d/0x70 fixup_exception+0x3f/0x5b0 ? look_up_lock_class+0x61/0x110 ? __lock_acquire+0x6b8/0x3560 ? __lock_acquire+0x6b8/0x3560 ? __lock_acquire+0x6b8/0x3560 kernelmode_fixup_or_oops+0x46/0x110 __bad_area_nosemaphore+0x68/0x2b0 ? __lock_acquire+0x6b8/0x3560 bad_area_nosemaphore+0x16/0x20 do_kern_addr_fault+0x81/0xa0 exc_page_fault+0xd6/0x210 asm_exc_page_fault+0x2b/0x30 RIP: 0010:bpf_prog_45a7907e7114d0ff_handle_fexit_ret_subprogs3+0x2a/0x6c Code: f3 0f 1e fa 0f 1f 44 00 00 66 90 55 48 89 e5 f3 0f 1e fa 48 8b 7f 08 49 bb 00 00 00 00 00 80 00 00 4c 39 df 73 04 31 f6 eb 04 <48> 8b 77 00 49 bb 00 00 00 00 00 80 00 00 48 81 c7 7c 00 00 00 4c RSP: 0018:ffffb30c4291fcb8 EFLAGS: 00010282 RAX: 0000000000000001 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 00000000cddf1af1 RSI: 000000005315a00d RDI: ffffffffffffffea RBP: ffffb30c4291fcb8 R08: ffff92e644bf38a8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000800000000000 R12: ffff92e663652690 R13: 00000000000001c8 R14: 00000000000001c8 R15: 0000000000000003 bpf_trampoline_251255721842_2+0x63/0x1000 bpf_testmod_return_ptr+0x9/0xb0 [bpf_testmod] ? bpf_testmod_test_read+0x43/0x2d0 [bpf_testmod] sysfs_kf_bin_read+0x60/0x90 kernfs_fop_read_iter+0x143/0x250 vfs_read+0x240/0x2a0 ksys_read+0x70/0xe0 __x64_sys_read+0x1f/0x30 do_syscall_64+0x68/0xa0 ? syscall_exit_to_user_mode+0x77/0x1f0 ? do_syscall_64+0x77/0xa0 ? irqentry_exit+0x35/0xa0 ? sysvec_apic_timer_interrupt+0x4d/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fb5da00a392 Code: ac 00 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb be 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 RSP: 002b:00007ffc5b3cab68 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 000055bee7b8b100 RCX: 00007fb5da00a392 RDX: 00000000000001c8 RSI: 0000000000000000 RDI: 0000000000000009 RBP: 00007ffc5b3caba0 R08: 0000000000000000 R09: 0000000000000037 R10: 000055bee7b8c2a7 R11: 0000000000000246 R12: 000055bee78f1f60 R13: 00007ffc5b3cae90 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: bpf_testmod(OE) nls_iso8859_1 dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua intel_rapl_msr intel_rapl_common intel_uncore_frequency_common ppdev nfit crct10dif_pclmul crc32_pclmul psmouse ghash_clmulni_intel sha512_ssse3 aesni_intel parport_pc crypto_simd cryptd input_leds parport rapl ena i2c_piix4 mac_hid serio_raw ramoops reed_solomon pstore_blk drm pstore_zone efi_pstore autofs4 [last unloaded: bpf_testmod(OE)] CR2: 000000000000000c Though there may be some variation, depending on which suprogram triggers the bug. Signed-off-by: Krister Johansen Acked-by: Yonghong Song Link: https://lore.kernel.org/r/4ebf95ec857cd785b81db69f3e408c039ad8408b.1686616663.git.kjlx@templeofstupid.com Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/subprogs_extable.c | 29 +++++++++++ .../bpf/progs/test_subprogs_extable.c | 51 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/subprogs_extable.c create mode 100644 tools/testing/selftests/bpf/progs/test_subprogs_extable.c diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c b/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c new file mode 100644 index 000000000000..3afd9f775f68 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "test_subprogs_extable.skel.h" + +void test_subprogs_extable(void) +{ + const int read_sz = 456; + struct test_subprogs_extable *skel; + int err; + + skel = test_subprogs_extable__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = test_subprogs_extable__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger tracepoint */ + ASSERT_OK(trigger_module_test_read(read_sz), "trigger_read"); + + ASSERT_NEQ(skel->bss->triggered, 0, "verify at least one program ran"); + + test_subprogs_extable__detach(skel); + +cleanup: + test_subprogs_extable__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c new file mode 100644 index 000000000000..e2a21fbd4e44 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 8); + __type(key, __u32); + __type(value, __u64); +} test_array SEC(".maps"); + +unsigned int triggered; + +static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data) +{ + return 1; +} + +SEC("fexit/bpf_testmod_return_ptr") +int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret) +{ + *(volatile long *)ret; + *(volatile int *)&ret->f_mode; + bpf_for_each_map_elem(&test_array, test_cb, NULL, 0); + triggered++; + return 0; +} + +SEC("fexit/bpf_testmod_return_ptr") +int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret) +{ + *(volatile long *)ret; + *(volatile int *)&ret->f_mode; + bpf_for_each_map_elem(&test_array, test_cb, NULL, 0); + triggered++; + return 0; +} + +SEC("fexit/bpf_testmod_return_ptr") +int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret) +{ + *(volatile long *)ret; + *(volatile int *)&ret->f_mode; + bpf_for_each_map_elem(&test_array, test_cb, NULL, 0); + triggered++; + return 0; +} + +char _license[] SEC("license") = "GPL"; From cac9e4418f4cbd548ccb065b3adcafe073f7f7d2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 12 Jun 2023 13:51:36 -0600 Subject: [PATCH 737/934] io_uring/net: save msghdr->msg_control for retries If the application sets ->msg_control and we have to later retry this command, or if it got queued with IOSQE_ASYNC to begin with, then we need to retain the original msg_control value. This is due to the net stack overwriting this field with an in-kernel pointer, to copy it in. Hitting that path for the second time will now fail the copy from user, as it's attempting to copy from a non-user address. Cc: stable@vger.kernel.org # 5.10+ Link: https://github.com/axboe/liburing/issues/880 Reported-and-tested-by: Marek Majkowski Signed-off-by: Jens Axboe --- io_uring/net.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index 89e839013837..51b0f7fbb4f5 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -65,6 +65,7 @@ struct io_sr_msg { u16 addr_len; u16 buf_group; void __user *addr; + void __user *msg_control; /* used only for send zerocopy */ struct io_kiocb *notif; }; @@ -195,11 +196,15 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + int ret; iomsg->msg.msg_name = &iomsg->addr; iomsg->free_iov = iomsg->fast_iov; - return sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, + ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, &iomsg->free_iov); + /* save msg_control as sys_sendmsg() overwrites it */ + sr->msg_control = iomsg->msg.msg_control; + return ret; } int io_send_prep_async(struct io_kiocb *req) @@ -297,6 +302,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (req_has_async_data(req)) { kmsg = req->async_data; + kmsg->msg.msg_control = sr->msg_control; } else { ret = io_sendmsg_copy_hdr(req, &iomsg); if (ret) From 374283a1001277e4d07491387aac1fad5aa08d43 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 12 Jun 2023 10:18:50 +0300 Subject: [PATCH 738/934] net: ethernet: ti: am65-cpsw: Call of_node_put() on error path This code returns directly but it should instead call of_node_put() to drop some reference counts. Fixes: dab2b265dd23 ("net: ethernet: ti: am65-cpsw: Add support for SERDES configuration") Signed-off-by: Dan Carpenter Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/e3012f0c-1621-40e6-bf7d-03c276f6e07f@kili.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 11cbcd9e2c72..bebcfd5e6b57 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2068,7 +2068,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) /* Initialize the Serdes PHY for the port */ ret = am65_cpsw_init_serdes_phy(dev, port_np, port); if (ret) - return ret; + goto of_node_put; port->slave.mac_only = of_property_read_bool(port_np, "ti,mac-only"); From aef6e908b54200d04f2d77dab31509fcff2e60ae Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 12 Jun 2023 09:57:09 +0200 Subject: [PATCH 739/934] selftests/tc-testing: Fix Error: Specified qdisc kind is unknown. All TEQL tests assume that sch_teql module is loaded. Load module in tdc.sh before running qdisc tests. Fixes following example error when running tests via tdc.sh for all TEQL tests: # $ sudo ./tdc.py -d eth2 -e 84a0 # -- ns/SubPlugin.__init__ # Test 84a0: Create TEQL with default setting # exit: 2 # exit: 0 # Error: Specified qdisc kind is unknown. # # -----> teardown stage *** Could not execute: "$TC qdisc del dev $DUMMY handle 1: root" # # -----> teardown stage *** Error message: "Error: Invalid handle. # " # returncode 2; expected [0] # # -----> teardown stage *** Aborting test run. # # <_io.BufferedReader name=3> *** stdout *** # # <_io.BufferedReader name=5> *** stderr *** # "-----> teardown stage" did not complete successfully # Exception ('teardown', 'Error: Specified qdisc kind is unknown.\n', '"-----> teardown stage" did not complete successfully') (caught in test_runner, running test 2 84a0 Create TEQL with default setting stage teardown) # --------------- # traceback # File "/images/src/linux/tools/testing/selftests/tc-testing/./tdc.py", line 495, in test_runner # res = run_one_test(pm, args, index, tidx) # File "/images/src/linux/tools/testing/selftests/tc-testing/./tdc.py", line 434, in run_one_test # prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout) # File "/images/src/linux/tools/testing/selftests/tc-testing/./tdc.py", line 245, in prepare_env # raise PluginMgrTestFail( # --------------- # accumulated output for this test: # Error: Specified qdisc kind is unknown. # # --------------- # # All test results: # # 1..1 # ok 1 84a0 - Create TEQL with default setting # skipped - "-----> teardown stage" did not complete successfully Fixes: cc62fbe114c9 ("selftests/tc-testing: add selftests for teql qdisc") Signed-off-by: Vlad Buslov Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index afb0cd86fa3d..eb357bd7923c 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -2,5 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 modprobe netdevsim +modprobe sch_teql ./tdc.py -c actions --nobuildebpf ./tdc.py -c qdisc From b849c566ee9c6ed78288a522278dcaf419f8e239 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 12 Jun 2023 09:57:10 +0200 Subject: [PATCH 740/934] selftests/tc-testing: Fix Error: failed to find target LOG Add missing netfilter config dependency. Fixes following example error when running tests via tdc.sh for all XT tests: # $ sudo ./tdc.py -d eth2 -e 2029 # Test 2029: Add xt action with log-prefix # exit: 255 # exit: 0 # failed to find target LOG # # bad action parsing # parse_action: bad value (7:xt)! # Illegal "action" # # -----> teardown stage *** Could not execute: "$TC actions flush action xt" # # -----> teardown stage *** Error message: "Error: Cannot flush unknown TC action. # We have an error flushing # " # returncode 1; expected [0] # # -----> teardown stage *** Aborting test run. # # <_io.BufferedReader name=3> *** stdout *** # # <_io.BufferedReader name=5> *** stderr *** # "-----> teardown stage" did not complete successfully # Exception ('teardown', ' failed to find target LOG\n\nbad action parsing\nparse_action: bad value (7:xt)!\nIllegal "action"\n', '"-----> teardown stage" did not complete successfully') (caught in test_runner, running test 2 2029 Add xt action with log-prefix stage teardown) # --------------- # traceback # File "/images/src/linux/tools/testing/selftests/tc-testing/./tdc.py", line 495, in test_runner # res = run_one_test(pm, args, index, tidx) # File "/images/src/linux/tools/testing/selftests/tc-testing/./tdc.py", line 434, in run_one_test # prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout) # File "/images/src/linux/tools/testing/selftests/tc-testing/./tdc.py", line 245, in prepare_env # raise PluginMgrTestFail( # --------------- # accumulated output for this test: # failed to find target LOG # # bad action parsing # parse_action: bad value (7:xt)! # Illegal "action" # # --------------- # # All test results: # # 1..1 # ok 1 2029 - Add xt action with log-prefix # skipped - "-----> teardown stage" did not complete successfully Fixes: 910d504bc187 ("selftests/tc-testings: add selftests for xt action") Signed-off-by: Vlad Buslov Reviewed-by: Pedro Tammela Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 4638c63a339f..aec4de8bea78 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -6,6 +6,7 @@ CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_LABELS=y CONFIG_NF_NAT=m +CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NET_SCHED=y From b39d8c41c7a8336ce85c376b5d4906089524a0ae Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 12 Jun 2023 09:57:11 +0200 Subject: [PATCH 741/934] selftests/tc-testing: Fix SFB db test Setting very small value of db like 10ms introduces rounding errors when converting to/from jiffies on some kernel configs. For example, on 250hz the actual value will be set to 12ms which causes the test to fail: # $ sudo ./tdc.py -d eth2 -e 3410 # -- ns/SubPlugin.__init__ # Test 3410: Create SFB with db setting # # All test results: # # 1..1 # not ok 1 3410 - Create SFB with db setting # Could not match regex pattern. Verify command output: # qdisc sfb 1: root refcnt 2 rehash 600s db 12ms limit 1000p max 25p target 20p increment 0.000503548 decrement 4.57771e-05 penalty_rate 10pps penalty_burst 20p Set the value to 100ms instead which currently seem to work on 100hz, 250hz, 300hz and 1000hz kernel configs. Fixes: 6ad92dc56fca ("selftests/tc-testing: add selftests for sfb qdisc") Signed-off-by: Vlad Buslov Reviewed-by: Pedro Tammela Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json index ba2f5e79cdbf..e21c7f22c6d4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json @@ -58,10 +58,10 @@ "setup": [ "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb db 10", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb db 100", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 10ms", + "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 100ms", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root", From 11b8b2e70a9b4a1b60eefc0cd79cd2d3c08545f1 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 12 Jun 2023 09:57:12 +0200 Subject: [PATCH 742/934] selftests/tc-testing: Remove configs that no longer exist Some qdiscs and classifiers have recently been retired from kernel. However, tc-testing config is still cluttered with them which causes noise when using merge_config.sh script to update existing config for tc-testing compatibility. Remove the config settings for affected qdiscs and classifiers. Fixes: fb38306ceb9e ("net/sched: Retire ATM qdisc") Fixes: 051d44209842 ("net/sched: Retire CBQ qdisc") Fixes: bbe77c14ee61 ("net/sched: Retire dsmark qdisc") Fixes: 265b4da82dbf ("net/sched: Retire rsvp classifier") Fixes: 8c710f75256b ("net/sched: Retire tcindex classifier") Signed-off-by: Vlad Buslov Reviewed-by: Pedro Tammela Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/config | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index aec4de8bea78..6e73b09c20c8 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -13,14 +13,11 @@ CONFIG_NET_SCHED=y # # Queueing/Scheduling # -CONFIG_NET_SCH_ATM=m CONFIG_NET_SCH_CAKE=m -CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_DSMARK=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_FQ_CODEL=m @@ -58,8 +55,6 @@ CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 CONFIG_NET_EMATCH_CMP=m From f1a0898b5d6a77d332d036da03bad6fa9770de5b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 9 Jun 2023 14:29:39 -0700 Subject: [PATCH 743/934] wifi: iwlwifi: mvm: spin_lock_bh() to fix lockdep regression Lockdep on 6.4-rc on ThinkPad X1 Carbon 5th says ===================================================== WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected 6.4.0-rc5 #1 Not tainted ----------------------------------------------------- kworker/3:1/49 [HC0[0]:SC0[4]:HE1:SE0] is trying to acquire: ffff8881066fa368 (&mvm_sta->deflink.lq_sta.rs_drv.pers.lock){+.+.}-{2:2}, at: rs_drv_get_rate+0x46/0xe7 and this task is already holding: ffff8881066f80a8 (&sta->rate_ctrl_lock){+.-.}-{2:2}, at: rate_control_get_rate+0xbd/0x126 which would create a new lock dependency: (&sta->rate_ctrl_lock){+.-.}-{2:2} -> (&mvm_sta->deflink.lq_sta.rs_drv.pers.lock){+.+.}-{2:2} but this new dependency connects a SOFTIRQ-irq-safe lock: (&sta->rate_ctrl_lock){+.-.}-{2:2} etc. etc. etc. Changing the spin_lock() in rs_drv_get_rate() to spin_lock_bh() was not enough to pacify lockdep, but changing them all on pers.lock has worked. Fixes: a8938bc881d2 ("wifi: iwlwifi: mvm: Add locking to the rate read flow") Signed-off-by: Hugh Dickins Link: https://lore.kernel.org/r/79ffcc22-9775-cb6d-3ffd-1a517c40beef@google.com Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 23266d0c9ce4..9a20468345e4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -2692,7 +2692,7 @@ static void rs_drv_get_rate(void *mvm_r, struct ieee80211_sta *sta, lq_sta = mvm_sta; - spin_lock(&lq_sta->pers.lock); + spin_lock_bh(&lq_sta->pers.lock); iwl_mvm_hwrate_to_tx_rate_v1(lq_sta->last_rate_n_flags, info->band, &info->control.rates[0]); info->control.rates[0].count = 1; @@ -2707,7 +2707,7 @@ static void rs_drv_get_rate(void *mvm_r, struct ieee80211_sta *sta, iwl_mvm_hwrate_to_tx_rate_v1(last_ucode_rate, info->band, &txrc->reported_rate); } - spin_unlock(&lq_sta->pers.lock); + spin_unlock_bh(&lq_sta->pers.lock); } static void *rs_drv_alloc_sta(void *mvm_rate, struct ieee80211_sta *sta, @@ -3264,11 +3264,11 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, /* If it's locked we are in middle of init flow * just wait for next tx status to update the lq_sta data */ - if (!spin_trylock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock)) + if (!spin_trylock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock)) return; __iwl_mvm_rs_tx_status(mvm, sta, tid, info, ndp); - spin_unlock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); + spin_unlock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); } #ifdef CONFIG_MAC80211_DEBUGFS @@ -4117,9 +4117,9 @@ void iwl_mvm_rs_rate_init(struct iwl_mvm *mvm, } else { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - spin_lock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); + spin_lock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); rs_drv_rate_init(mvm, sta, band); - spin_unlock(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); + spin_unlock_bh(&mvmsta->deflink.lq_sta.rs_drv.pers.lock); } } From 41f2c7c342d3adb1c4dd5f2e3dd831adff16a669 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Fri, 9 Jun 2023 15:22:59 +0300 Subject: [PATCH 744/934] net/sched: act_ct: Fix promotion of offloaded unreplied tuple Currently UNREPLIED and UNASSURED connections are added to the nf flow table. This causes the following connection packets to be processed by the flow table which then skips conntrack_in(), and thus such the connections will remain UNREPLIED and UNASSURED even if reply traffic is then seen. Even still, the unoffloaded reply packets are the ones triggering hardware update from new to established state, and if there aren't any to triger an update and/or previous update was missed, hardware can get out of sync with sw and still mark packets as new. Fix the above by: 1) Not skipping conntrack_in() for UNASSURED packets, but still refresh for hardware, as before the cited patch. 2) Try and force a refresh by reply-direction packets that update the hardware rules from new to established state. 3) Remove any bidirectional flows that didn't failed to update in hardware for re-insertion as bidrectional once any new packet arrives. Fixes: 6a9bad0069cf ("net/sched: act_ct: offload UDP NEW connections") Co-developed-by: Vlad Buslov Signed-off-by: Vlad Buslov Signed-off-by: Paul Blakey Reviewed-by: Florian Westphal Link: https://lore.kernel.org/r/1686313379-117663-1-git-send-email-paulb@nvidia.com Signed-off-by: Paolo Abeni --- include/net/netfilter/nf_flow_table.h | 2 +- net/netfilter/nf_flow_table_core.c | 13 ++++++++++--- net/netfilter/nf_flow_table_ip.c | 4 ++-- net/sched/act_ct.c | 9 ++++++++- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index ebb28ec5b6fa..f37f9f34430c 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -268,7 +268,7 @@ int flow_offload_route_init(struct flow_offload *flow, int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow); void flow_offload_refresh(struct nf_flowtable *flow_table, - struct flow_offload *flow); + struct flow_offload *flow, bool force); struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 04bd0ed4d2ae..b0ef48b21dcb 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -317,12 +317,12 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) EXPORT_SYMBOL_GPL(flow_offload_add); void flow_offload_refresh(struct nf_flowtable *flow_table, - struct flow_offload *flow) + struct flow_offload *flow, bool force) { u32 timeout; timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); - if (timeout - READ_ONCE(flow->timeout) > HZ) + if (force || timeout - READ_ONCE(flow->timeout) > HZ) WRITE_ONCE(flow->timeout, timeout); else return; @@ -334,6 +334,12 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, } EXPORT_SYMBOL_GPL(flow_offload_refresh); +static bool nf_flow_is_outdated(const struct flow_offload *flow) +{ + return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) && + !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags); +} + static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return nf_flow_timeout_delta(flow->timeout) <= 0; @@ -423,7 +429,8 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { if (nf_flow_has_expired(flow) || - nf_ct_is_dying(flow->ct)) + nf_ct_is_dying(flow->ct) || + nf_flow_is_outdated(flow)) flow_offload_teardown(flow); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 19efba1e51ef..3bbaf9c7ea46 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -384,7 +384,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; - flow_offload_refresh(flow_table, flow); + flow_offload_refresh(flow_table, flow, false); nf_flow_encap_pop(skb, tuplehash); thoff -= offset; @@ -650,7 +650,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; - flow_offload_refresh(flow_table, flow); + flow_offload_refresh(flow_table, flow, false); nf_flow_encap_pop(skb, tuplehash); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9cc0bc7c71ed..abc71a06d634 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -610,6 +610,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct flow_offload_tuple tuple = {}; enum ip_conntrack_info ctinfo; struct tcphdr *tcph = NULL; + bool force_refresh = false; struct flow_offload *flow; struct nf_conn *ct; u8 dir; @@ -647,6 +648,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, * established state, then don't refresh. */ return false; + force_refresh = true; } if (tcph && (unlikely(tcph->fin || tcph->rst))) { @@ -660,7 +662,12 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, else ctinfo = IP_CT_ESTABLISHED_REPLY; - flow_offload_refresh(nf_ft, flow); + flow_offload_refresh(nf_ft, flow, force_refresh); + if (!test_bit(IPS_ASSURED_BIT, &ct->status)) { + /* Process this flow in SW to allow promoting to ASSURED */ + return false; + } + nf_conntrack_get(&ct->ct_general); nf_ct_set(skb, ct, ctinfo); if (nf_ft->flags & NF_FLOWTABLE_COUNTER) From 2d5f6a8d7aef7852a9ecc555f88c673a1c91754f Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Sat, 10 Jun 2023 20:30:15 -0700 Subject: [PATCH 745/934] net/sched: Refactor qdisc_graft() for ingress and clsact Qdiscs Grafting ingress and clsact Qdiscs does not need a for-loop in qdisc_graft(). Refactor it. No functional changes intended. Tested-by: Pedro Tammela Acked-by: Jamal Hadi Salim Reviewed-by: Jamal Hadi Salim Reviewed-by: Vlad Buslov Signed-off-by: Peilin Ye Signed-off-by: Paolo Abeni --- net/sched/sch_api.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e4b6452318c0..094ca3a5b633 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1079,12 +1079,12 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (parent == NULL) { unsigned int i, num_q, ingress; + struct netdev_queue *dev_queue; ingress = 0; num_q = dev->num_tx_queues; if ((q && q->flags & TCQ_F_INGRESS) || (new && new->flags & TCQ_F_INGRESS)) { - num_q = 1; ingress = 1; if (!dev_ingress_queue(dev)) { NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); @@ -1100,18 +1100,18 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (new && new->ops->attach && !ingress) goto skip; - for (i = 0; i < num_q; i++) { - struct netdev_queue *dev_queue = dev_ingress_queue(dev); - - if (!ingress) + if (!ingress) { + for (i = 0; i < num_q; i++) { dev_queue = netdev_get_tx_queue(dev, i); + old = dev_graft_qdisc(dev_queue, new); - old = dev_graft_qdisc(dev_queue, new); - if (new && i > 0) - qdisc_refcount_inc(new); - - if (!ingress) + if (new && i > 0) + qdisc_refcount_inc(new); qdisc_put(old); + } + } else { + dev_queue = dev_ingress_queue(dev); + old = dev_graft_qdisc(dev_queue, new); } skip: From 84ad0af0bccd3691cb951c2974c5cb2c10594d4a Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Sat, 10 Jun 2023 20:30:25 -0700 Subject: [PATCH 746/934] net/sched: qdisc_destroy() old ingress and clsact Qdiscs before grafting mini_Qdisc_pair::p_miniq is a double pointer to mini_Qdisc, initialized in ingress_init() to point to net_device::miniq_ingress. ingress Qdiscs access this per-net_device pointer in mini_qdisc_pair_swap(). Similar for clsact Qdiscs and miniq_egress. Unfortunately, after introducing RTNL-unlocked RTM_{NEW,DEL,GET}TFILTER requests (thanks Hillf Danton for the hint), when replacing ingress or clsact Qdiscs, for example, the old Qdisc ("@old") could access the same miniq_{in,e}gress pointer(s) concurrently with the new Qdisc ("@new"), causing race conditions [1] including a use-after-free bug in mini_qdisc_pair_swap() reported by syzbot: BUG: KASAN: slab-use-after-free in mini_qdisc_pair_swap+0x1c2/0x1f0 net/sched/sch_generic.c:1573 Write of size 8 at addr ffff888045b31308 by task syz-executor690/14901 ... Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106 print_address_description.constprop.0+0x2c/0x3c0 mm/kasan/report.c:319 print_report mm/kasan/report.c:430 [inline] kasan_report+0x11c/0x130 mm/kasan/report.c:536 mini_qdisc_pair_swap+0x1c2/0x1f0 net/sched/sch_generic.c:1573 tcf_chain_head_change_item net/sched/cls_api.c:495 [inline] tcf_chain0_head_change.isra.0+0xb9/0x120 net/sched/cls_api.c:509 tcf_chain_tp_insert net/sched/cls_api.c:1826 [inline] tcf_chain_tp_insert_unique net/sched/cls_api.c:1875 [inline] tc_new_tfilter+0x1de6/0x2290 net/sched/cls_api.c:2266 ... @old and @new should not affect each other. In other words, @old should never modify miniq_{in,e}gress after @new, and @new should not update @old's RCU state. Fixing without changing sch_api.c turned out to be difficult (please refer to Closes: for discussions). Instead, make sure @new's first call always happen after @old's last call (in {ingress,clsact}_destroy()) has finished: In qdisc_graft(), return -EBUSY if @old has any ongoing filter requests, and call qdisc_destroy() for @old before grafting @new. Introduce qdisc_refcount_dec_if_one() as the counterpart of qdisc_refcount_inc_nz() used for filter requests. Introduce a non-static version of qdisc_destroy() that does a TCQ_F_BUILTIN check, just like qdisc_put() etc. Depends on patch "net/sched: Refactor qdisc_graft() for ingress and clsact Qdiscs". [1] To illustrate, the syzkaller reproducer adds ingress Qdiscs under TC_H_ROOT (no longer possible after commit c7cfbd115001 ("net/sched: sch_ingress: Only create under TC_H_INGRESS")) on eth0 that has 8 transmission queues: Thread 1 creates ingress Qdisc A (containing mini Qdisc a1 and a2), then adds a flower filter X to A. Thread 2 creates another ingress Qdisc B (containing mini Qdisc b1 and b2) to replace A, then adds a flower filter Y to B. Thread 1 A's refcnt Thread 2 RTM_NEWQDISC (A, RTNL-locked) qdisc_create(A) 1 qdisc_graft(A) 9 RTM_NEWTFILTER (X, RTNL-unlocked) __tcf_qdisc_find(A) 10 tcf_chain0_head_change(A) mini_qdisc_pair_swap(A) (1st) | | RTM_NEWQDISC (B, RTNL-locked) RCU sync 2 qdisc_graft(B) | 1 notify_and_destroy(A) | tcf_block_release(A) 0 RTM_NEWTFILTER (Y, RTNL-unlocked) qdisc_destroy(A) tcf_chain0_head_change(B) tcf_chain0_head_change_cb_del(A) mini_qdisc_pair_swap(B) (2nd) mini_qdisc_pair_swap(A) (3rd) | ... ... Here, B calls mini_qdisc_pair_swap(), pointing eth0->miniq_ingress to its mini Qdisc, b1. Then, A calls mini_qdisc_pair_swap() again during ingress_destroy(), setting eth0->miniq_ingress to NULL, so ingress packets on eth0 will not find filter Y in sch_handle_ingress(). This is just one of the possible consequences of concurrently accessing miniq_{in,e}gress pointers. Fixes: 7a096d579e8e ("net: sched: ingress: set 'unlocked' flag for Qdisc ops") Fixes: 87f373921c4e ("net: sched: ingress: set 'unlocked' flag for clsact Qdisc ops") Reported-by: syzbot+b53a9c0d1ea4ad62da8b@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/0000000000006cf87705f79acf1a@google.com/ Cc: Hillf Danton Cc: Vlad Buslov Signed-off-by: Peilin Ye Acked-by: Jamal Hadi Salim Signed-off-by: Paolo Abeni --- include/net/sch_generic.h | 8 ++++++++ net/sched/sch_api.c | 28 +++++++++++++++++++++++----- net/sched/sch_generic.c | 14 +++++++++++--- 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 27271f2b37cb..12eadecf8cd0 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -137,6 +137,13 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc) refcount_inc(&qdisc->refcnt); } +static inline bool qdisc_refcount_dec_if_one(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return true; + return refcount_dec_if_one(&qdisc->refcnt); +} + /* Intended to be used by unlocked users, when concurrent qdisc release is * possible. */ @@ -652,6 +659,7 @@ void dev_deactivate_many(struct list_head *head); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); +void qdisc_destroy(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 094ca3a5b633..aa6b1fe65151 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1086,10 +1086,22 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if ((q && q->flags & TCQ_F_INGRESS) || (new && new->flags & TCQ_F_INGRESS)) { ingress = 1; - if (!dev_ingress_queue(dev)) { + dev_queue = dev_ingress_queue(dev); + if (!dev_queue) { NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); return -ENOENT; } + + q = rtnl_dereference(dev_queue->qdisc_sleeping); + + /* This is the counterpart of that qdisc_refcount_inc_nz() call in + * __tcf_qdisc_find() for filter requests. + */ + if (!qdisc_refcount_dec_if_one(q)) { + NL_SET_ERR_MSG(extack, + "Current ingress or clsact Qdisc has ongoing filter requests"); + return -EBUSY; + } } if (dev->flags & IFF_UP) @@ -1110,8 +1122,16 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_put(old); } } else { - dev_queue = dev_ingress_queue(dev); - old = dev_graft_qdisc(dev_queue, new); + old = dev_graft_qdisc(dev_queue, NULL); + + /* {ingress,clsact}_destroy() @old before grafting @new to avoid + * unprotected concurrent accesses to net_device::miniq_{in,e}gress + * pointer(s) in mini_qdisc_pair_swap(). + */ + qdisc_notify(net, skb, n, classid, old, new, extack); + qdisc_destroy(old); + + dev_graft_qdisc(dev_queue, new); } skip: @@ -1125,8 +1145,6 @@ skip: if (new && new->ops->attach) new->ops->attach(new); - } else { - notify_and_destroy(net, skb, n, classid, old, new, extack); } if (dev->flags & IFF_UP) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 3248259eba32..5d7e23f4cc0e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1046,7 +1046,7 @@ static void qdisc_free_cb(struct rcu_head *head) qdisc_free(q); } -static void qdisc_destroy(struct Qdisc *qdisc) +static void __qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -1070,6 +1070,14 @@ static void qdisc_destroy(struct Qdisc *qdisc) call_rcu(&qdisc->rcu, qdisc_free_cb); } +void qdisc_destroy(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return; + + __qdisc_destroy(qdisc); +} + void qdisc_put(struct Qdisc *qdisc) { if (!qdisc) @@ -1079,7 +1087,7 @@ void qdisc_put(struct Qdisc *qdisc) !refcount_dec_and_test(&qdisc->refcnt)) return; - qdisc_destroy(qdisc); + __qdisc_destroy(qdisc); } EXPORT_SYMBOL(qdisc_put); @@ -1094,7 +1102,7 @@ void qdisc_put_unlocked(struct Qdisc *qdisc) !refcount_dec_and_rtnl_lock(&qdisc->refcnt)) return; - qdisc_destroy(qdisc); + __qdisc_destroy(qdisc); rtnl_unlock(); } EXPORT_SYMBOL(qdisc_put_unlocked); From 3c40eb8145325b0f5b93b8a169146078cb2c49d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Wed, 7 Jun 2023 10:27:12 +0200 Subject: [PATCH 747/934] mmc: meson-gx: remove redundant mmc_request_done() call from irq context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to mmc_request_done() can schedule, so it must not be called from irq context. Wake the irq thread if it needs to be called, and let its existing logic do its work. Fixes the following kernel bug, which appears when running an RT patched kernel on the AmLogic Meson AXG A113X SoC: [ 11.111407] BUG: scheduling while atomic: kworker/0:1H/75/0x00010001 [ 11.111438] Modules linked in: [ 11.111451] CPU: 0 PID: 75 Comm: kworker/0:1H Not tainted 6.4.0-rc3-rt2-rtx-00081-gfd07f41ed6b4-dirty #1 [ 11.111461] Hardware name: RTX AXG A113X Linux Platform Board (DT) [ 11.111469] Workqueue: kblockd blk_mq_run_work_fn [ 11.111492] Call trace: [ 11.111497] dump_backtrace+0xac/0xe8 [ 11.111510] show_stack+0x18/0x28 [ 11.111518] dump_stack_lvl+0x48/0x60 [ 11.111530] dump_stack+0x18/0x24 [ 11.111537] __schedule_bug+0x4c/0x68 [ 11.111548] __schedule+0x80/0x574 [ 11.111558] schedule_loop+0x2c/0x50 [ 11.111567] schedule_rtlock+0x14/0x20 [ 11.111576] rtlock_slowlock_locked+0x468/0x730 [ 11.111587] rt_spin_lock+0x40/0x64 [ 11.111596] __wake_up_common_lock+0x5c/0xc4 [ 11.111610] __wake_up+0x18/0x24 [ 11.111620] mmc_blk_mq_req_done+0x68/0x138 [ 11.111633] mmc_request_done+0x104/0x118 [ 11.111644] meson_mmc_request_done+0x38/0x48 [ 11.111654] meson_mmc_irq+0x128/0x1f0 [ 11.111663] __handle_irq_event_percpu+0x70/0x114 [ 11.111674] handle_irq_event_percpu+0x18/0x4c [ 11.111683] handle_irq_event+0x80/0xb8 [ 11.111691] handle_fasteoi_irq+0xa4/0x120 [ 11.111704] handle_irq_desc+0x20/0x38 [ 11.111712] generic_handle_domain_irq+0x1c/0x28 [ 11.111721] gic_handle_irq+0x8c/0xa8 [ 11.111735] call_on_irq_stack+0x24/0x4c [ 11.111746] do_interrupt_handler+0x88/0x94 [ 11.111757] el1_interrupt+0x34/0x64 [ 11.111769] el1h_64_irq_handler+0x18/0x24 [ 11.111779] el1h_64_irq+0x64/0x68 [ 11.111786] __add_wait_queue+0x0/0x4c [ 11.111795] mmc_blk_rw_wait+0x84/0x118 [ 11.111804] mmc_blk_mq_issue_rq+0x5c4/0x654 [ 11.111814] mmc_mq_queue_rq+0x194/0x214 [ 11.111822] blk_mq_dispatch_rq_list+0x3ac/0x528 [ 11.111834] __blk_mq_sched_dispatch_requests+0x340/0x4d0 [ 11.111847] blk_mq_sched_dispatch_requests+0x38/0x70 [ 11.111858] blk_mq_run_work_fn+0x3c/0x70 [ 11.111865] process_one_work+0x17c/0x1f0 [ 11.111876] worker_thread+0x1d4/0x26c [ 11.111885] kthread+0xe4/0xf4 [ 11.111894] ret_from_fork+0x10/0x20 Fixes: 51c5d8447bd7 ("MMC: meson: initial support for GX platforms") Cc: stable@vger.kernel.org Signed-off-by: Martin Hundebøll Link: https://lore.kernel.org/r/20230607082713.517157-1-martin@geanix.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index b8514d9d5e73..f90b0fd8d8b0 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -991,11 +991,8 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id) if (data && !cmd->error) data->bytes_xfered = data->blksz * data->blocks; - if (meson_mmc_bounce_buf_read(data) || - meson_mmc_get_next_command(cmd)) - ret = IRQ_WAKE_THREAD; - else - ret = IRQ_HANDLED; + + return IRQ_WAKE_THREAD; } out: @@ -1007,9 +1004,6 @@ out: writel(start, host->regs + SD_EMMC_START); } - if (ret == IRQ_HANDLED) - meson_mmc_request_done(host->mmc, cmd->mrq); - return ret; } From bef68e201e538eaa3a91f97aae8161eb2d0a8ed7 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Mon, 12 Jun 2023 16:34:58 +0200 Subject: [PATCH 748/934] selftests: forwarding: hw_stats_l3: Set addrgenmode in a separate step Setting the IPv6 address generation mode of a net device during its creation never worked, but after commit b0ad3c179059 ("rtnetlink: call validate_linkmsg in rtnl_create_link") it explicitly fails [1]. The failure is caused by the fact that validate_linkmsg() is called before the net device is registered, when it still does not have an 'inet6_dev'. Likewise, raising the net device before setting the address generation mode is meaningless, because by the time the mode is set, the address has already been generated. Therefore, fix the test to first create the net device, then set its IPv6 address generation mode and finally bring it up. [1] # ip link add name mydev addrgenmode eui64 type dummy RTNETLINK answers: Address family not supported by protocol Fixes: ba95e7930957 ("selftests: forwarding: hw_stats_l3: Add a new test") Signed-off-by: Danielle Ratson Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/f3b05d85b2bc0c3d6168fe8f7207c6c8365703db.1686580046.git.petrm@nvidia.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/forwarding/hw_stats_l3.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh index 432fe8469851..48584a51388f 100755 --- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh +++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh @@ -84,8 +84,9 @@ h2_destroy() router_rp1_200_create() { - ip link add name $rp1.200 up \ - link $rp1 addrgenmode eui64 type vlan id 200 + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up ip address add dev $rp1.200 192.0.2.2/28 ip address add dev $rp1.200 2001:db8:1::2/64 ip stats set dev $rp1.200 l3_stats on @@ -256,9 +257,11 @@ reapply_config() router_rp1_200_destroy - ip link add name $rp1.200 link $rp1 addrgenmode none type vlan id 200 + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode none ip stats set dev $rp1.200 l3_stats on - ip link set dev $rp1.200 up addrgenmode eui64 + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up ip address add dev $rp1.200 192.0.2.2/28 ip address add dev $rp1.200 2001:db8:1::2/64 } From b104dbedbe61d89a933479f8effce6409037ef73 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 6 Jun 2023 07:59:19 +0100 Subject: [PATCH 749/934] Documentation: RISC-V: patch-acceptance: mention patchwork's role MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Palmer suggested at some point, not sure if it was in one of the weekly linux-riscv syncs, or a conversation at FOSDEM, that we should document the role of the automation running on our patchwork instance plays in patch acceptance. Add a short note to the patch-acceptance document to that end. Signed-off-by: Conor Dooley Reviewed-by: Björn Töpel Link: https://lore.kernel.org/r/20230606-rehab-monsoon-12c17bbe08e3@wendy Signed-off-by: Palmer Dabbelt --- Documentation/riscv/patch-acceptance.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst index 07d5a5623e2a..634aa222b410 100644 --- a/Documentation/riscv/patch-acceptance.rst +++ b/Documentation/riscv/patch-acceptance.rst @@ -16,6 +16,24 @@ tested code over experimental code. We wish to extend these same principles to the RISC-V-related code that will be accepted for inclusion in the kernel. +Patchwork +--------- + +RISC-V has a patchwork instance, where the status of patches can be checked: + + https://patchwork.kernel.org/project/linux-riscv/list/ + +If your patch does not appear in the default view, the RISC-V maintainers have +likely either requested changes, or expect it to be applied to another tree. + +Automation runs against this patchwork instance, building/testing patches as +they arrive. The automation applies patches against the current HEAD of the +RISC-V `for-next` and `fixes` branches, depending on whether the patch has been +detected as a fix. Failing those, it will use the RISC-V `master` branch. +The exact commit to which a series has been applied will be noted on patchwork. +Patches for which any of the checks fail are unlikely to be applied and in most +cases will need to be resubmitted. + Submit Checklist Addendum ------------------------- We'll only accept patches for new modules or extensions if the From c774e6779f38bf36f0cce65e30793704bab4b0d7 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 11 Jun 2023 11:23:32 -0500 Subject: [PATCH 750/934] cifs: fix lease break oops in xfstest generic/098 umount can race with lease break so need to check if tcon->ses->server is still valid to send the lease break response. Reviewed-by: Bharath SM Reviewed-by: Shyam Prasad N Fixes: 59a556aebc43 ("SMB3: drop reference to cfile before sending oplock break") Signed-off-by: Steve French --- fs/smb/client/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index df88b8c04d03..051283386e22 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -4942,9 +4942,13 @@ oplock_break_ack: * disconnected since oplock already released by the server */ if (!oplock_break_cancelled) { - rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid, + /* check for server null since can race with kill_sb calling tree disconnect */ + if (tcon->ses && tcon->ses->server) { + rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid, volatile_fid, net_fid, cinode); - cifs_dbg(FYI, "Oplock release rc = %d\n", rc); + cifs_dbg(FYI, "Oplock release rc = %d\n", rc); + } else + pr_warn_once("lease break not sent for unmounted share\n"); } cifs_done_oplock_break(cinode); From e4645cc2f1e2d6f268bb8dcfac40997c52432aed Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Fri, 9 Jun 2023 17:46:56 +0000 Subject: [PATCH 751/934] cifs: add a warning when the in-flight count goes negative We've seen the in-flight count go into negative with some internal stress testing in Microsoft. Adding a WARN when this happens, in hope of understanding why this happens when it happens. Signed-off-by: Shyam Prasad N Reviewed-by: Bharath SM Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index bbb19bbb14c9..a8bb9d00d33a 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -93,6 +93,7 @@ smb2_add_credits(struct TCP_Server_Info *server, server->conn_id, server->hostname, *val, add, server->in_flight); } + WARN_ON_ONCE(server->in_flight == 0); server->in_flight--; if (server->in_flight == 0 && ((optype & CIFS_OP_MASK) != CIFS_NEG_OP) && From b50f2d048ecf1512ff85128ea4153bceb0e60590 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 14 Jun 2023 14:49:35 +0800 Subject: [PATCH 752/934] btrfs: scrub: fix a return value overwrite in scrub_stripe() [RETURN VALUE OVERWRITE] Inside scrub_stripe(), we would submit all the remaining stripes after iterating all extents. But since flush_scrub_stripes() can return error, we need to avoid overwriting the existing @ret if there is any error. However the existing check is doing the wrong check: ret2 = flush_scrub_stripes(); if (!ret2) ret = ret2; This would overwrite the existing @ret to 0 as long as the final flush detects no critical errors. [FIX] We should check @ret other than @ret2 in that case. Fixes: 8eb3dd17eadd ("btrfs: dev-replace: error out if we have unrepaired metadata error during") Reviewed-by: Christoph Hellwig Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 50c241aba1a1..bceaa8c2007e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2266,7 +2266,7 @@ next: } out: ret2 = flush_scrub_stripes(sctx); - if (!ret2) + if (!ret) ret = ret2; if (sctx->raid56_data_stripes) { for (int i = 0; i < nr_data_stripes(map); i++) From ba470eebc2f6c2f704872955a715b9555328e7d0 Mon Sep 17 00:00:00 2001 From: sunliming Date: Mon, 29 May 2023 11:21:00 +0800 Subject: [PATCH 753/934] tracing/user_events: Prevent same name but different args event User processes register name_args for events. If the same name but different args event are registered. The trace outputs of second event are printed as the first event. This is incorrect. Return EADDRINUSE back to the user process if the same name but different args event has being registered. Link: https://lore.kernel.org/linux-trace-kernel/20230529032100.286534-1-sunliming@kylinos.cn Signed-off-by: sunliming Reviewed-by: Masami Hiramatsu (Google) Acked-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 36 +++++++++++++++---- .../selftests/user_events/ftrace_test.c | 6 ++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index dbb14705d0d3..37a38496a6be 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1786,6 +1786,8 @@ static int user_event_parse(struct user_event_group *group, char *name, int ret; u32 key; struct user_event *user; + int argc = 0; + char **argv; /* Prevent dyn_event from racing */ mutex_lock(&event_mutex); @@ -1793,13 +1795,35 @@ static int user_event_parse(struct user_event_group *group, char *name, mutex_unlock(&event_mutex); if (user) { - *newuser = user; - /* - * Name is allocated by caller, free it since it already exists. - * Caller only worries about failure cases for freeing. - */ - kfree(name); + if (args) { + argv = argv_split(GFP_KERNEL, args, &argc); + if (!argv) { + ret = -ENOMEM; + goto error; + } + + ret = user_fields_match(user, argc, (const char **)argv); + argv_free(argv); + + } else + ret = list_empty(&user->fields); + + if (ret) { + *newuser = user; + /* + * Name is allocated by caller, free it since it already exists. + * Caller only worries about failure cases for freeing. + */ + kfree(name); + } else { + ret = -EADDRINUSE; + goto error; + } + return 0; +error: + refcount_dec(&user->refcnt); + return ret; } user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT); diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index 7c99cef94a65..6e8c4b47281c 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -228,6 +228,12 @@ TEST_F(user, register_events) { ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); + /* Multiple registers to same name but different args should fail */ + reg.enable_bit = 29; + reg.name_args = (__u64)"__test_event u32 field1;"; + ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSREG, ®)); + ASSERT_EQ(EADDRINUSE, errno); + /* Ensure disabled */ self->enable_fd = open(enable_file, O_RDWR); ASSERT_NE(-1, self->enable_fd); From cfac4ed7279d056df6167bd665e460787dc9e0c4 Mon Sep 17 00:00:00 2001 From: sunliming Date: Mon, 29 May 2023 14:51:10 +0800 Subject: [PATCH 754/934] tracing/user_events: Handle matching arguments that is null from dyn_events When A registering user event from dyn_events has no argments, it will pass the matching check, regardless of whether there is a user event with the same name and arguments. Add the matching check when the arguments of registering user event is null. Link: https://lore.kernel.org/linux-trace-kernel/20230529065110.303440-1-sunliming@kylinos.cn Signed-off-by: sunliming Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 37a38496a6be..afe61dc86543 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1745,6 +1745,8 @@ static bool user_event_match(const char *system, const char *event, if (match && argc > 0) match = user_fields_match(user, argc, argv); + else if (match && argc == 0) + match = list_empty(&user->fields); return match; } From e70bb54d7a51299e7feca9169c07d79f9a3fc01d Mon Sep 17 00:00:00 2001 From: sunliming Date: Thu, 25 May 2023 16:52:32 +0800 Subject: [PATCH 755/934] tracing: Modify print_fields() for fields output order Now the print_fields() print trace event fields in reverse order. Modify it to the positive sequence. Example outputs for a user event: test0 u32 count1; u32 count2 Output before: example-2547 [000] ..... 325.666387: test0: count2=0x2 (2) count1=0x1 (1) Output after: example-2742 [002] ..... 429.769370: test0: count1=0x1 (1) count2=0x2 (2) Link: https://lore.kernel.org/linux-trace-kernel/20230525085232.5096-1-sunliming@kylinos.cn Fixes: 80a76994b2d88 ("tracing: Add "fields" option to show raw trace event fields") Signed-off-by: sunliming Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 15f05faaae44..1e33f367783e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -847,7 +847,7 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c int ret; void *pos; - list_for_each_entry(field, head, link) { + list_for_each_entry_reverse(field, head, link) { trace_seq_printf(&iter->seq, " %s=", field->name); if (field->offset + field->size > iter->ent_size) { trace_seq_puts(&iter->seq, ""); From 6f05dcabe5c241d066ec472cf38ac8b84f8c9c6f Mon Sep 17 00:00:00 2001 From: sunliming Date: Tue, 6 Jun 2023 14:20:24 +0800 Subject: [PATCH 756/934] tracing/user_events: Fix the incorrect trace record for empty arguments events The user_events support events that has empty arguments. But the trace event is discarded and not really committed when the arguments is empty. Fix this by not attempting to copy in zero-length data. Link: https://lkml.kernel.org/r/20230606062027.1008398-2-sunliming@kylinos.cn Acked-by: Beau Belgrave Acked-by: Masami Hiramatsu (Google) Signed-off-by: sunliming Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index afe61dc86543..49914b6cb651 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1432,7 +1432,7 @@ static void user_event_ftrace(struct user_event *user, struct iov_iter *i, if (unlikely(!entry)) return; - if (unlikely(!copy_nofault(entry + 1, i->count, i))) + if (unlikely(i->count != 0 && !copy_nofault(entry + 1, i->count, i))) goto discard; if (!list_empty(&user->validators) && @@ -1473,7 +1473,7 @@ static void user_event_perf(struct user_event *user, struct iov_iter *i, perf_fetch_caller_regs(regs); - if (unlikely(!copy_nofault(perf_entry + 1, i->count, i))) + if (unlikely(i->count != 0 && !copy_nofault(perf_entry + 1, i->count, i))) goto discard; if (!list_empty(&user->validators) && From 3e7269dd5fd5e77c215829e80fc1f29c989c9c42 Mon Sep 17 00:00:00 2001 From: sunliming Date: Tue, 6 Jun 2023 14:20:25 +0800 Subject: [PATCH 757/934] selftests/user_events: Add ftrace self-test for empty arguments events Tests to ensure events that has empty arguments can input trace record correctly when using ftrace. Link: https://lkml.kernel.org/r/20230606062027.1008398-3-sunliming@kylinos.cn Acked-by: Beau Belgrave Acked-by: Masami Hiramatsu (Google) Signed-off-by: sunliming Signed-off-by: Steven Rostedt (Google) --- .../selftests/user_events/ftrace_test.c | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index 6e8c4b47281c..abfb49558a26 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -316,6 +316,39 @@ TEST_F(user, write_events) { ASSERT_EQ(EINVAL, errno); } +TEST_F(user, write_empty_events) { + struct user_reg reg = {0}; + struct iovec io[1]; + int before = 0, after = 0; + + reg.size = sizeof(reg); + reg.name_args = (__u64)"__test_event"; + reg.enable_bit = 31; + reg.enable_addr = (__u64)&self->check; + reg.enable_size = sizeof(self->check); + + io[0].iov_base = ®.write_index; + io[0].iov_len = sizeof(reg.write_index); + + /* Register should work */ + ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); + ASSERT_EQ(0, reg.write_index); + ASSERT_EQ(0, self->check); + + /* Enable event */ + self->enable_fd = open(enable_file, O_RDWR); + ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1"))) + + /* Event should now be enabled */ + ASSERT_EQ(1 << reg.enable_bit, self->check); + + /* Write should make it out to ftrace buffers */ + before = trace_bytes(); + ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 1)); + after = trace_bytes(); + ASSERT_GT(after, before); +} + TEST_F(user, write_fault) { struct user_reg reg = {0}; struct iovec io[2]; From 4b56c21b11dcd2c6e4c6ee81bde77bea05e64db0 Mon Sep 17 00:00:00 2001 From: sunliming Date: Tue, 6 Jun 2023 14:20:26 +0800 Subject: [PATCH 758/934] selftests/user_events: Clear the events after perf self-test When the self test is completed, perf self-test left the user events not to be cleared. Clear the events by unregister and delete the event. Link: https://lkml.kernel.org/r/20230606062027.1008398-4-sunliming@kylinos.cn Acked-by: Beau Belgrave Acked-by: Masami Hiramatsu (Google) Signed-off-by: sunliming Signed-off-by: Steven Rostedt (Google) --- .../testing/selftests/user_events/perf_test.c | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index a070258d4449..e97f24ab6e2f 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -81,6 +81,32 @@ static int get_offset(void) return offset; } +static int clear(int *check) +{ + struct user_unreg unreg = {0}; + + unreg.size = sizeof(unreg); + unreg.disable_bit = 31; + unreg.disable_addr = (__u64)check; + + int fd = open(data_file, O_RDWR); + + if (fd == -1) + return -1; + + if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1) + if (errno != ENOENT) + return -1; + + if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1) + if (errno != ENOENT) + return -1; + + close(fd); + + return 0; +} + FIXTURE(user) { int data_fd; int check; @@ -93,6 +119,9 @@ FIXTURE_SETUP(user) { FIXTURE_TEARDOWN(user) { close(self->data_fd); + + if (clear(&self->check) != 0) + printf("WARNING: Clear didn't work!\n"); } TEST_F(user, perf_write) { From 42187bdc3ca45435bec96c66a90276315b2db1cb Mon Sep 17 00:00:00 2001 From: sunliming Date: Tue, 6 Jun 2023 14:20:27 +0800 Subject: [PATCH 759/934] selftests/user_events: Add perf self-test for empty arguments events Tests to ensure events that has empty arguments can input trace record correctly when using perf. Link: https://lkml.kernel.org/r/20230606062027.1008398-5-sunliming@kylinos.cn Acked-by: Beau Belgrave Acked-by: Masami Hiramatsu (Google) Signed-off-by: sunliming Signed-off-by: Steven Rostedt (Google) --- .../testing/selftests/user_events/perf_test.c | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index e97f24ab6e2f..8b09be566fa2 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -189,6 +189,59 @@ TEST_F(user, perf_write) { ASSERT_EQ(0, self->check); } +TEST_F(user, perf_empty_events) { + struct perf_event_attr pe = {0}; + struct user_reg reg = {0}; + struct perf_event_mmap_page *perf_page; + int page_size = sysconf(_SC_PAGESIZE); + int id, fd; + __u32 *val; + + reg.size = sizeof(reg); + reg.name_args = (__u64)"__test_event"; + reg.enable_bit = 31; + reg.enable_addr = (__u64)&self->check; + reg.enable_size = sizeof(self->check); + + /* Register should work */ + ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); + ASSERT_EQ(0, reg.write_index); + ASSERT_EQ(0, self->check); + + /* Id should be there */ + id = get_id(); + ASSERT_NE(-1, id); + + pe.type = PERF_TYPE_TRACEPOINT; + pe.size = sizeof(pe); + pe.config = id; + pe.sample_type = PERF_SAMPLE_RAW; + pe.sample_period = 1; + pe.wakeup_events = 1; + + /* Tracepoint attach should work */ + fd = perf_event_open(&pe, 0, -1, -1, 0); + ASSERT_NE(-1, fd); + + perf_page = mmap(NULL, page_size * 2, PROT_READ, MAP_SHARED, fd, 0); + ASSERT_NE(MAP_FAILED, perf_page); + + /* Status should be updated */ + ASSERT_EQ(1 << reg.enable_bit, self->check); + + /* Ensure write shows up at correct offset */ + ASSERT_NE(-1, write(self->data_fd, ®.write_index, + sizeof(reg.write_index))); + val = (void *)(((char *)perf_page) + perf_page->data_offset); + ASSERT_EQ(PERF_RECORD_SAMPLE, *val); + + munmap(perf_page, page_size * 2); + close(fd); + + /* Status should be updated */ + ASSERT_EQ(0, self->check); +} + int main(int argc, char **argv) { return test_harness_run(argc, argv); From ed0e0ae0c932188654422d01f4e0ea14ff97c063 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Thu, 1 Jun 2023 15:49:28 -0700 Subject: [PATCH 760/934] tracing/user_events: Remove user_ns walk for groups During discussions it was suggested that user_ns is not a good place to try to attach a tracing namespace. The current code has stubs to enable that work that are very likely to change and incur a performance cost. Remove the user_ns walk when creating a group and determining the system name to use, since it's unlikely user_ns will be used in the future. Link: https://lore.kernel.org/all/20230601-urenkel-holzofen-cd9403b9cadd@brauner/ Link: https://lore.kernel.org/linux-trace-kernel/20230601224928.301-1-beaub@linux.microsoft.com Suggested-by: Christian Brauner Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 42 ++++---------------------------- 1 file changed, 5 insertions(+), 37 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 49914b6cb651..cf6d4c02c363 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -182,21 +182,11 @@ static void user_event_group_destroy(struct user_event_group *group) kfree(group); } -static char *user_event_group_system_name(struct user_namespace *user_ns) +static char *user_event_group_system_name(void) { char *system_name; int len = sizeof(USER_EVENTS_SYSTEM) + 1; - if (user_ns != &init_user_ns) { - /* - * Unexpected at this point: - * We only currently support init_user_ns. - * When we enable more, this will trigger a failure so log. - */ - pr_warn("user_events: Namespace other than init_user_ns!\n"); - return NULL; - } - system_name = kmalloc(len, GFP_KERNEL); if (!system_name) @@ -207,34 +197,12 @@ static char *user_event_group_system_name(struct user_namespace *user_ns) return system_name; } -static inline struct user_event_group -*user_event_group_from_user_ns(struct user_namespace *user_ns) -{ - if (user_ns == &init_user_ns) - return init_group; - - return NULL; -} - static struct user_event_group *current_user_event_group(void) { - struct user_namespace *user_ns = current_user_ns(); - struct user_event_group *group = NULL; - - while (user_ns) { - group = user_event_group_from_user_ns(user_ns); - - if (group) - break; - - user_ns = user_ns->parent; - } - - return group; + return init_group; } -static struct user_event_group -*user_event_group_create(struct user_namespace *user_ns) +static struct user_event_group *user_event_group_create(void) { struct user_event_group *group; @@ -243,7 +211,7 @@ static struct user_event_group if (!group) return NULL; - group->system_name = user_event_group_system_name(user_ns); + group->system_name = user_event_group_system_name(); if (!group->system_name) goto error; @@ -2603,7 +2571,7 @@ static int __init trace_events_user_init(void) if (!fault_cache) return -ENOMEM; - init_group = user_event_group_create(&init_user_ns); + init_group = user_event_group_create(); if (!init_group) { kmem_cache_destroy(fault_cache); From 0c7e314a6352664e12ec465f576cf039e95f8369 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 12 Jun 2023 10:50:33 -0500 Subject: [PATCH 761/934] RDMA/rxe: Fix rxe_cq_post A recent patch replaced a tasklet execution of cq->comp_handler by a direct call. While this made sense it let changes to cq->notify state be unprotected and assumed that the cq completion machinery and the ulp done callbacks were reentrant. The result is that in some cases completion events can be lost. This patch moves the cq->comp_handler call inside of the spinlock in rxe_cq_post which solves both issues. This is compatible with the matching code in the request notify verb. Fixes: 78b26a335310 ("RDMA/rxe: Remove tasklet call from rxe_cq.c") Link: https://lore.kernel.org/r/20230612155032.17036-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_cq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 20ff0c0c4605..6ca2a05b6a2a 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -113,8 +113,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT); - spin_unlock_irqrestore(&cq->cq_lock, flags); - if ((cq->notify == IB_CQ_NEXT_COMP) || (cq->notify == IB_CQ_SOLICITED && solicited)) { cq->notify = 0; @@ -122,6 +120,8 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } + spin_unlock_irqrestore(&cq->cq_lock, flags); + return 0; } From b08d72580584ab89c41d6fc0f15cd1cf4ce2ed93 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Wed, 14 Jun 2023 09:33:31 -0700 Subject: [PATCH 762/934] tracing/user_events: Store register flags on events Currently we don't have any available flags for user processes to use to indicate options for user_events. We will soon have a flag to indicate the event should or should not auto-delete once it's not being used by anyone. Add a reg_flags field to user_events and parameters to existing functions to allow for this in future patches. Link: https://lkml.kernel.org/r/20230614163336.5797-2-beaub@linux.microsoft.com Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index cf6d4c02c363..629823e21447 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -87,6 +87,7 @@ struct user_event { struct list_head validators; refcount_t refcnt; int min_size; + int reg_flags; char status; }; @@ -165,7 +166,7 @@ typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i, static int user_event_parse(struct user_event_group *group, char *name, char *args, char *flags, - struct user_event **newuser); + struct user_event **newuser, int reg_flags); static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm); static struct user_event_mm *user_event_mm_get_all(struct user_event *user); @@ -810,7 +811,8 @@ static struct list_head *user_event_get_fields(struct trace_event_call *call) * Upon success user_event has its ref count increased by 1. */ static int user_event_parse_cmd(struct user_event_group *group, - char *raw_command, struct user_event **newuser) + char *raw_command, struct user_event **newuser, + int reg_flags) { char *name = raw_command; char *args = strpbrk(name, " "); @@ -824,7 +826,7 @@ static int user_event_parse_cmd(struct user_event_group *group, if (flags) *flags++ = '\0'; - return user_event_parse(group, name, args, flags, newuser); + return user_event_parse(group, name, args, flags, newuser, reg_flags); } static int user_field_array_size(const char *type) @@ -1588,7 +1590,7 @@ static int user_event_create(const char *raw_command) mutex_lock(&group->reg_mutex); - ret = user_event_parse_cmd(group, name, &user); + ret = user_event_parse_cmd(group, name, &user, 0); if (!ret) refcount_dec(&user->refcnt); @@ -1751,7 +1753,7 @@ static int user_event_trace_register(struct user_event *user) */ static int user_event_parse(struct user_event_group *group, char *name, char *args, char *flags, - struct user_event **newuser) + struct user_event **newuser, int reg_flags) { int ret; u32 key; @@ -1846,6 +1848,8 @@ error: if (ret) goto put_user_lock; + user->reg_flags = reg_flags; + /* Ensure we track self ref and caller ref (2) */ refcount_set(&user->refcnt, 2); @@ -2144,7 +2148,7 @@ static long user_events_ioctl_reg(struct user_event_file_info *info, return ret; } - ret = user_event_parse_cmd(info->group, name, &user); + ret = user_event_parse_cmd(info->group, name, &user, reg.flags); if (ret) { kfree(name); From f0dbf6fd0bddb9290c89fdd7afc53dad5051030e Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Wed, 14 Jun 2023 09:33:32 -0700 Subject: [PATCH 763/934] tracing/user_events: Track refcount consistently via put/get Various parts of the code today track user_event's refcnt field directly via a refcount_add/dec. This makes it hard to modify the behavior of the last reference decrement in all code paths consistently. For example, in the future we will auto-delete events upon the last reference going away. This last reference could happen in many places, but we want it to be consistently handled. Add user_event_get() and user_event_put() for the add/dec. Update all places where direct refcounts are being used to utilize these new functions. In each location pass if event_mutex is locked or not. This allows us to drop events automatically in future patches clearly. Ensure when caller states the lock is held, it really is (or is not) held. Link: https://lkml.kernel.org/r/20230614163336.5797-3-beaub@linux.microsoft.com Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 69 +++++++++++++++++++------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 629823e21447..c064458eea5c 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -177,6 +177,28 @@ static u32 user_event_key(char *name) return jhash(name, strlen(name), 0); } +static struct user_event *user_event_get(struct user_event *user) +{ + refcount_inc(&user->refcnt); + + return user; +} + +static void user_event_put(struct user_event *user, bool locked) +{ +#ifdef CONFIG_LOCKDEP + if (locked) + lockdep_assert_held(&event_mutex); + else + lockdep_assert_not_held(&event_mutex); +#endif + + if (unlikely(!user)) + return; + + refcount_dec(&user->refcnt); +} + static void user_event_group_destroy(struct user_event_group *group) { kfree(group->system_name); @@ -228,12 +250,13 @@ error: return NULL; }; -static void user_event_enabler_destroy(struct user_event_enabler *enabler) +static void user_event_enabler_destroy(struct user_event_enabler *enabler, + bool locked) { list_del_rcu(&enabler->mm_enablers_link); /* No longer tracking the event via the enabler */ - refcount_dec(&enabler->event->refcnt); + user_event_put(enabler->event, locked); kfree(enabler); } @@ -295,7 +318,7 @@ static void user_event_enabler_fault_fixup(struct work_struct *work) /* User asked for enabler to be removed during fault */ if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) { - user_event_enabler_destroy(enabler); + user_event_enabler_destroy(enabler, true); goto out; } @@ -470,14 +493,12 @@ static bool user_event_enabler_dup(struct user_event_enabler *orig, if (!enabler) return false; - enabler->event = orig->event; + enabler->event = user_event_get(orig->event); enabler->addr = orig->addr; /* Only dup part of value (ignore future flags, etc) */ enabler->values = orig->values & ENABLE_VAL_DUP_MASK; - refcount_inc(&enabler->event->refcnt); - /* Enablers not exposed yet, RCU not required */ list_add(&enabler->mm_enablers_link, &mm->enablers); @@ -594,7 +615,7 @@ static void user_event_mm_destroy(struct user_event_mm *mm) struct user_event_enabler *enabler, *next; list_for_each_entry_safe(enabler, next, &mm->enablers, mm_enablers_link) - user_event_enabler_destroy(enabler); + user_event_enabler_destroy(enabler, false); mmdrop(mm->mm); kfree(mm); @@ -749,7 +770,7 @@ retry: * exit or run exec(), which includes forks and clones. */ if (!*write_result) { - refcount_inc(&enabler->event->refcnt); + user_event_get(user); list_add_rcu(&enabler->mm_enablers_link, &user_mm->enablers); } @@ -1337,10 +1358,8 @@ static struct user_event *find_user_event(struct user_event_group *group, *outkey = key; hash_for_each_possible(group->register_table, user, node, key) - if (!strcmp(EVENT_NAME(user), name)) { - refcount_inc(&user->refcnt); - return user; - } + if (!strcmp(EVENT_NAME(user), name)) + return user_event_get(user); return NULL; } @@ -1554,12 +1573,12 @@ static int user_event_reg(struct trace_event_call *call, return ret; inc: - refcount_inc(&user->refcnt); + user_event_get(user); update_enable_bit_for(user); return 0; dec: update_enable_bit_for(user); - refcount_dec(&user->refcnt); + user_event_put(user, true); return 0; } @@ -1593,7 +1612,7 @@ static int user_event_create(const char *raw_command) ret = user_event_parse_cmd(group, name, &user, 0); if (!ret) - refcount_dec(&user->refcnt); + user_event_put(user, false); mutex_unlock(&group->reg_mutex); @@ -1794,7 +1813,7 @@ static int user_event_parse(struct user_event_group *group, char *name, return 0; error: - refcount_dec(&user->refcnt); + user_event_put(user, false); return ret; } @@ -1883,7 +1902,7 @@ static int delete_user_event(struct user_event_group *group, char *name) if (!user) return -ENOENT; - refcount_dec(&user->refcnt); + user_event_put(user, true); if (!user_event_last_ref(user)) return -EBUSY; @@ -2042,9 +2061,7 @@ static int user_events_ref_add(struct user_event_file_info *info, for (i = 0; i < count; ++i) new_refs->events[i] = refs->events[i]; - new_refs->events[i] = user; - - refcount_inc(&user->refcnt); + new_refs->events[i] = user_event_get(user); rcu_assign_pointer(info->refs, new_refs); @@ -2158,7 +2175,7 @@ static long user_events_ioctl_reg(struct user_event_file_info *info, ret = user_events_ref_add(info, user); /* No longer need parse ref, ref_add either worked or not */ - refcount_dec(&user->refcnt); + user_event_put(user, false); /* Positive number is index and valid */ if (ret < 0) @@ -2307,7 +2324,7 @@ static long user_events_ioctl_unreg(unsigned long uarg) set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)); if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler))) - user_event_enabler_destroy(enabler); + user_event_enabler_destroy(enabler, true); /* Removed at least one */ ret = 0; @@ -2365,7 +2382,6 @@ static int user_events_release(struct inode *node, struct file *file) struct user_event_file_info *info = file->private_data; struct user_event_group *group; struct user_event_refs *refs; - struct user_event *user; int i; if (!info) @@ -2389,12 +2405,9 @@ static int user_events_release(struct inode *node, struct file *file) * The underlying user_events are ref counted, and cannot be freed. * After this decrement, the user_events may be freed elsewhere. */ - for (i = 0; i < refs->count; ++i) { - user = refs->events[i]; + for (i = 0; i < refs->count; ++i) + user_event_put(refs->events[i], false); - if (user) - refcount_dec(&user->refcnt); - } out: file->private_data = NULL; From a65442edb47a6d9c974b50049296ac9ddb378fee Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Wed, 14 Jun 2023 09:33:33 -0700 Subject: [PATCH 764/934] tracing/user_events: Add auto cleanup and future persist flag Currently user events need to be manually deleted via the delete IOCTL call or via the dynamic_events file. Most operators and processes wish to have these events auto cleanup when they are no longer used by anything to prevent them piling without manual maintenance. However, some operators may not want this, such as pre-registering events via the dynamic_events tracefs file. Update user_event_put() to attempt an auto delete of the event if it's the last reference. The auto delete must run in a work queue to ensure proper behavior of class->reg() invocations that don't expect the call to go away from underneath them during the unregister. Add work_struct to user_event struct to ensure we can do this reliably. Add a persist flag, that is not yet exposed, to ensure we can toggle between auto-cleanup and leaving the events existing in the future. When a non-zero flag is seen during register, return -EINVAL to ensure ABI is clear for the user processes while we work out the best approach for persistent events. Link: https://lkml.kernel.org/r/20230614163336.5797-4-beaub@linux.microsoft.com Link: https://lore.kernel.org/linux-trace-kernel/20230518093600.3f119d68@rorschach.local.home/ Suggested-by: Steven Rostedt Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 139 ++++++++++++++++++++++++++++--- 1 file changed, 126 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index c064458eea5c..8df0550415e7 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -49,6 +49,18 @@ #define EVENT_STATUS_PERF BIT(1) #define EVENT_STATUS_OTHER BIT(7) +/* + * User register flags are not allowed yet, keep them here until we are + * ready to expose them out to the user ABI. + */ +enum user_reg_flag { + /* Event will not delete upon last reference closing */ + USER_EVENT_REG_PERSIST = 1U << 0, + + /* This value or above is currently non-ABI */ + USER_EVENT_REG_MAX = 1U << 1, +}; + /* * Stores the system name, tables, and locks for a group of events. This * allows isolation for events by various means. @@ -85,6 +97,7 @@ struct user_event { struct hlist_node node; struct list_head fields; struct list_head validators; + struct work_struct put_work; refcount_t refcnt; int min_size; int reg_flags; @@ -171,6 +184,7 @@ static int user_event_parse(struct user_event_group *group, char *name, static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm); static struct user_event_mm *user_event_mm_get_all(struct user_event *user); static void user_event_mm_put(struct user_event_mm *mm); +static int destroy_user_event(struct user_event *user); static u32 user_event_key(char *name) { @@ -184,19 +198,103 @@ static struct user_event *user_event_get(struct user_event *user) return user; } +static void delayed_destroy_user_event(struct work_struct *work) +{ + struct user_event *user = container_of( + work, struct user_event, put_work); + + mutex_lock(&event_mutex); + + if (!refcount_dec_and_test(&user->refcnt)) + goto out; + + if (destroy_user_event(user)) { + /* + * The only reason this would fail here is if we cannot + * update the visibility of the event. In this case the + * event stays in the hashtable, waiting for someone to + * attempt to delete it later. + */ + pr_warn("user_events: Unable to delete event\n"); + refcount_set(&user->refcnt, 1); + } +out: + mutex_unlock(&event_mutex); +} + static void user_event_put(struct user_event *user, bool locked) { -#ifdef CONFIG_LOCKDEP - if (locked) - lockdep_assert_held(&event_mutex); - else - lockdep_assert_not_held(&event_mutex); -#endif + bool delete; if (unlikely(!user)) return; - refcount_dec(&user->refcnt); + /* + * When the event is not enabled for auto-delete there will always + * be at least 1 reference to the event. During the event creation + * we initially set the refcnt to 2 to achieve this. In those cases + * the caller must acquire event_mutex and after decrement check if + * the refcnt is 1, meaning this is the last reference. When auto + * delete is enabled, there will only be 1 ref, IE: refcnt will be + * only set to 1 during creation to allow the below checks to go + * through upon the last put. The last put must always be done with + * the event mutex held. + */ + if (!locked) { + lockdep_assert_not_held(&event_mutex); + delete = refcount_dec_and_mutex_lock(&user->refcnt, &event_mutex); + } else { + lockdep_assert_held(&event_mutex); + delete = refcount_dec_and_test(&user->refcnt); + } + + if (!delete) + return; + + /* + * We now have the event_mutex in all cases, which ensures that + * no new references will be taken until event_mutex is released. + * New references come through find_user_event(), which requires + * the event_mutex to be held. + */ + + if (user->reg_flags & USER_EVENT_REG_PERSIST) { + /* We should not get here when persist flag is set */ + pr_alert("BUG: Auto-delete engaged on persistent event\n"); + goto out; + } + + /* + * Unfortunately we have to attempt the actual destroy in a work + * queue. This is because not all cases handle a trace_event_call + * being removed within the class->reg() operation for unregister. + */ + INIT_WORK(&user->put_work, delayed_destroy_user_event); + + /* + * Since the event is still in the hashtable, we have to re-inc + * the ref count to 1. This count will be decremented and checked + * in the work queue to ensure it's still the last ref. This is + * needed because a user-process could register the same event in + * between the time of event_mutex release and the work queue + * running the delayed destroy. If we removed the item now from + * the hashtable, this would result in a timing window where a + * user process would fail a register because the trace_event_call + * register would fail in the tracing layers. + */ + refcount_set(&user->refcnt, 1); + + if (WARN_ON_ONCE(!schedule_work(&user->put_work))) { + /* + * If we fail we must wait for an admin to attempt delete or + * another register/close of the event, whichever is first. + */ + pr_warn("user_events: Unable to queue delayed destroy\n"); + } +out: + /* Ensure if we didn't have event_mutex before we unlock it */ + if (!locked) + mutex_unlock(&event_mutex); } static void user_event_group_destroy(struct user_event_group *group) @@ -793,7 +891,12 @@ out: static __always_inline __must_check bool user_event_last_ref(struct user_event *user) { - return refcount_read(&user->refcnt) == 1; + int last = 0; + + if (user->reg_flags & USER_EVENT_REG_PERSIST) + last = 1; + + return refcount_read(&user->refcnt) == last; } static __always_inline __must_check @@ -1609,7 +1712,8 @@ static int user_event_create(const char *raw_command) mutex_lock(&group->reg_mutex); - ret = user_event_parse_cmd(group, name, &user, 0); + /* Dyn events persist, otherwise they would cleanup immediately */ + ret = user_event_parse_cmd(group, name, &user, USER_EVENT_REG_PERSIST); if (!ret) user_event_put(user, false); @@ -1780,6 +1884,10 @@ static int user_event_parse(struct user_event_group *group, char *name, int argc = 0; char **argv; + /* User register flags are not ready yet */ + if (reg_flags != 0 || flags != NULL) + return -EINVAL; + /* Prevent dyn_event from racing */ mutex_lock(&event_mutex); user = find_user_event(group, name, &key); @@ -1869,8 +1977,13 @@ error: user->reg_flags = reg_flags; - /* Ensure we track self ref and caller ref (2) */ - refcount_set(&user->refcnt, 2); + if (user->reg_flags & USER_EVENT_REG_PERSIST) { + /* Ensure we track self ref and caller ref (2) */ + refcount_set(&user->refcnt, 2); + } else { + /* Ensure we track only caller ref (1) */ + refcount_set(&user->refcnt, 1); + } dyn_event_init(&user->devent, &user_event_dops); dyn_event_add(&user->devent, &user->call); @@ -2092,8 +2205,8 @@ static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg) if (ret) return ret; - /* Ensure no flags, since we don't support any yet */ - if (kreg->flags != 0) + /* Ensure only valid flags */ + if (kreg->flags & ~(USER_EVENT_REG_MAX-1)) return -EINVAL; /* Ensure supported size */ From 216a137e3eadac888cece141bd2c940785e235a2 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Wed, 14 Jun 2023 09:33:34 -0700 Subject: [PATCH 765/934] selftests/user_events: Ensure auto cleanup works as expected User events now auto cleanup upon the last reference put. Update ftrace_test to ensure this works as expected. Ensure EBUSY delays while event is being deleted do not cause transient failures by waiting and re-attempting. Link: https://lkml.kernel.org/r/20230614163336.5797-5-beaub@linux.microsoft.com Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- .../selftests/user_events/ftrace_test.c | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index abfb49558a26..eb6904d89f14 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -102,30 +102,56 @@ err: return -1; } +static bool wait_for_delete(void) +{ + int i; + + for (i = 0; i < 1000; ++i) { + int fd = open(enable_file, O_RDONLY); + + if (fd == -1) + return true; + + close(fd); + usleep(1000); + } + + return false; +} + static int clear(int *check) { struct user_unreg unreg = {0}; + int fd; unreg.size = sizeof(unreg); unreg.disable_bit = 31; unreg.disable_addr = (__u64)check; - int fd = open(data_file, O_RDWR); + fd = open(data_file, O_RDWR); if (fd == -1) return -1; if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1) if (errno != ENOENT) - return -1; + goto fail; - if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1) - if (errno != ENOENT) - return -1; + if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1) { + if (errno == EBUSY) { + if (!wait_for_delete()) + goto fail; + } else if (errno != ENOENT) + goto fail; + } close(fd); return 0; +fail: + close(fd); + + return -1; } static int check_print_fmt(const char *event, const char *expected, int *check) @@ -155,9 +181,8 @@ static int check_print_fmt(const char *event, const char *expected, int *check) /* Register should work */ ret = ioctl(fd, DIAG_IOCSREG, ®); - close(fd); - if (ret != 0) { + close(fd); printf("Reg failed in fmt\n"); return ret; } @@ -165,6 +190,8 @@ static int check_print_fmt(const char *event, const char *expected, int *check) /* Ensure correct print_fmt */ ret = get_print_fmt(print_fmt, sizeof(print_fmt)); + close(fd); + if (ret != 0) return ret; @@ -256,10 +283,10 @@ TEST_F(user, register_events) { unreg.disable_bit = 30; ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg)); - /* Delete should work only after close and unregister */ + /* Delete should have been auto-done after close and unregister */ close(self->data_fd); - self->data_fd = open(data_file, O_RDWR); - ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event")); + + ASSERT_EQ(true, wait_for_delete()); } TEST_F(user, write_events) { From 61701242e86f0b509a0f5f9deb226bedfc8dfab7 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Wed, 14 Jun 2023 09:33:35 -0700 Subject: [PATCH 766/934] selftests/user_events: Adapt dyn_test to non-persist events Now that user_events does not honor persist events the dynamic_events file cannot be easily used to test parsing and matching cases. Update dyn_test to use the direct ABI file instead of dynamic_events so that we still have testing coverage until persist events and dynamic_events file integration has been decided. Link: https://lkml.kernel.org/r/20230614163336.5797-6-beaub@linux.microsoft.com Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- .../testing/selftests/user_events/dyn_test.c | 177 ++++++++++++++---- 1 file changed, 136 insertions(+), 41 deletions(-) diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c index 8879a7b04c6a..d6979a48478f 100644 --- a/tools/testing/selftests/user_events/dyn_test.c +++ b/tools/testing/selftests/user_events/dyn_test.c @@ -16,42 +16,140 @@ #include "../kselftest_harness.h" -const char *dyn_file = "/sys/kernel/tracing/dynamic_events"; -const char *clear = "!u:__test_event"; +const char *abi_file = "/sys/kernel/tracing/user_events_data"; +const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable"; -static int Append(const char *value) +static bool wait_for_delete(void) { - int fd = open(dyn_file, O_RDWR | O_APPEND); - int ret = write(fd, value, strlen(value)); + int i; + + for (i = 0; i < 1000; ++i) { + int fd = open(enable_file, O_RDONLY); + + if (fd == -1) + return true; + + close(fd); + usleep(1000); + } + + return false; +} + +static int reg_event(int fd, int *check, int bit, const char *value) +{ + struct user_reg reg = {0}; + + reg.size = sizeof(reg); + reg.name_args = (__u64)value; + reg.enable_bit = bit; + reg.enable_addr = (__u64)check; + reg.enable_size = sizeof(*check); + + if (ioctl(fd, DIAG_IOCSREG, ®) == -1) + return -1; + + return 0; +} + +static int unreg_event(int fd, int *check, int bit) +{ + struct user_unreg unreg = {0}; + + unreg.size = sizeof(unreg); + unreg.disable_bit = bit; + unreg.disable_addr = (__u64)check; + + return ioctl(fd, DIAG_IOCSUNREG, &unreg); +} + +static int parse(int *check, const char *value) +{ + int fd = open(abi_file, O_RDWR); + int ret; + + if (fd == -1) + return -1; + + /* Until we have persist flags via dynamic events, use the base name */ + if (value[0] != 'u' || value[1] != ':') { + close(fd); + return -1; + } + + ret = reg_event(fd, check, 31, value + 2); + + if (ret != -1) { + if (unreg_event(fd, check, 31) == -1) + printf("WARN: Couldn't unreg event\n"); + } close(fd); + return ret; } -#define CLEAR() \ +static int check_match(int *check, const char *first, const char *second, bool *match) +{ + int fd = open(abi_file, O_RDWR); + int ret = -1; + + if (fd == -1) + return -1; + + if (reg_event(fd, check, 31, first) == -1) + goto cleanup; + + if (reg_event(fd, check, 30, second) == -1) { + if (errno == EADDRINUSE) { + /* Name is in use, with different fields */ + *match = false; + ret = 0; + } + + goto cleanup; + } + + *match = true; + ret = 0; +cleanup: + unreg_event(fd, check, 31); + unreg_event(fd, check, 30); + + close(fd); + + wait_for_delete(); + + return ret; +} + +#define TEST_MATCH(x, y) \ do { \ - int ret = Append(clear); \ - if (ret == -1) \ - ASSERT_EQ(ENOENT, errno); \ + bool match; \ + ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \ + ASSERT_EQ(true, match); \ } while (0) -#define TEST_PARSE(x) \ +#define TEST_NMATCH(x, y) \ do { \ - ASSERT_NE(-1, Append(x)); \ - CLEAR(); \ + bool match; \ + ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \ + ASSERT_EQ(false, match); \ } while (0) -#define TEST_NPARSE(x) ASSERT_EQ(-1, Append(x)) +#define TEST_PARSE(x) ASSERT_NE(-1, parse(&self->check, x)) + +#define TEST_NPARSE(x) ASSERT_EQ(-1, parse(&self->check, x)) FIXTURE(user) { + int check; }; FIXTURE_SETUP(user) { - CLEAR(); } FIXTURE_TEARDOWN(user) { - CLEAR(); + wait_for_delete(); } TEST_F(user, basic_types) { @@ -95,33 +193,30 @@ TEST_F(user, size_types) { TEST_NPARSE("u:__test_event char a 20"); } -TEST_F(user, flags) { - /* Should work */ - TEST_PARSE("u:__test_event:BPF_ITER u32 a"); - /* Forward compat */ - TEST_PARSE("u:__test_event:BPF_ITER,FLAG_FUTURE u32 a"); -} - TEST_F(user, matching) { - /* Register */ - ASSERT_NE(-1, Append("u:__test_event struct custom a 20")); - /* Should not match */ - TEST_NPARSE("!u:__test_event struct custom b"); - /* Should match */ - TEST_PARSE("!u:__test_event struct custom a"); - /* Multi field reg */ - ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b")); - /* Non matching cases */ - TEST_NPARSE("!u:__test_event u32 a"); - TEST_NPARSE("!u:__test_event u32 b"); - TEST_NPARSE("!u:__test_event u32 a; u32 "); - TEST_NPARSE("!u:__test_event u32 a; u32 a"); - /* Matching case */ - TEST_PARSE("!u:__test_event u32 a; u32 b"); - /* Register */ - ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b")); - /* Ensure trailing semi-colon case */ - TEST_PARSE("!u:__test_event u32 a; u32 b;"); + /* Single name matches */ + TEST_MATCH("__test_event u32 a", + "__test_event u32 a"); + + /* Multiple names match */ + TEST_MATCH("__test_event u32 a; u32 b", + "__test_event u32 a; u32 b"); + + /* Multiple names match with dangling ; */ + TEST_MATCH("__test_event u32 a; u32 b", + "__test_event u32 a; u32 b;"); + + /* Single name doesn't match */ + TEST_NMATCH("__test_event u32 a", + "__test_event u32 b"); + + /* Multiple names don't match */ + TEST_NMATCH("__test_event u32 a; u32 b", + "__test_event u32 b; u32 a"); + + /* Types don't match */ + TEST_NMATCH("__test_event u64 a; u64 b", + "__test_event u32 a; u32 b"); } int main(int argc, char **argv) From 0113d4615dbf053ae9a7a1e0acbc6652713af01f Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Wed, 14 Jun 2023 09:33:36 -0700 Subject: [PATCH 767/934] tracing/user_events: Document auto-cleanup and remove dyn_event refs Now user_events auto-cleanup upon the last reference by default. This makes it not possible to use the dynamics event file via tracefs. Document that auto-cleanup is enabled by default and remove the refernce to /sys/kernel/tracing/dynamic_events file to make this clear. Link: https://lkml.kernel.org/r/20230614163336.5797-7-beaub@linux.microsoft.com Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/user_events.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst index f79987e16cf4..e7b07313550a 100644 --- a/Documentation/trace/user_events.rst +++ b/Documentation/trace/user_events.rst @@ -14,10 +14,6 @@ Programs can view status of the events via /sys/kernel/tracing/user_events_status and can both register and write data out via /sys/kernel/tracing/user_events_data. -Programs can also use /sys/kernel/tracing/dynamic_events to register and -delete user based events via the u: prefix. The format of the command to -dynamic_events is the same as the ioctl with the u: prefix applied. - Typically programs will register a set of events that they wish to expose to tools that can read trace_events (such as ftrace and perf). The registration process tells the kernel which address and bit to reflect if any tool has @@ -144,6 +140,9 @@ its name. Delete will only succeed if there are no references left to the event (in both user and kernel space). User programs should use a separate file to request deletes than the one used for registration due to this. +**NOTE:** By default events will auto-delete when there are no references left +to the event. Flags in the future may change this logic. + Unregistering ------------- If after registering an event it is no longer wanted to be updated then it can From adeaa3f290ecf7f6a6a5c53219a4686cbdff5fbd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 Jun 2023 19:26:55 -0600 Subject: [PATCH 768/934] io_uring/io-wq: clear current->worker_private on exit A recent fix stopped clearing PF_IO_WORKER from current->flags on exit, which meant that we can now call inc/dec running on the worker after it has been removed if it ends up scheduling in/out as part of exit. If this happens after an RCU grace period has passed, then the struct pointed to by current->worker_private may have been freed, and we can now be accessing memory that is freed. Ensure this doesn't happen by clearing the task worker_private field. Both io_wq_worker_running() and io_wq_worker_sleeping() check this field before going any further, and we don't need any accounting etc done after this worker has exited. Fixes: fd37b884003c ("io_uring/io-wq: don't clear PF_IO_WORKER on exit") Reported-by: Zorro Lang Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index fe38eb0cbc82..399e9a15c38d 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -220,7 +220,12 @@ static void io_worker_exit(struct io_worker *worker) list_del_rcu(&worker->all_list); raw_spin_unlock(&wq->lock); io_wq_dec_running(worker); - worker->flags = 0; + /* + * this worker is a goner, clear ->worker_private to avoid any + * inc/dec running calls that could happen as part of exit from + * touching 'worker'. + */ + current->worker_private = NULL; kfree_rcu(worker, rcu); io_worker_ref_put(wq); From ec21a38df77a5aefbd2f70c48127003b6f259cf3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 9 Jun 2023 09:16:21 +0100 Subject: [PATCH 769/934] Revert "media: dvb-core: Fix use-after-free on race condition at dvb_frontend" As reported by Thomas Voegtle , sometimes a DVB card does not initialize properly booting Linux 6.4-rc4. This is not always, maybe in 3 out of 4 attempts. After double-checking, the root cause seems to be related to the UAF fix, which is causing a race issue: [ 26.332149] tda10071 7-0005: found a 'NXP TDA10071' in cold state, will try to load a firmware [ 26.340779] tda10071 7-0005: downloading firmware from file 'dvb-fe-tda10071.fw' [ 989.277402] INFO: task vdr:743 blocked for more than 491 seconds. [ 989.283504] Not tainted 6.4.0-rc5-i5 #249 [ 989.288036] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 989.295860] task:vdr state:D stack:0 pid:743 ppid:711 flags:0x00004002 [ 989.295865] Call Trace: [ 989.295867] [ 989.295869] __schedule+0x2ea/0x12d0 [ 989.295877] ? asm_sysvec_apic_timer_interrupt+0x16/0x20 [ 989.295881] schedule+0x57/0xc0 [ 989.295884] schedule_preempt_disabled+0xc/0x20 [ 989.295887] __mutex_lock.isra.16+0x237/0x480 [ 989.295891] ? dvb_get_property.isra.10+0x1bc/0xa50 [ 989.295898] ? dvb_frontend_stop+0x36/0x180 [ 989.338777] dvb_frontend_stop+0x36/0x180 [ 989.338781] dvb_frontend_open+0x2f1/0x470 [ 989.338784] dvb_device_open+0x81/0xf0 [ 989.338804] ? exact_lock+0x20/0x20 [ 989.338808] chrdev_open+0x7f/0x1c0 [ 989.338811] ? generic_permission+0x1a2/0x230 [ 989.338813] ? link_path_walk.part.63+0x340/0x380 [ 989.338815] ? exact_lock+0x20/0x20 [ 989.338817] do_dentry_open+0x18e/0x450 [ 989.374030] path_openat+0xca5/0xe00 [ 989.374031] ? terminate_walk+0xec/0x100 [ 989.374034] ? path_lookupat+0x93/0x140 [ 989.374036] do_filp_open+0xc0/0x140 [ 989.374038] ? __call_rcu_common.constprop.91+0x92/0x240 [ 989.374041] ? __check_object_size+0x147/0x260 [ 989.374043] ? __check_object_size+0x147/0x260 [ 989.374045] ? alloc_fd+0xbb/0x180 [ 989.374048] ? do_sys_openat2+0x243/0x310 [ 989.374050] do_sys_openat2+0x243/0x310 [ 989.374052] do_sys_open+0x52/0x80 [ 989.374055] do_syscall_64+0x5b/0x80 [ 989.421335] ? __task_pid_nr_ns+0x92/0xa0 [ 989.421337] ? syscall_exit_to_user_mode+0x20/0x40 [ 989.421339] ? do_syscall_64+0x67/0x80 [ 989.421341] ? syscall_exit_to_user_mode+0x20/0x40 [ 989.421343] ? do_syscall_64+0x67/0x80 [ 989.421345] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 989.421348] RIP: 0033:0x7fe895d067e3 [ 989.421349] RSP: 002b:00007fff933c2ba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000101 [ 989.421351] RAX: ffffffffffffffda RBX: 00007fff933c2c10 RCX: 00007fe895d067e3 [ 989.421352] RDX: 0000000000000802 RSI: 00005594acdce160 RDI: 00000000ffffff9c [ 989.421353] RBP: 0000000000000802 R08: 0000000000000000 R09: 0000000000000000 [ 989.421353] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000001 [ 989.421354] R13: 00007fff933c2ca0 R14: 00000000ffffffff R15: 00007fff933c2c90 [ 989.421355] This reverts commit 6769a0b7ee0c3b31e1b22c3fadff2bfb642de23f. Fixes: 6769a0b7ee0c ("media: dvb-core: Fix use-after-free on race condition at dvb_frontend") Link: https://lore.kernel.org/all/da5382ad-09d6-20ac-0d53-611594b30861@lio96.de/ Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dvb_frontend.c | 53 +++++---------------------- include/media/dvb_frontend.h | 6 +-- 2 files changed, 10 insertions(+), 49 deletions(-) diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index bc6950a5740f..9293b058ab99 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -817,26 +817,15 @@ static void dvb_frontend_stop(struct dvb_frontend *fe) dev_dbg(fe->dvb->device, "%s:\n", __func__); - mutex_lock(&fe->remove_mutex); - if (fe->exit != DVB_FE_DEVICE_REMOVED) fe->exit = DVB_FE_NORMAL_EXIT; mb(); - if (!fepriv->thread) { - mutex_unlock(&fe->remove_mutex); + if (!fepriv->thread) return; - } kthread_stop(fepriv->thread); - mutex_unlock(&fe->remove_mutex); - - if (fepriv->dvbdev->users < -1) { - wait_event(fepriv->dvbdev->wait_queue, - fepriv->dvbdev->users == -1); - } - sema_init(&fepriv->sem, 1); fepriv->state = FESTATE_IDLE; @@ -2780,13 +2769,9 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) struct dvb_adapter *adapter = fe->dvb; int ret; - mutex_lock(&fe->remove_mutex); - dev_dbg(fe->dvb->device, "%s:\n", __func__); - if (fe->exit == DVB_FE_DEVICE_REMOVED) { - ret = -ENODEV; - goto err_remove_mutex; - } + if (fe->exit == DVB_FE_DEVICE_REMOVED) + return -ENODEV; if (adapter->mfe_shared == 2) { mutex_lock(&adapter->mfe_lock); @@ -2794,8 +2779,7 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (adapter->mfe_dvbdev && !adapter->mfe_dvbdev->writers) { mutex_unlock(&adapter->mfe_lock); - ret = -EBUSY; - goto err_remove_mutex; + return -EBUSY; } adapter->mfe_dvbdev = dvbdev; } @@ -2818,10 +2802,8 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) while (mferetry-- && (mfedev->users != -1 || mfepriv->thread)) { if (msleep_interruptible(500)) { - if (signal_pending(current)) { - ret = -EINTR; - goto err_remove_mutex; - } + if (signal_pending(current)) + return -EINTR; } } @@ -2833,8 +2815,7 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (mfedev->users != -1 || mfepriv->thread) { mutex_unlock(&adapter->mfe_lock); - ret = -EBUSY; - goto err_remove_mutex; + return -EBUSY; } adapter->mfe_dvbdev = dvbdev; } @@ -2893,8 +2874,6 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); - - mutex_unlock(&fe->remove_mutex); return ret; err3: @@ -2916,9 +2895,6 @@ err1: err0: if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); - -err_remove_mutex: - mutex_unlock(&fe->remove_mutex); return ret; } @@ -2929,8 +2905,6 @@ static int dvb_frontend_release(struct inode *inode, struct file *file) struct dvb_frontend_private *fepriv = fe->frontend_priv; int ret; - mutex_lock(&fe->remove_mutex); - dev_dbg(fe->dvb->device, "%s:\n", __func__); if ((file->f_flags & O_ACCMODE) != O_RDONLY) { @@ -2952,18 +2926,10 @@ static int dvb_frontend_release(struct inode *inode, struct file *file) } mutex_unlock(&fe->dvb->mdev_lock); #endif + if (fe->exit != DVB_FE_NO_EXIT) + wake_up(&dvbdev->wait_queue); if (fe->ops.ts_bus_ctrl) fe->ops.ts_bus_ctrl(fe, 0); - - if (fe->exit != DVB_FE_NO_EXIT) { - mutex_unlock(&fe->remove_mutex); - wake_up(&dvbdev->wait_queue); - } else { - mutex_unlock(&fe->remove_mutex); - } - - } else { - mutex_unlock(&fe->remove_mutex); } dvb_frontend_put(fe); @@ -3064,7 +3030,6 @@ int dvb_register_frontend(struct dvb_adapter *dvb, fepriv = fe->frontend_priv; kref_init(&fe->refcount); - mutex_init(&fe->remove_mutex); /* * After initialization, there need to be two references: one diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h index 367d5381217b..e7c44870f20d 100644 --- a/include/media/dvb_frontend.h +++ b/include/media/dvb_frontend.h @@ -686,10 +686,7 @@ struct dtv_frontend_properties { * @id: Frontend ID * @exit: Used to inform the DVB core that the frontend * thread should exit (usually, means that the hardware - * got disconnected). - * @remove_mutex: mutex that avoids a race condition between a callback - * called when the hardware is disconnected and the - * file_operations of dvb_frontend. + * got disconnected. */ struct dvb_frontend { @@ -707,7 +704,6 @@ struct dvb_frontend { int (*callback)(void *adapter_priv, int component, int cmd, int arg); int id; unsigned int exit; - struct mutex remove_mutex; }; /** From 23200a4c8ac284f8b4263d7cecaefecaa3ad6732 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2023 12:58:33 +0200 Subject: [PATCH 770/934] clk: pxa: fix NULL pointer dereference in pxa3xx_clk_update_accr sparse points out an embarrasing bug in an older patch of mine, which uses the register offset instead of an __iomem pointer: drivers/clk/pxa/clk-pxa3xx.c:167:9: sparse: sparse: Using plain integer as NULL pointer Unlike sparse, gcc and clang ignore this bug and fail to warn because a literal '0' is considered a valid representation of a NULL pointer. Fixes: 3c816d950a49 ("ARM: pxa: move clk register definitions to driver") Cc: stable@vger.kernel.org Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202305111301.RAHohdob-lkp@intel.com/ Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230511105845.299859-1-arnd@kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/pxa/clk-pxa3xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c index 42958a542662..621e298f101a 100644 --- a/drivers/clk/pxa/clk-pxa3xx.c +++ b/drivers/clk/pxa/clk-pxa3xx.c @@ -164,7 +164,7 @@ void pxa3xx_clk_update_accr(u32 disable, u32 enable, u32 xclkcfg, u32 mask) accr &= ~disable; accr |= enable; - writel(accr, ACCR); + writel(accr, clk_regs + ACCR); if (xclkcfg) __asm__("mcr p14, 0, %0, c6, c0, 0\n" : : "r"(xclkcfg)); From 9dc704dcc09eae7d21b5da0615eb2ed79278f63e Mon Sep 17 00:00:00 2001 From: Sagar Biradar Date: Fri, 19 May 2023 16:08:34 -0700 Subject: [PATCH 771/934] scsi: aacraid: Reply queue mapping to CPUs based on IRQ affinity Fix the I/O hang that arises because of the MSIx vector not having a mapped online CPU upon receiving completion. SCSI cmds take the blk_mq route, which is setup during init. Reserved cmds fetch the vector_no from mq_map after init is complete. Before init, they have to use 0 - as per the norm. Reviewed-by: Gilbert Wu Signed-off-by: Sagar Biradar Reviewed-by: John Garry Link: https://lore.kernel.org/r/20230519230834.27436-1-sagar.biradar@microchip.com Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aacraid.h | 1 + drivers/scsi/aacraid/commsup.c | 6 +++++- drivers/scsi/aacraid/linit.c | 14 ++++++++++++++ drivers/scsi/aacraid/src.c | 25 +++++++++++++++++++++++-- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 5e115e8b2ba4..7c6efde75da6 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -1678,6 +1678,7 @@ struct aac_dev u32 handle_pci_error; bool init_reset; u8 soft_reset_support; + u8 use_map_queue; }; #define aac_adapter_interrupt(dev) \ diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index deb32c9f4b3e..3f062e4013ab 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -223,8 +223,12 @@ int aac_fib_setup(struct aac_dev * dev) struct fib *aac_fib_alloc_tag(struct aac_dev *dev, struct scsi_cmnd *scmd) { struct fib *fibptr; + u32 blk_tag; + int i; - fibptr = &dev->fibs[scsi_cmd_to_rq(scmd)->tag]; + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); + i = blk_mq_unique_tag_to_tag(blk_tag); + fibptr = &dev->fibs[i]; /* * Null out fields that depend on being zero at the start of * each I/O diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 68f4dbcfff49..c4a36c0be527 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -504,6 +505,15 @@ common_config: return 0; } +static void aac_map_queues(struct Scsi_Host *shost) +{ + struct aac_dev *aac = (struct aac_dev *)shost->hostdata; + + blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + aac->pdev, 0); + aac->use_map_queue = true; +} + /** * aac_change_queue_depth - alter queue depths * @sdev: SCSI device we are considering @@ -1488,6 +1498,7 @@ static const struct scsi_host_template aac_driver_template = { .bios_param = aac_biosparm, .shost_groups = aac_host_groups, .slave_configure = aac_slave_configure, + .map_queues = aac_map_queues, .change_queue_depth = aac_change_queue_depth, .sdev_groups = aac_dev_groups, .eh_abort_handler = aac_eh_abort, @@ -1775,6 +1786,8 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_lun = AAC_MAX_LUN; pci_set_drvdata(pdev, shost); + shost->nr_hw_queues = aac->max_msix; + shost->host_tagset = 1; error = scsi_add_host(shost, &pdev->dev); if (error) @@ -1906,6 +1919,7 @@ static void aac_remove_one(struct pci_dev *pdev) struct aac_dev *aac = (struct aac_dev *)shost->hostdata; aac_cancel_rescan_worker(aac); + aac->use_map_queue = false; scsi_remove_host(shost); __aac_shutdown(aac); diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 11ef58204e96..61949f374188 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -493,6 +493,10 @@ static int aac_src_deliver_message(struct fib *fib) #endif u16 vector_no; + struct scsi_cmnd *scmd; + u32 blk_tag; + struct Scsi_Host *shost = dev->scsi_host_ptr; + struct blk_mq_queue_map *qmap; atomic_inc(&q->numpending); @@ -505,8 +509,25 @@ static int aac_src_deliver_message(struct fib *fib) if ((dev->comm_interface == AAC_COMM_MESSAGE_TYPE3) && dev->sa_firmware) vector_no = aac_get_vector(dev); - else - vector_no = fib->vector_no; + else { + if (!fib->vector_no || !fib->callback_data) { + if (shost && dev->use_map_queue) { + qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + vector_no = qmap->mq_map[raw_smp_processor_id()]; + } + /* + * We hardcode the vector_no for + * reserved commands as a valid shost is + * absent during the init + */ + else + vector_no = 0; + } else { + scmd = (struct scsi_cmnd *)fib->callback_data; + blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd)); + vector_no = blk_mq_unique_tag_to_hwq(blk_tag); + } + } if (native_hba) { if (fib->flags & FIB_CONTEXT_FLAG_NATIVE_HBA_TMF) { From 31d16e712bdcaee769de4780f72ff8d6cd3f0589 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Fri, 9 Jun 2023 13:38:21 -0700 Subject: [PATCH 772/934] scsi: storvsc: Always set no_report_opcodes Hyper-V synthetic SCSI devices do not support the MAINTENANCE_IN SCSI command, so scsi_report_opcode() always fails, resulting in messages like this: hv_storvsc : tag#205 cmd 0xa3 status: scsi 0x2 srb 0x86 hv 0xc0000001 The recently added support for command duration limits calls scsi_report_opcode() four times as each device comes online, which significantly increases the number of messages logged in a system with many disks. Fix the problem by always marking Hyper-V synthetic SCSI devices as not supporting scsi_report_opcode(). With this setting, the MAINTENANCE_IN SCSI command is not issued and no messages are logged. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1686343101-18930-1-git-send-email-mikelley@microsoft.com Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index e6bc622954cf..659196a2f63a 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1567,6 +1567,8 @@ static int storvsc_device_configure(struct scsi_device *sdevice) { blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ)); + /* storvsc devices don't support MAINTENANCE_IN SCSI cmd */ + sdevice->no_report_opcodes = 1; sdevice->no_write_same = 1; /* From 91271699228bfc66f1bc8abc0327169dc156d854 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 13 Jun 2023 09:43:00 -0500 Subject: [PATCH 773/934] scsi: target: core: Fix error path in target_setup_session() In the error exits in target_setup_session(), if a branch is taken to free_sess: transport_free_session() may call to target_free_cmd_counter() and then fall through to call target_free_cmd_counter() a second time. This can, and does, sometimes cause seg faults since the data field in cmd_cnt->refcnt has been freed in the first call. Fix this problem by simply returning after the call to transport_free_session(). The second call is redundant for those cases. Fixes: 4edba7e4a8f3 ("scsi: target: Move cmd counter allocation") Signed-off-by: Bob Pearson Link: https://lore.kernel.org/r/20230613144259.12890-1-rpearsonhpe@gmail.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 86adff2a86ed..687adc9e086c 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -504,6 +504,8 @@ target_setup_session(struct se_portal_group *tpg, free_sess: transport_free_session(sess); + return ERR_PTR(rc); + free_cnt: target_free_cmd_counter(cmd_cnt); return ERR_PTR(rc); From 9cefd6e7e0a77b0fbca5c793f6fb6821b0962775 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Wed, 14 Jun 2023 10:59:44 -0700 Subject: [PATCH 774/934] scsi: lpfc: Fix incorrect big endian type assignment in bsg loopback path The kernel test robot reported sparse warnings regarding incorrect type assignment for __be16 variables in bsg loopback path. Change the flagged lines to use the be16_to_cpu() and cpu_to_be16() macros appropriately. Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20230614175944.3577-1-justintee8345@gmail.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202306110819.sDIKiGgg-lkp@intel.com/ Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_bsg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 9a322a3a2150..595dca92e8db 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -889,7 +889,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocbq) { uint32_t evt_req_id = 0; - uint32_t cmd; + u16 cmd; struct lpfc_dmabuf *dmabuf = NULL; struct lpfc_bsg_event *evt; struct event_data *evt_dat = NULL; @@ -915,7 +915,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ct_req = (struct lpfc_sli_ct_request *)bdeBuf1->virt; evt_req_id = ct_req->FsType; - cmd = ct_req->CommandResponse.bits.CmdRsp; + cmd = be16_to_cpu(ct_req->CommandResponse.bits.CmdRsp); spin_lock_irqsave(&phba->ct_ev_lock, flags); list_for_each_entry(evt, &phba->ct_ev_waiters, node) { @@ -3186,8 +3186,8 @@ lpfc_bsg_diag_loopback_run(struct bsg_job *job) ctreq->RevisionId.bits.InId = 0; ctreq->FsType = SLI_CT_ELX_LOOPBACK; ctreq->FsSubType = 0; - ctreq->CommandResponse.bits.CmdRsp = ELX_LOOPBACK_DATA; - ctreq->CommandResponse.bits.Size = size; + ctreq->CommandResponse.bits.CmdRsp = cpu_to_be16(ELX_LOOPBACK_DATA); + ctreq->CommandResponse.bits.Size = cpu_to_be16(size); segment_offset = ELX_LOOPBACK_HEADER_SZ; } else segment_offset = 0; From 19482792113fa1dc419f9bc7b04b9dbdaa5256fd Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Wed, 14 Jun 2023 06:50:25 +0800 Subject: [PATCH 775/934] Revert "ext4: remove unnecessary check in ext4_bg_num_gdb_nometa" This reverts commit ad3f09be6cfe332be8ff46c78e6ec0f8839107aa. The reverted commit was intended to simpfy the code to get group descriptor block number in non-meta block group by assuming s_gdb_count is block number used for all non-meta block group descriptors. However s_gdb_count is block number used for all meta *and* non-meta group descriptors. So s_gdb_group will be > actual group descriptor block number used for all non-meta block group which should be "total non-meta block group" / "group descriptors per block", e.g. s_first_meta_bg. Signed-off-by: Kemeng Shi Link: https://lore.kernel.org/r/20230613225025.3859522-1-shikemeng@huaweicloud.com Fixes: ad3f09be6cfe ("ext4: remove unnecessary check in ext4_bg_num_gdb_nometa") Cc: stable@kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index c1edde817be8..09c75a3127c7 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -886,7 +886,10 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, if (!ext4_bg_has_super(sb, group)) return 0; - return EXT4_SB(sb)->s_gdb_count; + if (ext4_has_feature_meta_bg(sb)) + return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); + else + return EXT4_SB(sb)->s_gdb_count; } /** From f451fd97dd2b78f286379203a47d9d295c467255 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Wed, 14 Jun 2023 12:02:55 +0200 Subject: [PATCH 776/934] ext4: drop the call to ext4_error() from ext4_get_group_info() A recent patch added a call to ext4_error() which is problematic since some callers of the ext4_get_group_info() function may be holding a spinlock, whereas ext4_error() must never be called in atomic context. This triggered a report from Syzbot: "BUG: sleeping function called from invalid context in ext4_update_super" (see the link below). Therefore, drop the call to ext4_error() from ext4_get_group_info(). In the meantime use eight characters tabs instead of nine characters ones. Reported-by: syzbot+4acc7d910e617b360859@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/00000000000070575805fdc6cdb2@google.com/ Fixes: 5354b2af3406 ("ext4: allow ext4_get_group_info() to fail") Suggested-by: Theodore Ts'o Signed-off-by: Fabio M. De Francesco Link: https://lore.kernel.org/r/20230614100446.14337-1-fmdefrancesco@gmail.com --- fs/ext4/balloc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 09c75a3127c7..1f72f977c6db 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -324,17 +324,15 @@ static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb, struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info **grp_info; - long indexv, indexh; + struct ext4_group_info **grp_info; + long indexv, indexh; - if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) { - ext4_error(sb, "invalid group %u", group); - return NULL; - } - indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); - indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); - return grp_info[indexh]; + if (unlikely(group >= EXT4_SB(sb)->s_groups_count)) + return NULL; + indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); + indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); + grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); + return grp_info[indexh]; } /* From 361b6889ae636926cdff517add240c3c8e24593a Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 14 Jun 2023 09:52:49 +0800 Subject: [PATCH 777/934] net/handshake: remove fput() that causes use-after-free A reference underflow is found in TLS handshake subsystem that causes a direct use-after-free. Part of the crash log is like below: [ 2.022114] ------------[ cut here ]------------ [ 2.022193] refcount_t: underflow; use-after-free. [ 2.022288] WARNING: CPU: 0 PID: 60 at lib/refcount.c:28 refcount_warn_saturate+0xbe/0x110 [ 2.022432] Modules linked in: [ 2.022848] RIP: 0010:refcount_warn_saturate+0xbe/0x110 [ 2.023231] RSP: 0018:ffffc900001bfe18 EFLAGS: 00000286 [ 2.023325] RAX: 0000000000000000 RBX: 0000000000000007 RCX: 00000000ffffdfff [ 2.023438] RDX: 0000000000000000 RSI: 00000000ffffffea RDI: 0000000000000001 [ 2.023555] RBP: ffff888004c20098 R08: ffffffff82b392c8 R09: 00000000ffffdfff [ 2.023693] R10: ffffffff82a592e0 R11: ffffffff82b092e0 R12: ffff888004c200d8 [ 2.023813] R13: 0000000000000000 R14: ffff888004c20000 R15: ffffc90000013ca8 [ 2.023930] FS: 0000000000000000(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 [ 2.024062] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2.024161] CR2: ffff888003601000 CR3: 0000000002a2e000 CR4: 00000000000006f0 [ 2.024275] Call Trace: [ 2.024322] [ 2.024367] ? __warn+0x7f/0x130 [ 2.024430] ? refcount_warn_saturate+0xbe/0x110 [ 2.024513] ? report_bug+0x199/0x1b0 [ 2.024585] ? handle_bug+0x3c/0x70 [ 2.024676] ? exc_invalid_op+0x18/0x70 [ 2.024750] ? asm_exc_invalid_op+0x1a/0x20 [ 2.024830] ? refcount_warn_saturate+0xbe/0x110 [ 2.024916] ? refcount_warn_saturate+0xbe/0x110 [ 2.024998] __tcp_close+0x2f4/0x3d0 [ 2.025065] ? __pfx_kunit_generic_run_threadfn_adapter+0x10/0x10 [ 2.025168] tcp_close+0x1f/0x70 [ 2.025231] inet_release+0x33/0x60 [ 2.025297] sock_release+0x1f/0x80 [ 2.025361] handshake_req_cancel_test2+0x100/0x2d0 [ 2.025457] kunit_try_run_case+0x4c/0xa0 [ 2.025532] kunit_generic_run_threadfn_adapter+0x15/0x20 [ 2.025644] kthread+0xe1/0x110 [ 2.025708] ? __pfx_kthread+0x10/0x10 [ 2.025780] ret_from_fork+0x2c/0x50 One can enable CONFIG_NET_HANDSHAKE_KUNIT_TEST config to reproduce above crash. The root cause of this bug is that the commit 1ce77c998f04 ("net/handshake: Unpin sock->file if a handshake is cancelled") adds one additional fput() function. That patch claims that the fput() is used to enable sock->file to be freed even when user space never calls DONE. However, it seems that the intended DONE routine will never give an additional fput() of ths sock->file. The existing two of them are just used to balance the reference added in sockfd_lookup(). This patch revert the mentioned commit to avoid the use-after-free. The patched kernel could successfully pass the KUNIT test and boot to shell. [ 0.733613] # Subtest: Handshake API tests [ 0.734029] 1..11 [ 0.734255] KTAP version 1 [ 0.734542] # Subtest: req_alloc API fuzzing [ 0.736104] ok 1 handshake_req_alloc NULL proto [ 0.736114] ok 2 handshake_req_alloc CLASS_NONE [ 0.736559] ok 3 handshake_req_alloc CLASS_MAX [ 0.737020] ok 4 handshake_req_alloc no callbacks [ 0.737488] ok 5 handshake_req_alloc no done callback [ 0.737988] ok 6 handshake_req_alloc excessive privsize [ 0.738529] ok 7 handshake_req_alloc all good [ 0.739036] # req_alloc API fuzzing: pass:7 fail:0 skip:0 total:7 [ 0.739444] ok 1 req_alloc API fuzzing [ 0.740065] ok 2 req_submit NULL req arg [ 0.740436] ok 3 req_submit NULL sock arg [ 0.740834] ok 4 req_submit NULL sock->file [ 0.741236] ok 5 req_lookup works [ 0.741621] ok 6 req_submit max pending [ 0.741974] ok 7 req_submit multiple [ 0.742382] ok 8 req_cancel before accept [ 0.742764] ok 9 req_cancel after accept [ 0.743151] ok 10 req_cancel after done [ 0.743510] ok 11 req_destroy works [ 0.743882] # Handshake API tests: pass:11 fail:0 skip:0 total:11 [ 0.744205] # Totals: pass:17 fail:0 skip:0 total:17 Acked-by: Chuck Lever Fixes: 1ce77c998f04 ("net/handshake: Unpin sock->file if a handshake is cancelled") Signed-off-by: Lin Ma Link: https://lore.kernel.org/r/20230613083204.633896-1-linma@zju.edu.cn Link: https://lore.kernel.org/r/20230614015249.987448-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/handshake/handshake.h | 1 - net/handshake/request.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h index 8aeaadca844f..4dac965c99df 100644 --- a/net/handshake/handshake.h +++ b/net/handshake/handshake.h @@ -31,7 +31,6 @@ struct handshake_req { struct list_head hr_list; struct rhash_head hr_rhash; unsigned long hr_flags; - struct file *hr_file; const struct handshake_proto *hr_proto; struct sock *hr_sk; void (*hr_odestruct)(struct sock *sk); diff --git a/net/handshake/request.c b/net/handshake/request.c index d78d41abb3d9..94d5cef3e048 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -239,7 +239,6 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, } req->hr_odestruct = req->hr_sk->sk_destruct; req->hr_sk->sk_destruct = handshake_sk_destruct; - req->hr_file = sock->file; ret = -EOPNOTSUPP; net = sock_net(req->hr_sk); @@ -335,9 +334,6 @@ bool handshake_req_cancel(struct sock *sk) return false; } - /* Request accepted and waiting for DONE */ - fput(req->hr_file); - out_true: trace_handshake_cancel(net, req, sk); From 24b454bc354ab7b1aa918a4fe3d7696516f592d4 Mon Sep 17 00:00:00 2001 From: Jakub Buchocki Date: Mon, 12 Jun 2023 10:14:21 -0700 Subject: [PATCH 778/934] ice: Fix ice module unload Clearing the interrupt scheme before PFR reset, during the removal routine, could cause the hardware errors and possibly lead to system reboot, as the PF reset can cause the interrupt to be generated. Place the call for PFR reset inside ice_deinit_dev(), wait until reset and all pending transactions are done, then call ice_clear_interrupt_scheme(). This introduces a PFR reset to multiple error paths. Additionally, remove the call for the reset from ice_load() - it will be a part of ice_unload() now. Error example: [ 75.229328] ice 0000:ca:00.1: Failed to read Tx Scheduler Tree - User Selection data from flash [ 77.571315] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 [ 77.571418] {1}[Hardware Error]: event severity: recoverable [ 77.571459] {1}[Hardware Error]: Error 0, type: recoverable [ 77.571500] {1}[Hardware Error]: section_type: PCIe error [ 77.571540] {1}[Hardware Error]: port_type: 4, root port [ 77.571580] {1}[Hardware Error]: version: 3.0 [ 77.571615] {1}[Hardware Error]: command: 0x0547, status: 0x4010 [ 77.571661] {1}[Hardware Error]: device_id: 0000:c9:02.0 [ 77.571703] {1}[Hardware Error]: slot: 25 [ 77.571736] {1}[Hardware Error]: secondary_bus: 0xca [ 77.571773] {1}[Hardware Error]: vendor_id: 0x8086, device_id: 0x347a [ 77.571821] {1}[Hardware Error]: class_code: 060400 [ 77.571858] {1}[Hardware Error]: bridge: secondary_status: 0x2800, control: 0x0013 [ 77.572490] pcieport 0000:c9:02.0: AER: aer_status: 0x00200000, aer_mask: 0x00100020 [ 77.572870] pcieport 0000:c9:02.0: [21] ACSViol (First) [ 77.573222] pcieport 0000:c9:02.0: AER: aer_layer=Transaction Layer, aer_agent=Receiver ID [ 77.573554] pcieport 0000:c9:02.0: AER: aer_uncor_severity: 0x00463010 [ 77.691273] {2}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 [ 77.691738] {2}[Hardware Error]: event severity: recoverable [ 77.691971] {2}[Hardware Error]: Error 0, type: recoverable [ 77.692192] {2}[Hardware Error]: section_type: PCIe error [ 77.692403] {2}[Hardware Error]: port_type: 4, root port [ 77.692616] {2}[Hardware Error]: version: 3.0 [ 77.692825] {2}[Hardware Error]: command: 0x0547, status: 0x4010 [ 77.693032] {2}[Hardware Error]: device_id: 0000:c9:02.0 [ 77.693238] {2}[Hardware Error]: slot: 25 [ 77.693440] {2}[Hardware Error]: secondary_bus: 0xca [ 77.693641] {2}[Hardware Error]: vendor_id: 0x8086, device_id: 0x347a [ 77.693853] {2}[Hardware Error]: class_code: 060400 [ 77.694054] {2}[Hardware Error]: bridge: secondary_status: 0x0800, control: 0x0013 [ 77.719115] pci 0000:ca:00.1: AER: can't recover (no error_detected callback) [ 77.719140] pcieport 0000:c9:02.0: AER: device recovery failed [ 77.719216] pcieport 0000:c9:02.0: AER: aer_status: 0x00200000, aer_mask: 0x00100020 [ 77.719390] pcieport 0000:c9:02.0: [21] ACSViol (First) [ 77.719557] pcieport 0000:c9:02.0: AER: aer_layer=Transaction Layer, aer_agent=Receiver ID [ 77.719723] pcieport 0000:c9:02.0: AER: aer_uncor_severity: 0x00463010 Fixes: 5b246e533d01 ("ice: split probe into smaller functions") Signed-off-by: Jakub Buchocki Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230612171421.21570-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 03513d4871ab..42c318ceff61 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4802,9 +4802,13 @@ err_init_pf: static void ice_deinit_dev(struct ice_pf *pf) { ice_free_irq_msix_misc(pf); - ice_clear_interrupt_scheme(pf); ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); + + /* Service task is already stopped, so call reset directly. */ + ice_reset(&pf->hw, ICE_RESET_PFR); + pci_wait_for_pending_transaction(pf->pdev); + ice_clear_interrupt_scheme(pf); } static void ice_init_features(struct ice_pf *pf) @@ -5094,10 +5098,6 @@ int ice_load(struct ice_pf *pf) struct ice_vsi *vsi; int err; - err = ice_reset(&pf->hw, ICE_RESET_PFR); - if (err) - return err; - err = ice_init_dev(pf); if (err) return err; @@ -5354,12 +5354,6 @@ static void ice_remove(struct pci_dev *pdev) ice_setup_mc_magic_wake(pf); ice_set_wake(pf); - /* Issue a PFR as part of the prescribed driver unload flow. Do not - * do it via ice_schedule_reset() since there is no need to rebuild - * and the service task is already stopped. - */ - ice_reset(&pf->hw, ICE_RESET_PFR); - pci_wait_for_pending_transaction(pdev); pci_disable_device(pdev); } From c9a82bec02c339cdda99b37c5e62b3b71fc4209c Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 12 Jun 2023 11:34:26 +0200 Subject: [PATCH 779/934] net/sched: cls_api: Fix lockup on flushing explicitly created chain Mingshuai Ren reports: When a new chain is added by using tc, one soft lockup alarm will be generated after delete the prio 0 filter of the chain. To reproduce the problem, perform the following steps: (1) tc qdisc add dev eth0 root handle 1: htb default 1 (2) tc chain add dev eth0 (3) tc filter del dev eth0 chain 0 parent 1: prio 0 (4) tc filter add dev eth0 chain 0 parent 1: Fix the issue by accounting for additional reference to chains that are explicitly created by RTM_NEWCHAIN message as opposed to implicitly by RTM_NEWTFILTER message. Fixes: 726d061286ce ("net: sched: prevent insertion of new classifiers during chain flush") Reported-by: Mingshuai Ren Closes: https://lore.kernel.org/lkml/87legswvi3.fsf@nvidia.com/T/ Signed-off-by: Vlad Buslov Link: https://lore.kernel.org/r/20230612093426.2867183-1-vladbu@nvidia.com Signed-off-by: Jakub Kicinski --- net/sched/cls_api.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c877a6343fd4..a193cc7b3241 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -657,8 +657,8 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, { struct tcf_block *block = chain->block; const struct tcf_proto_ops *tmplt_ops; + unsigned int refcnt, non_act_refcnt; bool free_block = false; - unsigned int refcnt; void *tmplt_priv; mutex_lock(&block->lock); @@ -678,13 +678,15 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, * save these to temporary variables. */ refcnt = --chain->refcnt; + non_act_refcnt = refcnt - chain->action_refcnt; tmplt_ops = chain->tmplt_ops; tmplt_priv = chain->tmplt_priv; - /* The last dropped non-action reference will trigger notification. */ - if (refcnt - chain->action_refcnt == 0 && !by_act) { - tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index, - block, NULL, 0, 0, false); + if (non_act_refcnt == chain->explicitly_created && !by_act) { + if (non_act_refcnt == 0) + tc_chain_notify_delete(tmplt_ops, tmplt_priv, + chain->index, block, NULL, 0, 0, + false); /* Last reference to chain, no need to lock. */ chain->flushing = false; } From 6ac7a27a8b07588497ed53dfd885df9c72bc67e0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 13 Jun 2023 20:09:07 +0300 Subject: [PATCH 780/934] net: dsa: felix: fix taprio guard band overflow at 10Mbps with jumbo frames The DEV_MAC_MAXLEN_CFG register contains a 16-bit value - up to 65535. Plus 2 * VLAN_HLEN (4), that is up to 65543. The picos_per_byte variable is the largest when "speed" is lowest - SPEED_10 = 10. In that case it is (1000000L * 8) / 10 = 800000. Their product - 52434400000 - exceeds 32 bits, which is a problem, because apparently, a multiplication between two 32-bit factors is evaluated as 32-bit before being assigned to a 64-bit variable. In fact it's a problem for any MTU value larger than 5368. Cast one of the factors of the multiplication to u64 to force the multiplication to take place on 64 bits. Issue found by Coverity. Fixes: 55a515b1f5a9 ("net: dsa: felix: drop oversized frames with tc-taprio instead of hanging the port") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230613170907.2413559-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index cfb3faeaa5bf..d172a3e9736c 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1263,7 +1263,7 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) /* Consider the standard Ethernet overhead of 8 octets preamble+SFD, * 4 octets FCS, 12 octets IFG. */ - needed_bit_time_ps = (maxlen + 24) * picos_per_byte; + needed_bit_time_ps = (u64)(maxlen + 24) * picos_per_byte; dev_dbg(ocelot->dev, "port %d: max frame size %d needs %llu ps at speed %d\n", From ddc1729b07cc84bb29f577698b8d2e74a4004a6e Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Thu, 15 Jun 2023 14:35:52 +0800 Subject: [PATCH 781/934] LoongArch: Let pmd_present() return true when splitting pmd When we split a pmd into ptes, pmd_present() and pmd_trans_huge() should return true, otherwise it would be treated as a swap pmd. This is the same as arm64 does in commit b65399f6111b ("arm64/mm: Change THP helpers to comply with generic MM semantics"), we also add a new bit named _PAGE_PRESENT_INVALID for LoongArch. Signed-off-by: Hongchen Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/pgtable-bits.h | 2 ++ arch/loongarch/include/asm/pgtable.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 8b98d22a145b..de46a6b1e9f1 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -22,12 +22,14 @@ #define _PAGE_PFN_SHIFT 12 #define _PAGE_SWP_EXCLUSIVE_SHIFT 23 #define _PAGE_PFN_END_SHIFT 48 +#define _PAGE_PRESENT_INVALID_SHIFT 60 #define _PAGE_NO_READ_SHIFT 61 #define _PAGE_NO_EXEC_SHIFT 62 #define _PAGE_RPLV_SHIFT 63 /* Used by software */ #define _PAGE_PRESENT (_ULCAST_(1) << _PAGE_PRESENT_SHIFT) +#define _PAGE_PRESENT_INVALID (_ULCAST_(1) << _PAGE_PRESENT_INVALID_SHIFT) #define _PAGE_WRITE (_ULCAST_(1) << _PAGE_WRITE_SHIFT) #define _PAGE_ACCESSED (_ULCAST_(1) << _PAGE_ACCESSED_SHIFT) #define _PAGE_MODIFIED (_ULCAST_(1) << _PAGE_MODIFIED_SHIFT) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index d28fb9dbec59..9a9f9ff9b709 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -213,7 +213,7 @@ static inline int pmd_bad(pmd_t pmd) static inline int pmd_present(pmd_t pmd) { if (unlikely(pmd_val(pmd) & _PAGE_HUGE)) - return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE)); + return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PRESENT_INVALID)); return pmd_val(pmd) != (unsigned long)invalid_pte_table; } @@ -558,6 +558,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) static inline pmd_t pmd_mkinvalid(pmd_t pmd) { + pmd_val(pmd) |= _PAGE_PRESENT_INVALID; pmd_val(pmd) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY | _PAGE_PROTNONE); return pmd; From 346dc929623cef70ff7832a4fa0ffd1b696e312a Mon Sep 17 00:00:00 2001 From: Qi Hu Date: Thu, 15 Jun 2023 14:35:52 +0800 Subject: [PATCH 782/934] LoongArch: Fix the write_fcsr() macro The "write_fcsr()" macro uses wrong the positions for val and dest in asm. Fix it! Reported-by: Miao HAO Signed-off-by: Qi Hu Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index b3323ab5b78d..35e8a52fea11 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1496,7 +1496,7 @@ __BUILD_CSR_OP(tlbidx) #define write_fcsr(dest, val) \ do { \ __asm__ __volatile__( \ - " movgr2fcsr %0, "__stringify(dest)" \n" \ + " movgr2fcsr "__stringify(dest)", %0 \n" \ : : "r" (val)); \ } while (0) From 962369120d750cbc9c4dc492f32b4304669ff6aa Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 15 Jun 2023 14:35:52 +0800 Subject: [PATCH 783/934] LoongArch: Fix perf event id calculation LoongArch PMCFG has 10bit event id rather than 8 bit, so fix it. Cc: stable@vger.kernel.org Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/kernel/perf_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index ff28f99b47d7..0491bf453cd4 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -271,7 +271,7 @@ static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); /* Make sure interrupt enabled. */ - cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) | (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE; cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); @@ -594,7 +594,7 @@ static struct pmu pmu = { static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) { - return (pev->event_id & 0xff); + return M_PERFCTL_EVENT(pev->event_id); } static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) @@ -849,7 +849,7 @@ static void resume_local_counters(void) static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config) { - raw_event.event_id = config & 0xff; + raw_event.event_id = M_PERFCTL_EVENT(config); return &raw_event; } From 0246d0aaf0a634a65135050011767b56ba351a8f Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Thu, 15 Jun 2023 14:35:52 +0800 Subject: [PATCH 784/934] LoongArch: Avoid uninitialized alignment_mask The hardware monitoring points for instruction fetching and load/store operations need to align 4 bytes and 1/2/4/8 bytes respectively. Reported-by: Colin King Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/hw_breakpoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index 2406c95b34cc..021b59c248fa 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -396,6 +396,8 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, if (hw->ctrl.type != LOONGARCH_BREAKPOINT_EXECUTE) alignment_mask = 0x7; + else + alignment_mask = 0x3; offset = hw->address & alignment_mask; hw->address &= ~alignment_mask; From 41efbb682de1231c3f6361039f46ad149e3ff5b9 Mon Sep 17 00:00:00 2001 From: Immad Mir Date: Thu, 15 Jun 2023 14:35:56 +0800 Subject: [PATCH 785/934] LoongArch: Fix debugfs_create_dir() error checking The debugfs_create_dir() returns ERR_PTR in case of an error and the correct way of checking it is using the IS_ERR_OR_NULL inline function rather than the simple null comparision. This patch fixes the issue. Cc: stable@vger.kernel.org Suggested-By: Ivan Orlov Signed-off-by: Immad Mir Signed-off-by: Huacai Chen --- arch/loongarch/kernel/unaligned.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c index bdff825d29ef..85fae3d2d71a 100644 --- a/arch/loongarch/kernel/unaligned.c +++ b/arch/loongarch/kernel/unaligned.c @@ -485,7 +485,7 @@ static int __init debugfs_unaligned(void) struct dentry *d; d = debugfs_create_dir("loongarch", NULL); - if (!d) + if (IS_ERR_OR_NULL(d)) return -ENOMEM; debugfs_create_u32("unaligned_instructions_user", From 78d0f94902afce8ec2c7a60f600cc0e3729d7412 Mon Sep 17 00:00:00 2001 From: Julian Ruess Date: Tue, 13 Jun 2023 14:25:37 +0200 Subject: [PATCH 786/934] s390/ism: Fix trying to free already-freed IRQ by repeated ism_dev_exit() This patch prevents the system from crashing when unloading the ISM module. How to reproduce: Attach an ISM device and execute 'rmmod ism'. Error-Log: - Trying to free already-free IRQ 0 - WARNING: CPU: 1 PID: 966 at kernel/irq/manage.c:1890 free_irq+0x140/0x540 After calling ism_dev_exit() for each ISM device in the exit routine, pci_unregister_driver() will execute ism_remove() for each ISM device. Because ism_remove() also calls ism_dev_exit(), free_irq(pci_irq_vector(pdev, 0), ism) is called twice for each ISM device. This results in a crash with the error 'Trying to free already-free IRQ'. In the exit routine, it is enough to call pci_unregister_driver() because it ensures that ism_dev_exit() is called once per ISM device. Cc: # 6.3+ Fixes: 89e7d2ba61b7 ("net/ism: Add new API for client registration") Reviewed-by: Niklas Schnelle Signed-off-by: Julian Ruess Signed-off-by: David S. Miller --- drivers/s390/net/ism_drv.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index 8acb9eba691b..c2096e4bba31 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -771,14 +771,6 @@ static int __init ism_init(void) static void __exit ism_exit(void) { - struct ism_dev *ism; - - mutex_lock(&ism_dev_list.mutex); - list_for_each_entry(ism, &ism_dev_list.list, list) { - ism_dev_exit(ism); - } - mutex_unlock(&ism_dev_list.mutex); - pci_unregister_driver(&ism_driver); debug_unregister(ism_debug_info); } From 7d03646d77cf837598b11c645ed2a79f29319111 Mon Sep 17 00:00:00 2001 From: Jan Karcher Date: Wed, 14 Jun 2023 08:54:56 +0200 Subject: [PATCH 787/934] MAINTAINERS: add reviewers for SMC Sockets adding three people from Alibaba as reviewers for SMC. They are currently working on improving SMC on other architectures than s390 and help with reviewing patches on top. Thank you D. Wythe, Tony Lu and Wen Gu for your contributions and collaboration and welcome on board as reviewers! Reviewed-by: Wenjia Zhang Signed-off-by: Jan Karcher Acked-by: Tony Lu Acked-by: Wen Gu Signed-off-by: David S. Miller --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a2f1d14031b8..c6fa6ed454f4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19130,6 +19130,9 @@ SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS M: Karsten Graul M: Wenjia Zhang M: Jan Karcher +R: D. Wythe +R: Tony Lu +R: Wen Gu L: linux-s390@vger.kernel.org S: Supported F: net/smc/ From 9eed321cde22fc1afd76eac563ce19d899e0d6b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Jun 2023 16:18:02 +0000 Subject: [PATCH 788/934] net: lapbether: only support ethernet devices It probbaly makes no sense to support arbitrary network devices for lapbether. syzbot reported: skbuff: skb_under_panic: text:ffff80008934c100 len:44 put:40 head:ffff0000d18dd200 data:ffff0000d18dd1ea tail:0x16 end:0x140 dev:bond1 kernel BUG at net/core/skbuff.c:200 ! Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 5643 Comm: dhcpcd Not tainted 6.4.0-rc5-syzkaller-g4641cff8e810 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/25/2023 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : skb_panic net/core/skbuff.c:196 [inline] pc : skb_under_panic+0x13c/0x140 net/core/skbuff.c:210 lr : skb_panic net/core/skbuff.c:196 [inline] lr : skb_under_panic+0x13c/0x140 net/core/skbuff.c:210 sp : ffff8000973b7260 x29: ffff8000973b7270 x28: ffff8000973b7360 x27: dfff800000000000 x26: ffff0000d85d8150 x25: 0000000000000016 x24: ffff0000d18dd1ea x23: ffff0000d18dd200 x22: 000000000000002c x21: 0000000000000140 x20: 0000000000000028 x19: ffff80008934c100 x18: ffff8000973b68a0 x17: 0000000000000000 x16: ffff80008a43bfbc x15: 0000000000000202 x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001 x11: 0000000000000201 x10: 0000000000000000 x9 : f22f7eb937cced00 x8 : f22f7eb937cced00 x7 : 0000000000000001 x6 : 0000000000000001 x5 : ffff8000973b6b78 x4 : ffff80008df9ee80 x3 : ffff8000805974f4 x2 : 0000000000000001 x1 : 0000000100000201 x0 : 0000000000000086 Call trace: skb_panic net/core/skbuff.c:196 [inline] skb_under_panic+0x13c/0x140 net/core/skbuff.c:210 skb_push+0xf0/0x108 net/core/skbuff.c:2409 ip6gre_header+0xbc/0x738 net/ipv6/ip6_gre.c:1383 dev_hard_header include/linux/netdevice.h:3137 [inline] lapbeth_data_transmit+0x1c4/0x298 drivers/net/wan/lapbether.c:257 lapb_data_transmit+0x8c/0xb0 net/lapb/lapb_iface.c:447 lapb_transmit_buffer+0x178/0x204 net/lapb/lapb_out.c:149 lapb_send_control+0x220/0x320 net/lapb/lapb_subr.c:251 lapb_establish_data_link+0x94/0xec lapb_device_event+0x348/0x4e0 notifier_call_chain+0x1a4/0x510 kernel/notifier.c:93 raw_notifier_call_chain+0x3c/0x50 kernel/notifier.c:461 __dev_notify_flags+0x2bc/0x544 dev_change_flags+0xd0/0x15c net/core/dev.c:8643 devinet_ioctl+0x858/0x17e4 net/ipv4/devinet.c:1150 inet_ioctl+0x2ac/0x4d8 net/ipv4/af_inet.c:979 sock_do_ioctl+0x134/0x2dc net/socket.c:1201 sock_ioctl+0x4ec/0x858 net/socket.c:1318 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __arm64_sys_ioctl+0x14c/0x1c8 fs/ioctl.c:856 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall+0x98/0x2c0 arch/arm64/kernel/syscall.c:52 el0_svc_common+0x138/0x244 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:191 el0_svc+0x4c/0x160 arch/arm64/kernel/entry-common.c:647 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:665 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 Code: aa1803e6 aa1903e7 a90023f5 947730f5 (d4210000) Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Martin Schiller Signed-off-by: David S. Miller --- drivers/net/wan/lapbether.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index d62a904d2e42..56326f38fe8a 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -384,6 +384,9 @@ static int lapbeth_new_device(struct net_device *dev) ASSERT_RTNL(); + if (dev->type != ARPHRD_ETHER) + return -EINVAL; + ndev = alloc_netdev(sizeof(*lapbeth), "lapb%d", NET_NAME_UNKNOWN, lapbeth_setup); if (!ndev) From 0c0cf3db83f8c7c9bb141c2771a34043bcf952ef Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Tue, 13 Jun 2023 22:22:20 +0300 Subject: [PATCH 789/934] net: macsec: fix double free of percpu stats Inside macsec_add_dev() we free percpu macsec->secy.tx_sc.stats and macsec->stats on some of the memory allocation failure paths. However, the net_device is already registered to that moment: in macsec_newlink(), just before calling macsec_add_dev(). This means that during unregister process its priv_destructor - macsec_free_netdev() - will be called and will free the stats again. Remove freeing percpu stats inside macsec_add_dev() because macsec_free_netdev() will correctly free the already allocated ones. The pointers to unallocated stats stay NULL, and free_percpu() treats that correctly. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 0a28bfd4971f ("net/macsec: Add MACsec skb_metadata_dst Tx Data path support") Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Fedor Pchelkin Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3427993f94f7..984dfa5d6c11 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3997,17 +3997,15 @@ static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) return -ENOMEM; secy->tx_sc.stats = netdev_alloc_pcpu_stats(struct pcpu_tx_sc_stats); - if (!secy->tx_sc.stats) { - free_percpu(macsec->stats); + if (!secy->tx_sc.stats) return -ENOMEM; - } secy->tx_sc.md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); - if (!secy->tx_sc.md_dst) { - free_percpu(secy->tx_sc.stats); - free_percpu(macsec->stats); + if (!secy->tx_sc.md_dst) + /* macsec and secy percpu stats will be freed when unregistering + * net_device in macsec_free_netdev() + */ return -ENOMEM; - } if (sci == MACSEC_UNDEF_SCI) sci = dev_to_sci(dev, MACSEC_PORT_ES); From e84a1e1e683f3558e30f437d7c99df35afb8b52c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Tue, 13 Jun 2023 15:38:54 +0200 Subject: [PATCH 790/934] sfc: fix XDP queues mode with legacy IRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In systems without MSI-X capabilities, xdp_txq_queues_mode is calculated in efx_allocate_msix_channels, but when enabling MSI-X fails, it was not changed to a proper default value. This was leading to the driver thinking that it has dedicated XDP queues, when it didn't. Fix it by setting xdp_txq_queues_mode to the correct value if the driver fallbacks to MSI or legacy IRQ mode. The correct value is EFX_XDP_TX_QUEUES_BORROWED because there are no XDP dedicated queues. The issue can be easily visible if the kernel is started with pci=nomsi, then a call trace is shown. It is not shown only with sfc's modparam interrupt_mode=2. Call trace example: WARNING: CPU: 2 PID: 663 at drivers/net/ethernet/sfc/efx_channels.c:828 efx_set_xdp_channels+0x124/0x260 [sfc] [...skip...] Call Trace: efx_set_channels+0x5c/0xc0 [sfc] efx_probe_nic+0x9b/0x15a [sfc] efx_probe_all+0x10/0x1a2 [sfc] efx_pci_probe_main+0x12/0x156 [sfc] efx_pci_probe_post_io+0x18/0x103 [sfc] efx_pci_probe.cold+0x154/0x257 [sfc] local_pci_probe+0x42/0x80 Fixes: 6215b608a8c4 ("sfc: last resort fallback for lack of xdp tx queues") Reported-by: Yanghang Liu Signed-off-by: Íñigo Huguet Acked-by: Martin Habets Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_channels.c | 2 ++ drivers/net/ethernet/sfc/siena/efx_channels.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index fcea3ea809d7..41b33a75333c 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -301,6 +301,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; rc = pci_enable_msi(efx->pci_dev); if (rc == 0) { efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; @@ -322,6 +323,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = efx_separate_tx_channels ? 1 : 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; efx->legacy_irq = efx->pci_dev->irq; } diff --git a/drivers/net/ethernet/sfc/siena/efx_channels.c b/drivers/net/ethernet/sfc/siena/efx_channels.c index 06ed74994e36..1776f7f8a7a9 100644 --- a/drivers/net/ethernet/sfc/siena/efx_channels.c +++ b/drivers/net/ethernet/sfc/siena/efx_channels.c @@ -302,6 +302,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; rc = pci_enable_msi(efx->pci_dev); if (rc == 0) { efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; @@ -323,6 +324,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = efx_siena_separate_tx_channels ? 1 : 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; efx->legacy_irq = efx->pci_dev->irq; } From 9d7054fb3ac2e8d252aae1268f20623f244e644f Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 15 Jun 2023 14:51:45 +0200 Subject: [PATCH 791/934] spi: spi-geni-qcom: correctly handle -EPROBE_DEFER from dma_request_chan() Now spi_geni_grab_gpi_chan() errors are correctly reported, the -EPROBE_DEFER error should be returned from probe in case the GPI dma driver is built as module and/or not probed yet. Fixes: b59c122484ec ("spi: spi-geni-qcom: Add support for GPI dma") Fixes: 6532582c353f ("spi: spi-geni-qcom: fix error handling in spi_geni_grab_gpi_chan()") Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20230615-topic-sm8550-upstream-fix-spi-geni-qcom-probe-v2-1-670c3d9e8c9c@linaro.org Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index a98b781b103a..b293428760bc 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -646,6 +646,8 @@ static int spi_geni_init(struct spi_geni_master *mas) geni_se_select_mode(se, GENI_GPI_DMA); dev_dbg(mas->dev, "Using GPI DMA mode for SPI\n"); break; + } else if (ret == -EPROBE_DEFER) { + goto out_pm; } /* * in case of failure to get gpi dma channel, we can still do the From 22db06337f590d01d79f60f181d8dfe5a9ef9085 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 14 Jun 2023 17:29:21 +0200 Subject: [PATCH 792/934] ACPI: sleep: Avoid breaking S3 wakeup due to might_sleep() The addition of might_sleep() to down_timeout() caused the latter to enable interrupts unconditionally in some cases, which in turn broke the ACPI S3 wakeup path in acpi_suspend_enter(), where down_timeout() is called by acpi_disable_all_gpes() via acpi_ut_acquire_mutex(). Namely, if CONFIG_DEBUG_ATOMIC_SLEEP is set, might_sleep() causes might_resched() to be used and if CONFIG_PREEMPT_VOLUNTARY is set, this triggers __cond_resched() which may call preempt_schedule_common(), so __schedule() gets invoked and it ends up with enabled interrupts (in the prev == next case). Now, enabling interrupts early in the S3 wakeup path causes the kernel to crash. Address this by modifying acpi_suspend_enter() to disable GPEs without attempting to acquire the sleeping lock which is not needed in that code path anyway. Fixes: 99409b935c9a ("locking/semaphore: Add might_sleep() to down_*() family") Reported-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Cc: 5.15+ # 5.15+ --- drivers/acpi/acpica/achware.h | 2 -- drivers/acpi/sleep.c | 16 ++++++++++++---- include/acpi/acpixf.h | 1 + 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h index ebf8fd373cf7..79bbfe00d241 100644 --- a/drivers/acpi/acpica/achware.h +++ b/drivers/acpi/acpica/achware.h @@ -101,8 +101,6 @@ acpi_status acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info, acpi_event_status *event_status); -acpi_status acpi_hw_disable_all_gpes(void); - acpi_status acpi_hw_enable_all_runtime_gpes(void); acpi_status acpi_hw_enable_all_wakeup_gpes(void); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 72470b9f16c4..f32570f72b90 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -636,11 +636,19 @@ static int acpi_suspend_enter(suspend_state_t pm_state) } /* - * Disable and clear GPE status before interrupt is enabled. Some GPEs - * (like wakeup GPE) haven't handler, this can avoid such GPE misfire. - * acpi_leave_sleep_state will reenable specific GPEs later + * Disable all GPE and clear their status bits before interrupts are + * enabled. Some GPEs (like wakeup GPEs) have no handlers and this can + * prevent them from producing spurious interrups. + * + * acpi_leave_sleep_state() will reenable specific GPEs later. + * + * Because this code runs on one CPU with disabled interrupts (all of + * the other CPUs are offline at this time), it need not acquire any + * sleeping locks which may trigger an implicit preemption point even + * if there is no contention, so avoid doing that by using a low-level + * library routine here. */ - acpi_disable_all_gpes(); + acpi_hw_disable_all_gpes(); /* Allow EC transactions to happen. */ acpi_ec_unblock_transactions(); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index e6098a08c914..9ffdc0425bc2 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -761,6 +761,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_event_status *event_status)) ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number)) +ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_hw_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) From 0bb619f9227aa370330d2b309733d74750705053 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 14 Jun 2023 12:07:56 +0200 Subject: [PATCH 793/934] thermal/intel/intel_soc_dts_iosf: Fix reporting wrong temperatures Since commit 955fb8719efb ("thermal/intel/intel_soc_dts_iosf: Use Intel TCC library") intel_soc_dts_iosf is reporting the wrong temperature. The driver expects tj_max to be in milli-degrees-celcius but after the switch to the TCC library this is now in degrees celcius so instead of e.g. 90000 it is set to 90 causing a temperature 45 degrees below tj_max to be reported as -44910 milli-degrees instead of as 45000 milli-degrees. Fix this by adding back the lost factor of 1000. Fixes: 955fb8719efb ("thermal/intel/intel_soc_dts_iosf: Use Intel TCC library") Reported-by: Bernhard Krug Signed-off-by: Hans de Goede Acked-by: Zhang Rui Cc: 6.3+ # 6.3+ Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_soc_dts_iosf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c index f99dc7e4ae89..db97499f4f0a 100644 --- a/drivers/thermal/intel/intel_soc_dts_iosf.c +++ b/drivers/thermal/intel/intel_soc_dts_iosf.c @@ -398,7 +398,7 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init( spin_lock_init(&sensors->intr_notify_lock); mutex_init(&sensors->dts_update_lock); sensors->intr_type = intr_type; - sensors->tj_max = tj_max; + sensors->tj_max = tj_max * 1000; if (intr_type == INTEL_SOC_DTS_INTERRUPT_NONE) notification = false; else From 2760904d895279f87196f0fa9ec570c79fe6a2e4 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Thu, 1 Jun 2023 14:14:23 +0800 Subject: [PATCH 794/934] dm: don't lock fs when the map is NULL during suspend or resume As described in commit 38d11da522aa ("dm: don't lock fs when the map is NULL in process of resume"), a deadlock may be triggered between do_resume() and do_mount(). This commit preserves the fix from commit 38d11da522aa but moves it to where it also serves to fix a similar deadlock between do_suspend() and do_mount(). It does so, if the active map is NULL, by clearing DM_SUSPEND_LOCKFS_FLAG in dm_suspend() which is called by both do_suspend() and do_resume(). Fixes: 38d11da522aa ("dm: don't lock fs when the map is NULL in process of resume") Signed-off-by: Li Lingfeng Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 5 +---- drivers/md/dm.c | 4 ++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index cc77cf3d4109..7d5c9c582ed2 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1168,13 +1168,10 @@ static int do_resume(struct dm_ioctl *param) /* Do we need to load a new map ? */ if (new_map) { sector_t old_size, new_size; - int srcu_idx; /* Suspend if it isn't already suspended */ - old_map = dm_get_live_table(md, &srcu_idx); - if ((param->flags & DM_SKIP_LOCKFS_FLAG) || !old_map) + if (param->flags & DM_SKIP_LOCKFS_FLAG) suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; - dm_put_live_table(md, srcu_idx); if (param->flags & DM_NOFLUSH_FLAG) suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; if (!dm_suspended_md(md)) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3b694ba3a106..8488547fc00d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2808,6 +2808,10 @@ retry: } map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + if (!map) { + /* avoid deadlock with fs/namespace.c:do_mount() */ + suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; + } r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED); if (r) From cb65b282c9640c27d3129e2e04b711ce1b352838 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Tue, 6 Jun 2023 20:20:24 +0800 Subject: [PATCH 795/934] dm thin metadata: check fail_io before using data_sm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Must check pmd->fail_io before using pmd->data_sm since pmd->data_sm may be destroyed by other processes. P1(kworker) P2(message) do_worker process_prepared process_prepared_discard_passdown_pt2 dm_pool_dec_data_range pool_message commit dm_pool_commit_metadata ↓ // commit failed metadata_operation_failed abort_transaction dm_pool_abort_metadata __open_or_format_metadata ↓ dm_sm_disk_open ↓ // open failed // pmd->data_sm is NULL dm_sm_dec_blocks ↓ // try to access pmd->data_sm --> UAF As shown above, if dm_pool_commit_metadata() and dm_pool_abort_metadata() fail in pool_message process, kworker may trigger UAF. Fixes: be500ed721a6 ("dm space maps: improve performance with inc/dec on ranges of blocks") Cc: stable@vger.kernel.org Signed-off-by: Li Lingfeng Signed-off-by: Mike Snitzer --- drivers/md/dm-thin-metadata.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 9f5cb52c5763..b9461faa9f0d 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1756,13 +1756,15 @@ int dm_thin_remove_range(struct dm_thin_device *td, int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) { - int r; + int r = -EINVAL; uint32_t ref_count; down_read(&pmd->root_lock); - r = dm_sm_get_count(pmd->data_sm, b, &ref_count); - if (!r) - *result = (ref_count > 1); + if (!pmd->fail_io) { + r = dm_sm_get_count(pmd->data_sm, b, &ref_count); + if (!r) + *result = (ref_count > 1); + } up_read(&pmd->root_lock); return r; @@ -1770,10 +1772,11 @@ int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *re int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e) { - int r = 0; + int r = -EINVAL; pmd_write_lock(pmd); - r = dm_sm_inc_blocks(pmd->data_sm, b, e); + if (!pmd->fail_io) + r = dm_sm_inc_blocks(pmd->data_sm, b, e); pmd_write_unlock(pmd); return r; @@ -1781,10 +1784,11 @@ int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_ int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e) { - int r = 0; + int r = -EINVAL; pmd_write_lock(pmd); - r = dm_sm_dec_blocks(pmd->data_sm, b, e); + if (!pmd->fail_io) + r = dm_sm_dec_blocks(pmd->data_sm, b, e); pmd_write_unlock(pmd); return r; From 722d90822321497e2837cfc9000202e256e6b32f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 13 Jun 2023 20:05:34 -0400 Subject: [PATCH 796/934] dm thin: fix issue_discard to pass GFP_NOIO to __blkdev_issue_discard issue_discard() passes GFP_NOWAIT to __blkdev_issue_discard() despite its code assuming bio_alloc() always succeeds. Commit 3dba53a958a75 ("dm thin: use __blkdev_issue_discard for async discard support") clearly shows where things went bad: Before commit 3dba53a958a75, dm-thin.c's open-coded __blkdev_issue_discard_async() properly handled using GFP_NOWAIT. Unfortunately __blkdev_issue_discard() doesn't and it was missed during review. Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2b13c949bd72..39410bf186cf 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -401,8 +401,7 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da sector_t s = block_to_sectors(tc->pool, data_b); sector_t len = block_to_sectors(tc->pool, data_e - data_b); - return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOWAIT, - &op->bio); + return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio); } static void end_discard(struct discard_op *op, int r) From be04c14a1bd262a49e5764e5cf864259b7e740fd Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 14 Jun 2023 21:47:46 -0400 Subject: [PATCH 797/934] dm: use op specific max_sectors when splitting abnormal io Split abnormal IO in terms of the corresponding operation specific max_sectors (max_discard_sectors, max_secure_erase_sectors or max_write_zeroes_sectors). This fixes a significant dm-thinp discard performance regression that was introduced with commit e2dd8aca2d76 ("dm bio prison v1: improve concurrent IO performance"). Relative to discard: max_discard_sectors is used instead of max_sectors; which fixes excessive discard splitting (e.g. max_sectors=128K vs max_discard_sectors=64M). Tested by discarding an 1 Petabyte dm-thin device: lvcreate -V 1125899906842624B -T test/pool -n thin time blkdiscard /dev/test/thin Before this fix (splitting discards every 128K): ~116m After this fix (splitting discards every 64M) : 0m33.460s Reported-by: Zorro Lang Fixes: 06961c487a33 ("dm: split discards further if target sets max_discard_granularity") Requires: 13f6facf3fae ("dm: allow targets to require splitting WRITE_ZEROES and SECURE_ERASE") Fixes: e2dd8aca2d76 ("dm bio prison v1: improve concurrent IO performance") Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8488547fc00d..fffb0cbe2ac8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1172,7 +1172,8 @@ static inline sector_t max_io_len_target_boundary(struct dm_target *ti, } static sector_t __max_io_len(struct dm_target *ti, sector_t sector, - unsigned int max_granularity) + unsigned int max_granularity, + unsigned int max_sectors) { sector_t target_offset = dm_target_offset(ti, sector); sector_t len = max_io_len_target_boundary(ti, target_offset); @@ -1186,13 +1187,13 @@ static sector_t __max_io_len(struct dm_target *ti, sector_t sector, if (!max_granularity) return len; return min_t(sector_t, len, - min(queue_max_sectors(ti->table->md->queue), + min(max_sectors ? : queue_max_sectors(ti->table->md->queue), blk_chunk_sectors_left(target_offset, max_granularity))); } static inline sector_t max_io_len(struct dm_target *ti, sector_t sector) { - return __max_io_len(ti, sector, ti->max_io_len); + return __max_io_len(ti, sector, ti->max_io_len, 0); } int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) @@ -1581,12 +1582,13 @@ static void __send_empty_flush(struct clone_info *ci) static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, unsigned int num_bios, - unsigned int max_granularity) + unsigned int max_granularity, + unsigned int max_sectors) { unsigned int len, bios; len = min_t(sector_t, ci->sector_count, - __max_io_len(ti, ci->sector, max_granularity)); + __max_io_len(ti, ci->sector, max_granularity, max_sectors)); atomic_add(num_bios, &ci->io->io_count); bios = __send_duplicate_bios(ci, ti, num_bios, &len); @@ -1623,23 +1625,27 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, { unsigned int num_bios = 0; unsigned int max_granularity = 0; + unsigned int max_sectors = 0; struct queue_limits *limits = dm_get_queue_limits(ti->table->md); switch (bio_op(ci->bio)) { case REQ_OP_DISCARD: num_bios = ti->num_discard_bios; + max_sectors = limits->max_discard_sectors; if (ti->max_discard_granularity) - max_granularity = limits->max_discard_sectors; + max_granularity = max_sectors; break; case REQ_OP_SECURE_ERASE: num_bios = ti->num_secure_erase_bios; + max_sectors = limits->max_secure_erase_sectors; if (ti->max_secure_erase_granularity) - max_granularity = limits->max_secure_erase_sectors; + max_granularity = max_sectors; break; case REQ_OP_WRITE_ZEROES: num_bios = ti->num_write_zeroes_bios; + max_sectors = limits->max_write_zeroes_sectors; if (ti->max_write_zeroes_granularity) - max_granularity = limits->max_write_zeroes_sectors; + max_granularity = max_sectors; break; default: break; @@ -1654,7 +1660,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, if (unlikely(!num_bios)) return BLK_STS_NOTSUPP; - __send_changing_extent_only(ci, ti, num_bios, max_granularity); + __send_changing_extent_only(ci, ti, num_bios, + max_granularity, max_sectors); return BLK_STS_OK; } From 44194cb1b6045dea33ae9a0d54fb7e7cd93a2e09 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 14 Jun 2023 20:06:04 +0800 Subject: [PATCH 798/934] net: tipc: resize nlattr array to correct size According to nla_parse_nested_deprecated(), the tb[] is supposed to the destination array with maxtype+1 elements. In current tipc_nl_media_get() and __tipc_nl_media_set(), a larger array is used which is unnecessary. This patch resize them to a proper size. Fixes: 1e55417d8fc6 ("tipc: add media set to new netlink api") Fixes: 46f15c6794fb ("tipc: add media get/dump to new netlink api") Signed-off-by: Lin Ma Reviewed-by: Florian Westphal Reviewed-by: Tung Nguyen Link: https://lore.kernel.org/r/20230614120604.1196377-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/tipc/bearer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 53881406e200..cdcd2731860b 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1258,7 +1258,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) struct tipc_nl_msg msg; struct tipc_media *media; struct sk_buff *rep; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; @@ -1307,7 +1307,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) int err; char *name; struct tipc_media *m; - struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1]; if (!info->attrs[TIPC_NLA_MEDIA]) return -EINVAL; From 30134b7c47bd28fdb4db4d12aef824e0579cfee4 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 14 Jun 2023 11:17:14 +0200 Subject: [PATCH 799/934] net: ethernet: stmicro: stmmac: fix possible memory leak in __stmmac_open Fix a possible memory leak in __stmmac_open when stmmac_init_phy fails. It's also needed to free everything allocated by stmmac_setup_dma_desc and not just the dma_conf struct. Drop free_dma_desc_resources from __stmmac_open and correctly call free_dma_desc_resources on each user of __stmmac_open on error. Reported-by: Jose Abreu Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open") Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org Reviewed-by: Simon Horman Reviewed-by: Jose Abreu Link: https://lore.kernel.org/r/20230614091714.15912-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 52cab9de05f2..87510951f4e8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3873,7 +3873,6 @@ irq_error: stmmac_hw_teardown(dev); init_error: - free_dma_desc_resources(priv, &priv->dma_conf); phylink_disconnect_phy(priv->phylink); init_phy_error: pm_runtime_put(priv->device); @@ -3891,6 +3890,9 @@ static int stmmac_open(struct net_device *dev) return PTR_ERR(dma_conf); ret = __stmmac_open(dev, dma_conf); + if (ret) + free_dma_desc_resources(priv, dma_conf); + kfree(dma_conf); return ret; } @@ -5633,12 +5635,15 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) stmmac_release(dev); ret = __stmmac_open(dev, dma_conf); - kfree(dma_conf); if (ret) { + free_dma_desc_resources(priv, dma_conf); + kfree(dma_conf); netdev_err(priv->dev, "failed reopening the interface after MTU change\n"); return ret; } + kfree(dma_conf); + stmmac_set_rx_mode(dev); } From 76a4c8b82938bc5020b67663db41f451684bf327 Mon Sep 17 00:00:00 2001 From: Alex Maftei Date: Thu, 15 Jun 2023 09:34:04 +0100 Subject: [PATCH 800/934] selftests/ptp: Fix timestamp printf format for PTP_SYS_OFFSET Previously, timestamps were printed using "%lld.%u" which is incorrect for nanosecond values lower than 100,000,000 as they're fractional digits, therefore leading zeros are meaningful. This patch changes the format strings to "%lld.%09u" in order to add leading zeros to the nanosecond value. Fixes: 568ebc5985f5 ("ptp: add the PTP_SYS_OFFSET ioctl to the testptp program") Fixes: 4ec54f95736f ("ptp: Fix compiler warnings in the testptp utility") Fixes: 6ab0e475f1f3 ("Documentation: fix misc. warnings") Signed-off-by: Alex Maftei Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20230615083404.57112-1-alex.maftei@amd.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/ptp/testptp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index 198ad5f32187..cfa9562f3cd8 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -502,11 +502,11 @@ int main(int argc, char *argv[]) interval = t2 - t1; offset = (t2 + t1) / 2 - tp; - printf("system time: %lld.%u\n", + printf("system time: %lld.%09u\n", (pct+2*i)->sec, (pct+2*i)->nsec); - printf("phc time: %lld.%u\n", + printf("phc time: %lld.%09u\n", (pct+2*i+1)->sec, (pct+2*i+1)->nsec); - printf("system time: %lld.%u\n", + printf("system time: %lld.%09u\n", (pct+2*i+2)->sec, (pct+2*i+2)->nsec); printf("system/phc clock time offset is %" PRId64 " ns\n" "system clock time delay is %" PRId64 " ns\n", From 9a36e2d44d122fe73a2a76ba73f1d50a65cf8210 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 15 Jun 2023 11:34:00 +0800 Subject: [PATCH 801/934] octeon_ep: Add missing check for ioremap Add check for ioremap() and return the error if it fails in order to guarantee the success of ioremap(). Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Jiasheng Jiang Reviewed-by: Kalesh AP Link: https://lore.kernel.org/r/20230615033400.2971-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index e1853da280f9..43eb6e871351 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -981,6 +981,9 @@ int octep_device_setup(struct octep_device *oct) oct->mmio[i].hw_addr = ioremap(pci_resource_start(oct->pdev, i * 2), pci_resource_len(oct->pdev, i * 2)); + if (!oct->mmio[i].hw_addr) + goto unmap_prev; + oct->mmio[i].mapped = 1; } @@ -1015,7 +1018,9 @@ int octep_device_setup(struct octep_device *oct) return 0; unsupported_dev: - for (i = 0; i < OCTEP_MMIO_REGIONS; i++) + i = OCTEP_MMIO_REGIONS; +unmap_prev: + while (i--) iounmap(oct->mmio[i].hw_addr); kfree(oct->conf); From be28c14ac8bbe1ff0b2a18a06cd10981f90fc741 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 14 Jun 2023 12:47:04 -0700 Subject: [PATCH 802/934] udplite: Print deprecation notice. Recently syzkaller reported a 7-year-old null-ptr-deref [0] that occurs when a UDP-Lite socket tries to allocate a buffer under memory pressure. Someone should have stumbled on the bug much earlier if UDP-Lite had been used in a real app. Also, we do not always need a large UDP-Lite workload to hit the bug since UDP and UDP-Lite share the same memory accounting limit. Removing UDP-Lite would simplify UDP code removing a bunch of conditionals in fast path. Let's add a deprecation notice when UDP-Lite socket is created and schedule its removal to 2025. Link: https://lore.kernel.org/netdev/20230523163305.66466-1-kuniyu@amazon.com/ [0] Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski --- net/ipv4/udplite.c | 2 ++ net/ipv6/udplite.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 56d94d23b9e0..143f93a12f25 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -22,6 +22,8 @@ static int udplite_sk_init(struct sock *sk) { udp_init_sock(sk); udp_sk(sk)->pcflag = UDPLITE_BIT; + pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " + "please contact the netdev mailing list\n"); return 0; } diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 3bab0cc13697..8e010d07917a 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -8,6 +8,8 @@ * Changes: * Fixes: */ +#define pr_fmt(fmt) "UDPLite6: " fmt + #include #include #include "udp_impl.h" @@ -16,6 +18,8 @@ static int udplitev6_sk_init(struct sock *sk) { udpv6_init_sock(sk); udp_sk(sk)->pcflag = UDPLITE_BIT; + pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " + "please contact the netdev mailing list\n"); return 0; } From b144fcaf46d43b1471ad6e4de66235b8cebb3c87 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 14 Jun 2023 12:47:05 -0700 Subject: [PATCH 803/934] dccp: Print deprecation notice. DCCP was marked as Orphan in the MAINTAINERS entry 2 years ago in commit 054c4610bd05 ("MAINTAINERS: dccp: move Gerrit Renker to CREDITS"). It says we haven't heard from the maintainer for five years, so DCCP is not well maintained for 7 years now. Recently DCCP only receives updates for bugs, and major distros disable it by default. Removing DCCP would allow for better organisation of TCP fields to reduce the number of cache lines hit in the fast path. Let's add a deprecation notice when DCCP socket is created and schedule its removal to 2025. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski --- net/dccp/proto.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a06b5641287a..b0ebf853cb07 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -191,6 +191,9 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) struct dccp_sock *dp = dccp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, " + "please contact the netdev mailing list\n"); + icsk->icsk_rto = DCCP_TIMEOUT_INIT; icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; From c8a5d5ea3ba6a18958f8d76430e4cd68eea33943 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 Jun 2023 12:22:11 +1000 Subject: [PATCH 804/934] nouveau: fix client work fence deletion race This seems to have existed for ever but is now more apparant after commit 9bff18d13473 ("drm/ttm: use per BO cleanup workers") My analysis: two threads are running, one in the irq signalling the fence, in dma_fence_signal_timestamp_locked, it has done the DMA_FENCE_FLAG_SIGNALLED_BIT setting, but hasn't yet reached the callbacks. The second thread in nouveau_cli_work_ready, where it sees the fence is signalled, so then puts the fence, cleanups the object and frees the work item, which contains the callback. Thread one goes again and tries to call the callback and causes the use-after-free. Proposed fix: lock the fence signalled check in nouveau_cli_work_ready, so either the callbacks are done or the memory is freed. Reviewed-by: Karol Herbst Fixes: 11e451e74050 ("drm/nouveau: remove fence wait code from deferred client work handler") Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Link: https://lore.kernel.org/dri-devel/20230615024008.1600281-1-airlied@gmail.com/ --- drivers/gpu/drm/nouveau/nouveau_drm.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index cc7c5b4a05fd..7aac9384600e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -137,10 +137,16 @@ nouveau_name(struct drm_device *dev) static inline bool nouveau_cli_work_ready(struct dma_fence *fence) { - if (!dma_fence_is_signaled(fence)) - return false; - dma_fence_put(fence); - return true; + bool ret = true; + + spin_lock_irq(fence->lock); + if (!dma_fence_is_signaled_locked(fence)) + ret = false; + spin_unlock_irq(fence->lock); + + if (ret == true) + dma_fence_put(fence); + return ret; } static void From 372b304c1e517c4d3e5b6ca6c9cfb20f027c3b03 Mon Sep 17 00:00:00 2001 From: Magali Lemes Date: Tue, 13 Jun 2023 09:32:19 -0300 Subject: [PATCH 805/934] selftests/harness: allow tests to be skipped during setup Before executing each test from a fixture, FIXTURE_SETUP is run once. When SKIP is used in FIXTURE_SETUP, the setup function returns early but the test still proceeds to run, unless another SKIP macro is used within the test definition, leading to some code repetition. Therefore, allow tests to be skipped directly from the setup function. Suggested-by: Jakub Kicinski Signed-off-by: Magali Lemes Signed-off-by: Jakub Kicinski --- tools/testing/selftests/kselftest_harness.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index d8bff2005dfc..5fd49ad0c696 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -249,7 +249,7 @@ /** * FIXTURE_SETUP() - Prepares the setup function for the fixture. - * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. + * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly. * * @fixture_name: fixture name * @@ -275,7 +275,7 @@ /** * FIXTURE_TEARDOWN() - * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. + * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly. * * @fixture_name: fixture name * @@ -388,7 +388,7 @@ if (setjmp(_metadata->env) == 0) { \ fixture_name##_setup(_metadata, &self, variant->data); \ /* Let setup failure terminate early. */ \ - if (!_metadata->passed) \ + if (!_metadata->passed || _metadata->skip) \ return; \ _metadata->setup_completed = true; \ fixture_name##_##test_name(_metadata, &self, variant->data); \ From d113c395c67b62fc0d3f2004c0afc406aca0a2b7 Mon Sep 17 00:00:00 2001 From: Magali Lemes Date: Tue, 13 Jun 2023 09:32:20 -0300 Subject: [PATCH 806/934] selftests: net: tls: check if FIPS mode is enabled TLS selftests use the ChaCha20-Poly1305 and SM4 algorithms, which are not FIPS compliant. When fips=1, this set of tests fails. Add a check and only run these tests if not in FIPS mode. Fixes: 4f336e88a870 ("selftests/tls: add CHACHA20-POLY1305 to tls selftests") Fixes: e506342a03c7 ("selftests/tls: add SM4 GCM/CCM to tls selftests") Reviewed-by: Jakub Kicinski Signed-off-by: Magali Lemes Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e699548d4247..ff36844d14b4 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -25,6 +25,8 @@ #define TLS_PAYLOAD_MAX_LEN 16384 #define SOL_TLS 282 +static int fips_enabled; + struct tls_crypto_info_keys { union { struct tls12_crypto_info_aes_gcm_128 aes128; @@ -235,7 +237,7 @@ FIXTURE_VARIANT(tls) { uint16_t tls_version; uint16_t cipher_type; - bool nopad; + bool nopad, fips_non_compliant; }; FIXTURE_VARIANT_ADD(tls, 12_aes_gcm) @@ -254,24 +256,28 @@ FIXTURE_VARIANT_ADD(tls, 12_chacha) { .tls_version = TLS_1_2_VERSION, .cipher_type = TLS_CIPHER_CHACHA20_POLY1305, + .fips_non_compliant = true, }; FIXTURE_VARIANT_ADD(tls, 13_chacha) { .tls_version = TLS_1_3_VERSION, .cipher_type = TLS_CIPHER_CHACHA20_POLY1305, + .fips_non_compliant = true, }; FIXTURE_VARIANT_ADD(tls, 13_sm4_gcm) { .tls_version = TLS_1_3_VERSION, .cipher_type = TLS_CIPHER_SM4_GCM, + .fips_non_compliant = true, }; FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm) { .tls_version = TLS_1_3_VERSION, .cipher_type = TLS_CIPHER_SM4_CCM, + .fips_non_compliant = true, }; FIXTURE_VARIANT_ADD(tls, 12_aes_ccm) @@ -311,6 +317,9 @@ FIXTURE_SETUP(tls) int one = 1; int ret; + if (fips_enabled && variant->fips_non_compliant) + SKIP(return, "Unsupported cipher in FIPS mode"); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12); @@ -1865,4 +1874,17 @@ TEST(prequeue) { close(cfd); } +static void __attribute__((constructor)) fips_check(void) { + int res; + FILE *f; + + f = fopen("/proc/sys/crypto/fips_enabled", "r"); + if (f) { + res = fscanf(f, "%d", &fips_enabled); + if (res != 1) + ksft_print_msg("ERROR: Couldn't read /proc/sys/crypto/fips_enabled\n"); + fclose(f); + } +} + TEST_HARNESS_MAIN From cb43c60e64ca67fcc9d23bd08f51d2ab8209d9d7 Mon Sep 17 00:00:00 2001 From: Magali Lemes Date: Tue, 13 Jun 2023 09:32:21 -0300 Subject: [PATCH 807/934] selftests: net: vrf-xfrm-tests: change authentication and encryption algos The vrf-xfrm-tests tests use the hmac(md5) and cbc(des3_ede) algorithms for performing authentication and encryption, respectively. This causes the tests to fail when fips=1 is set, since these algorithms are not allowed in FIPS mode. Therefore, switch from hmac(md5) and cbc(des3_ede) to hmac(sha1) and cbc(aes), which are FIPS compliant. Fixes: 3f251d741150 ("selftests: Add tests for vrf and xfrms") Reviewed-by: David Ahern Signed-off-by: Magali Lemes Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/vrf-xfrm-tests.sh | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh index 184da81f554f..452638ae8aed 100755 --- a/tools/testing/selftests/net/vrf-xfrm-tests.sh +++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh @@ -264,60 +264,60 @@ setup_xfrm() ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ - enc 'cbc(des3_ede)' ${ENC_1} \ + auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ + enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_4} dst ${h2_4} ${devarg} ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ - enc 'cbc(des3_ede)' ${ENC_1} \ + auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ + enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_4} dst ${h2_4} ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ - enc 'cbc(des3_ede)' ${ENC_2} \ + auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ + enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_4} dst ${h1_4} ${devarg} ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ - enc 'cbc(des3_ede)' ${ENC_2} \ + auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ + enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_4} dst ${h1_4} ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ - enc 'cbc(des3_ede)' ${ENC_1} \ + auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ + enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_6} dst ${h2_6} ${devarg} ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ - enc 'cbc(des3_ede)' ${ENC_1} \ + auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ + enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_6} dst ${h2_6} ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ - enc 'cbc(des3_ede)' ${ENC_2} \ + auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ + enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_6} dst ${h1_6} ${devarg} ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ - enc 'cbc(des3_ede)' ${ENC_2} \ + auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ + enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_6} dst ${h1_6} } From d7a2fc1437f71cb058c7b11bc33dfc19e4bf277a Mon Sep 17 00:00:00 2001 From: Magali Lemes Date: Tue, 13 Jun 2023 09:32:22 -0300 Subject: [PATCH 808/934] selftests: net: fcnal-test: check if FIPS mode is enabled There are some MD5 tests which fail when the kernel is in FIPS mode, since MD5 is not FIPS compliant. Add a check and only run those tests if FIPS mode is not enabled. Fixes: f0bee1ebb5594 ("fcnal-test: Add TCP MD5 tests") Fixes: 5cad8bce26e01 ("fcnal-test: Add TCP MD5 tests for VRF") Reviewed-by: David Ahern Signed-off-by: Magali Lemes Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 27 ++++++++++++++++------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 21ca91473c09..ee6880ac3e5e 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -92,6 +92,13 @@ NSC_CMD="ip netns exec ${NSC}" which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) +# Check if FIPS mode is enabled +if [ -f /proc/sys/crypto/fips_enabled ]; then + fips_enabled=`cat /proc/sys/crypto/fips_enabled` +else + fips_enabled=0 +fi + ################################################################################ # utilities @@ -1216,7 +1223,7 @@ ipv4_tcp_novrf() run_cmd nettest -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 1 "No server, device client, local conn" - ipv4_tcp_md5_novrf + [ "$fips_enabled" = "1" ] || ipv4_tcp_md5_novrf } ipv4_tcp_vrf() @@ -1270,9 +1277,11 @@ ipv4_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests - setup_vrf_dup - ipv4_tcp_md5 - cleanup_vrf_dup + if [ "$fips_enabled" = "0" ]; then + setup_vrf_dup + ipv4_tcp_md5 + cleanup_vrf_dup + fi # # enable VRF global server @@ -2772,7 +2781,7 @@ ipv6_tcp_novrf() log_test_addr ${a} $? 1 "No server, device client, local conn" done - ipv6_tcp_md5_novrf + [ "$fips_enabled" = "1" ] || ipv6_tcp_md5_novrf } ipv6_tcp_vrf() @@ -2842,9 +2851,11 @@ ipv6_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests - setup_vrf_dup - ipv6_tcp_md5 - cleanup_vrf_dup + if [ "$fips_enabled" = "0" ]; then + setup_vrf_dup + ipv6_tcp_md5 + cleanup_vrf_dup + fi # # enable VRF global server From 297224fc0922e7385573a30c29ffdabb67f27b7d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 12 Jun 2023 14:55:33 +0200 Subject: [PATCH 809/934] ALSA: seq: oss: Fix racy open/close of MIDI devices Although snd_seq_oss_midi_open() and snd_seq_oss_midi_close() can be called concurrently from different code paths, we have no proper data protection against races. Introduce open_mutex to each seq_oss_midi object for avoiding the races. Reported-by: "Gong, Sishuai" Closes: https://lore.kernel.org/r/7DC9AF71-F481-4ABA-955F-76C535661E33@purdue.edu Link: https://lore.kernel.org/r/20230612125533.27461-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_midi.c | 35 +++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index 07efb38f58ac..f2940b29595f 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -37,6 +37,7 @@ struct seq_oss_midi { struct snd_midi_event *coder; /* MIDI event coder */ struct seq_oss_devinfo *devinfo; /* assigned OSSseq device */ snd_use_lock_t use_lock; + struct mutex open_mutex; }; @@ -172,6 +173,7 @@ snd_seq_oss_midi_check_new_port(struct snd_seq_port_info *pinfo) mdev->flags = pinfo->capability; mdev->opened = 0; snd_use_lock_init(&mdev->use_lock); + mutex_init(&mdev->open_mutex); /* copy and truncate the name of synth device */ strscpy(mdev->name, pinfo->name, sizeof(mdev->name)); @@ -322,15 +324,17 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode) int perm; struct seq_oss_midi *mdev; struct snd_seq_port_subscribe subs; + int err; mdev = get_mididev(dp, dev); if (!mdev) return -ENODEV; + mutex_lock(&mdev->open_mutex); /* already used? */ if (mdev->opened && mdev->devinfo != dp) { - snd_use_lock_free(&mdev->use_lock); - return -EBUSY; + err = -EBUSY; + goto unlock; } perm = 0; @@ -340,14 +344,14 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode) perm |= PERM_READ; perm &= mdev->flags; if (perm == 0) { - snd_use_lock_free(&mdev->use_lock); - return -ENXIO; + err = -ENXIO; + goto unlock; } /* already opened? */ if ((mdev->opened & perm) == perm) { - snd_use_lock_free(&mdev->use_lock); - return 0; + err = 0; + goto unlock; } perm &= ~mdev->opened; @@ -372,13 +376,17 @@ snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode) } if (! mdev->opened) { - snd_use_lock_free(&mdev->use_lock); - return -ENXIO; + err = -ENXIO; + goto unlock; } mdev->devinfo = dp; + err = 0; + + unlock: + mutex_unlock(&mdev->open_mutex); snd_use_lock_free(&mdev->use_lock); - return 0; + return err; } /* @@ -393,10 +401,9 @@ snd_seq_oss_midi_close(struct seq_oss_devinfo *dp, int dev) mdev = get_mididev(dp, dev); if (!mdev) return -ENODEV; - if (! mdev->opened || mdev->devinfo != dp) { - snd_use_lock_free(&mdev->use_lock); - return 0; - } + mutex_lock(&mdev->open_mutex); + if (!mdev->opened || mdev->devinfo != dp) + goto unlock; memset(&subs, 0, sizeof(subs)); if (mdev->opened & PERM_WRITE) { @@ -415,6 +422,8 @@ snd_seq_oss_midi_close(struct seq_oss_devinfo *dp, int dev) mdev->opened = 0; mdev->devinfo = NULL; + unlock: + mutex_unlock(&mdev->open_mutex); snd_use_lock_free(&mdev->use_lock); return 0; } From 8ba61c9f6c9bdfbf9d197b0282641d24ae909778 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 12 Jun 2023 15:28:18 +0200 Subject: [PATCH 810/934] ALSA: usb-audio: Fix broken resume due to UAC3 power state As reported in the bugzilla below, the PM resume of a UAC3 device may fail due to the incomplete power state change, stuck at D1. The reason is that the driver expects the full D0 power state change only at hw_params, while the normal PCM resume procedure doesn't call hw_params. For fixing the bug, we add the same power state update to D0 at the prepare callback, which is certainly called by the resume procedure. Note that, with this change, the power state change in the hw_params becomes almost redundant, since snd_usb_hw_params() doesn't touch the parameters (at least it tires so). But dropping it is still a bit risky (e.g. we have the media-driver binding), so I leave the D0 power state change in snd_usb_hw_params() as is for now. Fixes: a0a4959eb4e9 ("ALSA: usb-audio: Operate UAC3 Power Domains in PCM callbacks") Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=217539 Link: https://lore.kernel.org/r/20230612132818.29486-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index eec5232f9fb2..08bf535ed163 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -650,6 +650,10 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream) goto unlock; } + ret = snd_usb_pcm_change_state(subs, UAC3_PD_STATE_D0); + if (ret < 0) + goto unlock; + again: if (subs->sync_endpoint) { ret = snd_usb_endpoint_prepare(chip, subs->sync_endpoint); From 122e2cb7e1a30438cc0e8bf70d4279db245d7d5b Mon Sep 17 00:00:00 2001 From: Lukasz Tyl Date: Wed, 14 Jun 2023 14:25:24 +0200 Subject: [PATCH 811/934] ALSA: usb-audio: Add quirk flag for HEM devices to enable native DSD playback This commit adds new DEVICE_FLG with QUIRK_FLAG_DSD_RAW and Vendor Id for HEM devices which supports native DSD. Prior to this change Linux kernel was not enabling native DSD playback for HEM devices, and as a result, DSD audio was being converted to PCM "on the fly". HEM devices, when connected to the system, would only play audio in PCM format, even if the source material was in DSD format. With the addition of new VENDOR_FLG in the quircks.c file, the devices are now correctly recognized, and raw DSD data is transmitted to the device, allowing for native DSD playback. Signed-off-by: Lukasz Tyl Cc: Link: https://lore.kernel.org/r/20230614122524.30271-1-ltyl@hem-e.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 3ecd1ba7fd4b..6cf55b7f7a04 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2191,6 +2191,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x2ab6, /* T+A devices */ QUIRK_FLAG_DSD_RAW), + VENDOR_FLG(0x3336, /* HEM devices */ + QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x3353, /* Khadas devices */ QUIRK_FLAG_DSD_RAW), VENDOR_FLG(0x3842, /* EVGA */ From f015b900bc3285322029b4a7d132d6aeb0e51857 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 14 Jun 2023 12:02:02 +0200 Subject: [PATCH 812/934] xfrm: Linearize the skb after offloading if needed. With offloading enabled, esp_xmit() gets invoked very late, from within validate_xmit_xfrm() which is after validate_xmit_skb() validates and linearizes the skb if the underlying device does not support fragments. esp_output_tail() may add a fragment to the skb while adding the auth tag/ IV. Devices without the proper support will then send skb->data points to with the correct length so the packet will have garbage at the end. A pcap sniffer will claim that the proper data has been sent since it parses the skb properly. It is not affected with INET_ESP_OFFLOAD disabled. Linearize the skb after offloading if the sending hardware requires it. It was tested on v4, v6 has been adopted. Fixes: 7785bba299a8d ("esp: Add a software GRO codepath") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 3 +++ net/ipv6/esp6_offload.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 3969fa805679..ee848be59e65 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -340,6 +340,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ secpath_reset(skb); + if (skb_needs_linearize(skb, skb->dev->features) && + __skb_linearize(skb)) + return -ENOMEM; return 0; } diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 75c02992c520..772340268997 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -374,6 +374,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features secpath_reset(skb); + if (skb_needs_linearize(skb, skb->dev->features) && + __skb_linearize(skb)) + return -ENOMEM; return 0; } From 5b00369fcf6d1ff9050b94800dc596925ff3623f Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 8 Jun 2023 10:19:34 +0800 Subject: [PATCH 813/934] iommu/amd: Fix possible memory leak of 'domain' Move allocation code down to avoid memory leak. Fixes: 29f54745f245 ("iommu/amd: Add missing domain type checks") Signed-off-by: Su Hui Reviewed-by: Jason Gunthorpe Reviewed-by: Jerry Snitselaar Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20230608021933.856045-1-suhui@nfschina.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index dc1ec6849775..e8a2e5984acb 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2078,10 +2078,6 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) int mode = DEFAULT_PGTABLE_LEVEL; int ret; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) - return NULL; - /* * Force IOMMU v1 page table when iommu=pt and * when allocating domain for pass-through devices. @@ -2097,6 +2093,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) return NULL; } + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + switch (pgtable) { case AMD_IOMMU_V1: ret = protection_domain_init_v1(domain, mode); From b50f26a44887f3f71ff5457135ee1d5f1d542d7d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 16 Jun 2023 12:48:31 +0100 Subject: [PATCH 814/934] perf/core: Drop __weak attribute from arch_perf_update_userpage() prototype Reiji reports that the arm64 implementation of arch_perf_update_userpage() is now ignored and replaced by the dummy stub in core code. This seems to happen since the PMUv3 driver was moved to driver/perf. As it turns out, dropping the __weak attribute from the *prototype* of the function solves the problem. You're right, this doesn't seem to make much sense. And yet... It appears that both symbols get flagged as weak, and that the first one to appear in the link order wins: $ nm drivers/perf/arm_pmuv3.o|grep arch_perf_update_userpage 0000000000001db0 W arch_perf_update_userpage Dropping the attribute from the prototype restores the expected behaviour, and arm64 is able to enjoy arch_perf_update_userpage() again. Fixes: 7755cec63ade ("arm64: perf: Move PMUv3 driver to drivers/perf") Fixes: f1ec3a517b43 ("kernel/events: Add a missing prototype for arch_perf_update_userpage()") Reported-by: Reiji Watanabe Signed-off-by: Marc Zyngier Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Tested-by: Reiji Watanabe Link: https://lkml.kernel.org/r/20230616114831.3186980-1-maz@kernel.org --- include/linux/perf_event.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d5628a7b5eaa..c8dcfdbda1f4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1845,9 +1845,9 @@ int perf_event_exit_cpu(unsigned int cpu); #define perf_event_exit_cpu NULL #endif -extern void __weak arch_perf_update_userpage(struct perf_event *event, - struct perf_event_mmap_page *userpg, - u64 now); +extern void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, + u64 now); #ifdef CONFIG_MMU extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr); From a6742cb90b567f952a95efa27dee345748d09fc7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 15 Jun 2023 10:32:42 -0700 Subject: [PATCH 815/934] perf/x86/intel: Fix the FRONTEND encoding on GNR and MTL When counting a FRONTEND event, the MSR_PEBS_FRONTEND is not correctly set on GNR and MTL p-core. The umask value for the FRONTEND events is changed on GNR and MTL. The new umask is missing in the extra_regs[] table. Add a dedicated intel_gnr_extra_regs[] for GNR and MTL p-core. Fixes: bc4000fdb009 ("perf/x86/intel: Add Granite Rapids") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20230615173242.3726364-1-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 89b9c1cebb61..27f3a7b34bd5 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -349,6 +349,16 @@ static struct event_constraint intel_spr_event_constraints[] = { EVENT_CONSTRAINT_END }; +static struct extra_reg intel_gnr_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), + INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); @@ -6496,6 +6506,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + x86_pmu.extra_regs = intel_spr_extra_regs; fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: @@ -6506,7 +6517,8 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_spr_event_constraints; x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; - x86_pmu.extra_regs = intel_spr_extra_regs; + if (!x86_pmu.extra_regs) + x86_pmu.extra_regs = intel_gnr_extra_regs; x86_pmu.limit_period = spr_limit_period; x86_pmu.pebs_ept = 1; x86_pmu.pebs_aliases = NULL; @@ -6650,6 +6662,7 @@ __init int intel_pmu_init(void) pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { + x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; From b9f174c811e3ae4ae8959dc57e6adb9990e913f4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 13 Jun 2023 14:14:56 -0700 Subject: [PATCH 816/934] x86/unwind/orc: Add ELF section with ORC version identifier Commits ffb1b4a41016 ("x86/unwind/orc: Add 'signal' field to ORC metadata") and fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in two") changed the ORC format. Although ORC is internal to the kernel, it's the only way for external tools to get reliable kernel stack traces on x86-64. In particular, the drgn debugger [1] uses ORC for stack unwinding, and these format changes broke it [2]. As the drgn maintainer, I don't care how often or how much the kernel changes the ORC format as long as I have a way to detect the change. It suffices to store a version identifier in the vmlinux and kernel module ELF files (to use when parsing ORC sections from ELF), and in kernel memory (to use when parsing ORC from a core dump+symbol table). Rather than hard-coding a version number that needs to be manually bumped, Peterz suggested hashing the definitions from orc_types.h. If there is a format change that isn't caught by this, the hashing script can be updated. This patch adds an .orc_header allocated ELF section containing the 20-byte hash to vmlinux and kernel modules, along with the corresponding __start_orc_header and __stop_orc_header symbols in vmlinux. 1: https://github.com/osandov/drgn 2: https://github.com/osandov/drgn/issues/303 Fixes: ffb1b4a41016 ("x86/unwind/orc: Add 'signal' field to ORC metadata") Fixes: fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in two") Signed-off-by: Omar Sandoval Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/aef9c8dc43915b886a8c48509a12ec1b006ca1ca.1686690801.git.osandov@osandov.com --- arch/x86/Makefile | 12 ++++++++++++ arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/orc_header.h | 19 +++++++++++++++++++ arch/x86/kernel/unwind_orc.c | 3 +++ include/asm-generic/vmlinux.lds.h | 3 +++ scripts/mod/modpost.c | 5 +++++ scripts/orc_hash.sh | 16 ++++++++++++++++ 7 files changed, 59 insertions(+) create mode 100644 arch/x86/include/asm/orc_header.h create mode 100644 scripts/orc_hash.sh diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b39975977c03..fdc2e3abd615 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -305,6 +305,18 @@ ifeq ($(RETPOLINE_CFLAGS),) endif endif +ifdef CONFIG_UNWINDER_ORC +orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h +orc_hash_sh := $(srctree)/scripts/orc_hash.sh +targets += $(orc_hash_h) +quiet_cmd_orc_hash = GEN $@ + cmd_orc_hash = mkdir -p $(dir $@); \ + $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@ +$(orc_hash_h): $(srctree)/arch/x86/include/asm/orc_types.h $(orc_hash_sh) FORCE + $(call if_changed,orc_hash) +archprepare: $(orc_hash_h) +endif + archclean: $(Q)rm -rf $(objtree)/arch/i386 $(Q)rm -rf $(objtree)/arch/x86_64 diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 1e51650b79d7..4f1ce5fc4e19 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +generated-y += orc_hash.h generated-y += syscalls_32.h generated-y += syscalls_64.h generated-y += syscalls_x32.h diff --git a/arch/x86/include/asm/orc_header.h b/arch/x86/include/asm/orc_header.h new file mode 100644 index 000000000000..07bacf3e160e --- /dev/null +++ b/arch/x86/include/asm/orc_header.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) Meta Platforms, Inc. and affiliates. */ + +#ifndef _ORC_HEADER_H +#define _ORC_HEADER_H + +#include +#include +#include + +/* + * The header is currently a 20-byte hash of the ORC entry definition; see + * scripts/orc_hash.sh. + */ +#define ORC_HEADER \ + __used __section(".orc_header") __aligned(4) \ + static const u8 orc_header[] = { ORC_HASH } + +#endif /* _ORC_HEADER_H */ diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 3ac50b7298d1..4d8e518365f4 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -7,6 +7,9 @@ #include #include #include +#include + +ORC_HEADER; #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index cebdf1ca415d..da9e5629ea43 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -839,6 +839,9 @@ #ifdef CONFIG_UNWINDER_ORC #define ORC_UNWIND_TABLE \ + .orc_header : AT(ADDR(.orc_header) - LOAD_OFFSET) { \ + BOUNDED_SECTION_BY(.orc_header, _orc_header) \ + } \ . = ALIGN(4); \ .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \ BOUNDED_SECTION_BY(.orc_unwind_ip, _orc_unwind_ip) \ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index d4531d09984d..c12150f96b88 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1979,6 +1979,11 @@ static void add_header(struct buffer *b, struct module *mod) buf_printf(b, "#include \n"); buf_printf(b, "#include \n"); buf_printf(b, "\n"); + buf_printf(b, "#ifdef CONFIG_UNWINDER_ORC\n"); + buf_printf(b, "#include \n"); + buf_printf(b, "ORC_HEADER;\n"); + buf_printf(b, "#endif\n"); + buf_printf(b, "\n"); buf_printf(b, "BUILD_SALT;\n"); buf_printf(b, "BUILD_LTO_INFO;\n"); buf_printf(b, "\n"); diff --git a/scripts/orc_hash.sh b/scripts/orc_hash.sh new file mode 100644 index 000000000000..466611aa0053 --- /dev/null +++ b/scripts/orc_hash.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (c) Meta Platforms, Inc. and affiliates. + +set -e + +printf '%s' '#define ORC_HASH ' + +awk ' +/^#define ORC_(REG|TYPE)_/ { print } +/^struct orc_entry {$/ { p=1 } +p { print } +/^}/ { p=0 }' | + sha1sum | + cut -d " " -f 1 | + sed 's/\([0-9a-f]\{2\}\)/0x\1,/g' From 13bb06f8dd42071cb9a49f6e21099eea05d4b856 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Jun 2023 11:18:30 +0200 Subject: [PATCH 817/934] tick/common: Align tick period during sched_timer setup The tick period is aligned very early while the first clock_event_device is registered. At that point the system runs in periodic mode and switches later to one-shot mode if possible. The next wake-up event is programmed based on the aligned value (tick_next_period) but the delta value, that is used to program the clock_event_device, is computed based on ktime_get(). With the subtracted offset, the device fires earlier than the exact time frame. With a large enough offset the system programs the timer for the next wake-up and the remaining time left is too small to make any boot progress. The system hangs. Move the alignment later to the setup of tick_sched timer. At this point the system switches to oneshot mode and a high resolution clocksource is available. At this point it is safe to align tick_next_period because ktime_get() will now return accurate (not jiffies based) time. [bigeasy: Patch description + testing]. Fixes: e9523a0d81899 ("tick/common: Align tick period with the HZ tick.") Reported-by: Mathias Krause Reported-by: "Bhatnagar, Rishabh" Suggested-by: Mathias Krause Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Tested-by: Richard W.M. Jones Tested-by: Mathias Krause Acked-by: SeongJae Park Cc: stable@vger.kernel.org Link: https://lore.kernel.org/5a56290d-806e-b9a5-f37c-f21958b5a8c0@grsecurity.net Link: https://lore.kernel.org/12c6f9a3-d087-b824-0d05-0d18c9bc1bf3@amazon.com Link: https://lore.kernel.org/r/20230615091830.RxMV2xf_@linutronix.de --- kernel/time/tick-common.c | 13 +------------ kernel/time/tick-sched.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 65b8658da829..e9138cd7a0f5 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -218,19 +218,8 @@ static void tick_setup_device(struct tick_device *td, * this cpu: */ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - ktime_t next_p; - u32 rem; - tick_do_timer_cpu = cpu; - - next_p = ktime_get(); - div_u64_rem(next_p, TICK_NSEC, &rem); - if (rem) { - next_p -= rem; - next_p += TICK_NSEC; - } - - tick_next_period = next_p; + tick_next_period = ktime_get(); #ifdef CONFIG_NO_HZ_FULL /* * The boot CPU may be nohz_full, in which case set diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 52254679ec48..42c0be3080bd 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -161,8 +161,19 @@ static ktime_t tick_init_jiffy_update(void) raw_spin_lock(&jiffies_lock); write_seqcount_begin(&jiffies_seq); /* Did we start the jiffies update yet ? */ - if (last_jiffies_update == 0) + if (last_jiffies_update == 0) { + u32 rem; + + /* + * Ensure that the tick is aligned to a multiple of + * TICK_NSEC. + */ + div_u64_rem(tick_next_period, TICK_NSEC, &rem); + if (rem) + tick_next_period += TICK_NSEC - rem; + last_jiffies_update = tick_next_period; + } period = last_jiffies_update; write_seqcount_end(&jiffies_seq); raw_spin_unlock(&jiffies_lock); From d082d48737c75d2b3cc1f972b8c8674c25131534 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 14 Jun 2023 17:38:54 +0100 Subject: [PATCH 818/934] x86/mm: Avoid using set_pgd() outside of real PGD pages KPTI keeps around two PGDs: one for userspace and another for the kernel. Among other things, set_pgd() contains infrastructure to ensure that updates to the kernel PGD are reflected in the user PGD as well. One side-effect of this is that set_pgd() expects to be passed whole pages. Unfortunately, init_trampoline_kaslr() passes in a single entry: 'trampoline_pgd_entry'. When KPTI is on, set_pgd() will update 'trampoline_pgd_entry' (an 8-Byte globally stored [.bss] variable) and will then proceed to replicate that value into the non-existent neighboring user page (located +4k away), leading to the corruption of other global [.bss] stored variables. Fix it by directly assigning 'trampoline_pgd_entry' and avoiding set_pgd(). [ dhansen: tweak subject and changelog ] Fixes: 0925dda5962e ("x86/mm/KASLR: Use only one PUD entry for real mode trampoline") Suggested-by: Dave Hansen Signed-off-by: Lee Jones Signed-off-by: Dave Hansen Cc: Link: https://lore.kernel.org/all/20230614163859.924309-1-lee@kernel.org/g --- arch/x86/mm/kaslr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 557f0fe25dff..37db264866b6 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -172,10 +172,10 @@ void __meminit init_trampoline_kaslr(void) set_p4d(p4d_tramp, __p4d(_KERNPG_TABLE | __pa(pud_page_tramp))); - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp))); + trampoline_pgd_entry = + __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)); } else { - set_pgd(&trampoline_pgd_entry, - __pgd(_KERNPG_TABLE | __pa(pud_page_tramp))); + trampoline_pgd_entry = + __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)); } } From 4e7401fc8c8d048a813e0221a706be84182a76c1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 14 Jun 2023 12:00:05 +0300 Subject: [PATCH 819/934] net/mlx5e: XDP, Allow growing tail for XDP multi buffer The cited commits missed passing frag_size to __xdp_rxq_info_reg, which is required by bpf_xdp_adjust_tail to support growing the tail pointer in fragmented packets. Pass the missing parameter when the current RQ mode allows XDP multi buffer. Fixes: ea5d49bdae8b ("net/mlx5e: Add XDP multi buffer support to the non-linear legacy RQ") Fixes: 9cb9482ef10e ("net/mlx5e: Use fragments of the same size in non-linear legacy RQ with XDP") Signed-off-by: Maxim Mikityanskiy Cc: Tariq Toukan Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 8 ++++++-- drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 9c94807097cb..5ce28ff7685f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -732,7 +732,8 @@ static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params, static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_frags_info *info) + struct mlx5e_rq_frags_info *info, + u32 *xdp_frag_size) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); int frag_size_max = DEFAULT_FRAG_SIZE; @@ -845,6 +846,8 @@ out: info->log_num_frags = order_base_2(info->num_frags); + *xdp_frag_size = info->num_frags > 1 && params->xdp_prog ? PAGE_SIZE : 0; + return 0; } @@ -989,7 +992,8 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - err = mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); + err = mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info, + ¶m->xdp_frag_size); if (err) return err; ndsegs = param->frags_info.num_frags; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index a5d20f6d6d9c..6800949dafbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -24,6 +24,7 @@ struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; struct mlx5e_rq_frags_info frags_info; + u32 xdp_frag_size; }; struct mlx5e_sq_param { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a7c526ee5024..a5bdf78955d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -641,7 +641,7 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) } static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq *rq) + u32 xdp_frag_size, struct mlx5e_rq *rq) { struct mlx5_core_dev *mdev = c->mdev; int err; @@ -665,7 +665,8 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param if (err) return err; - return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id); + return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id, + xdp_frag_size); } static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, @@ -2240,7 +2241,7 @@ static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param { int err; - err = mlx5e_init_rxq_rq(c, params, &c->rq); + err = mlx5e_init_rxq_rq(c, params, rq_params->xdp_frag_size, &c->rq); if (err) return err; From 62a522d3354d81a86dd56feeb40e5ced36d72737 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 14 Jun 2023 12:00:06 +0300 Subject: [PATCH 820/934] net/mlx5e: xsk: Set napi_id to support busy polling on XSK RQ The cited commit missed setting napi_id on XSK RQs, it only affected regular RQs. Add the missing part to support socket busy polling on XSK RQs. Fixes: a2740f529da2 ("net/mlx5e: xsk: Set napi_id to support busy polling") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index ed279f450976..36826b582484 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -86,7 +86,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, if (err) return err; - return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, c->napi.napi_id); } static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, From 0ab999d4a1bfe8251538be1e7e495c50433475a8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 3 May 2023 15:10:05 +0300 Subject: [PATCH 821/934] net/mlx5: Fix driver load with single msix vector When a PCI device has just one msix vector available, we want to share this vector between async and completion events. Current code fails to do that assuming it will always have at least one dedicated vector for completion events. Fix this by detecting when the pool contains just a single vector. Fixes: 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation") Signed-off-by: Eli Cohen Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 843da89a9035..33b9359de53d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -565,15 +565,21 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, struct mlx5_irq **irqs, struct cpu_rmap **rmap) { + struct mlx5_irq_table *table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = table->pcif_pool; struct irq_affinity_desc af_desc; struct mlx5_irq *irq; + int offset = 1; int i; + if (!pool->xa_num_irqs.max) + offset = 0; + af_desc.is_managed = false; for (i = 0; i < nirqs; i++) { cpumask_clear(&af_desc.mask); cpumask_set_cpu(cpus[i], &af_desc.mask); - irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap); + irq = mlx5_irq_request(dev, i + offset, &af_desc, rmap); if (IS_ERR(irq)) break; irqs[i] = irq; From b100573ab76ee5b0cb8f9129b568d9e7da795f76 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 11 Apr 2023 11:17:35 +0300 Subject: [PATCH 822/934] net/mlx5e: TC, Add null pointer check for hardware miss support The cited commits add hardware miss support to tc action. But if the rules can't be offloaded, the pointers are null and system will panic when accessing them. Fix it by checking null pointer. Fixes: 08fe94ec5f77 ("net/mlx5e: TC, Remove special handling of CT action") Fixes: 6702782845a5 ("net/mlx5e: TC, Set CT miss to the specific ct action instance") Signed-off-by: Chris Mi Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index ead38ef69483..a254e728ac95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2021,6 +2021,8 @@ void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) { + if (!attr->ct_attr.ft) /* no ct action, return */ + return; if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ return; From fb7be476ab7e797b18c6f0047041635c41b3b5a4 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 6 Apr 2023 09:38:09 +0300 Subject: [PATCH 823/934] net/mlx5e: TC, Cleanup ct resources for nic flow The cited commit removes special handling of CT action. But it removes too much. Pre ct/ct_nat tables and some other resources are not destroyed due to the cited commit. Fix it by adding it back. Fixes: 08fe94ec5f77 ("net/mlx5e: TC, Remove special handling of CT action") Signed-off-by: Chris Mi Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8a5a8703f0a3..b9b1da751a3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1439,6 +1439,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, mlx5e_hairpin_flow_del(priv, flow); free_flow_post_acts(flow); + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); kvfree(attr->parse_attr); kfree(flow->attr); From 87cd0649176c0588daf2cad53058143f808b0905 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 12 Jun 2023 01:05:48 +0300 Subject: [PATCH 824/934] net/mlx5: DR, Support SW created encap actions for FW table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases, steering might need to use SW-created action in FW table, which results in wrong packet reformat being used: mlx5_core 0000:81:00.1: mlx5_cmd_check:756:(pid 1154): SET_FLOW_TABLE_ENTRY(0×936) op_mod(0×0) failed, status bad resource(0×5), syndrome (0xf2ff71) This patch adds support for usage of SW-created packet reformat (encap) actions in FW tables, and adds clear error flow for attempt to use SW-created modify header on FW tables. Fixes: 6a48faeeca10 ("net/mlx5: Add direct rule fs_cmd implementation") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 50 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/fs_core.h | 7 +++ .../mellanox/mlx5/core/steering/dr_action.c | 5 ++ .../mellanox/mlx5/core/steering/fs_dr.c | 27 +++++++++- .../mellanox/mlx5/core/steering/fs_dr.h | 7 +++ .../mellanox/mlx5/core/steering/mlx5dr.h | 2 + 6 files changed, 83 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 144e59480686..ec83e6483d1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -511,10 +511,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, struct mlx5_flow_rule *dst; void *in_flow_context, *vlan; void *in_match_value; + int reformat_id = 0; unsigned int inlen; int dst_cnt_size; + u32 *in, action; void *in_dests; - u32 *in; int err; if (mlx5_set_extended_dest(dev, fte, &extended_dest)) @@ -553,22 +554,42 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); - if (extended_dest) { - u32 action; - action = fte->action.action & - ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - MLX5_SET(flow_context, in_flow_context, action, action); - } else { - MLX5_SET(flow_context, in_flow_context, action, - fte->action.action); - if (fte->action.pkt_reformat) - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, - fte->action.pkt_reformat->id); + action = fte->action.action; + if (extended_dest) + action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + + MLX5_SET(flow_context, in_flow_context, action, action); + + if (!extended_dest && fte->action.pkt_reformat) { + struct mlx5_pkt_reformat *pkt_reformat = fte->action.pkt_reformat; + + if (pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { + reformat_id = mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat); + if (reformat_id < 0) { + mlx5_core_err(dev, + "Unsupported SW-owned pkt_reformat type (%d) in FW-owned table\n", + pkt_reformat->reformat_type); + err = reformat_id; + goto err_out; + } + } else { + reformat_id = fte->action.pkt_reformat->id; + } } - if (fte->action.modify_hdr) + + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, (u32)reformat_id); + + if (fte->action.modify_hdr) { + if (fte->action.modify_hdr->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { + mlx5_core_err(dev, "Can't use SW-owned modify_hdr in FW-owned table\n"); + err = -EOPNOTSUPP; + goto err_out; + } + MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_hdr->id); + } MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type, fte->action.crypto.type); @@ -885,6 +906,8 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); + pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_FW; + kfree(in); return err; } @@ -969,6 +992,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_FW; kfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index f137a0611b77..b043190e50a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -54,8 +54,14 @@ struct mlx5_flow_definer { u32 id; }; +enum mlx5_flow_resource_owner { + MLX5_FLOW_RESOURCE_OWNER_FW, + MLX5_FLOW_RESOURCE_OWNER_SW, +}; + struct mlx5_modify_hdr { enum mlx5_flow_namespace_type ns_type; + enum mlx5_flow_resource_owner owner; union { struct mlx5_fs_dr_action action; u32 id; @@ -65,6 +71,7 @@ struct mlx5_modify_hdr { struct mlx5_pkt_reformat { enum mlx5_flow_namespace_type ns_type; int reformat_type; /* from mlx5_ifc */ + enum mlx5_flow_resource_owner owner; union { struct mlx5_fs_dr_action action; u32 id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 0eb9a8d7f282..57e22c5170df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -2129,6 +2129,11 @@ mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u32 obj_id, return action; } +u32 mlx5dr_action_get_pkt_reformat_id(struct mlx5dr_action *action) +{ + return action->reformat->id; +} + int mlx5dr_action_destroy(struct mlx5dr_action *action) { if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 984653756779..cc215beb7436 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -331,8 +331,16 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { - bool is_decap = fte->action.pkt_reformat->reformat_type == - MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + bool is_decap; + + if (fte->action.pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) { + err = -EINVAL; + mlx5dr_err(domain, "FW-owned reformat can't be used in SW rule\n"); + goto free_actions; + } + + is_decap = fte->action.pkt_reformat->reformat_type == + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; if (is_decap) actions[num_actions++] = @@ -661,6 +669,7 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns return -EINVAL; } + pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_SW; pkt_reformat->action.dr_action = action; return 0; @@ -691,6 +700,7 @@ static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, return -EINVAL; } + modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW; modify_hdr->action.dr_action = action; return 0; @@ -816,6 +826,19 @@ static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, return steering_caps; } +int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +{ + switch (pkt_reformat->reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + case MLX5_REFORMAT_TYPE_INSERT_HDR: + return mlx5dr_action_get_pkt_reformat_id(pkt_reformat->action.dr_action); + } + return -EOPNOTSUPP; +} + bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) { return mlx5dr_is_supported(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h index d168622063d5..99a3b2eff6b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h @@ -38,6 +38,8 @@ struct mlx5_fs_dr_table { bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev); +int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat); + const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void); #else @@ -47,6 +49,11 @@ static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) return NULL; } +static inline u32 mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +{ + return 0; +} + static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) { return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 9afd268a2573..d1c04f43d86d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -150,6 +150,8 @@ mlx5dr_action_create_dest_match_range(struct mlx5dr_domain *dmn, int mlx5dr_action_destroy(struct mlx5dr_action *action); +u32 mlx5dr_action_get_pkt_reformat_id(struct mlx5dr_action *action); + int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id, u8 *dw_selectors, u8 *byte_selectors, u8 *match_mask, u32 *definer_id); From ef4c5afc783dc3d47640270a9b94713229c697e8 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 4 Jun 2023 21:07:04 +0300 Subject: [PATCH 825/934] net/mlx5: DR, Fix wrong action data allocation in decap action When TUNNEL_L3_TO_L2 decap action was created, a pointer to a local variable was passed as its HW action data, resulting in attempt to free invalid address: BUG: KASAN: invalid-free in mlx5dr_action_destroy+0x318/0x410 [mlx5_core] Fixes: 4781df92f4da ("net/mlx5: DR, Move STEv0 modify header logic") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 57e22c5170df..0f783e7906cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -1421,9 +1421,13 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, } case DR_ACTION_TYP_TNL_L3_TO_L2: { - u8 hw_actions[DR_ACTION_CACHE_LINE_SIZE] = {}; + u8 *hw_actions; int ret; + hw_actions = kzalloc(DR_ACTION_CACHE_LINE_SIZE, GFP_KERNEL); + if (!hw_actions) + return -ENOMEM; + ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx, data, data_sz, hw_actions, @@ -1431,6 +1435,7 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, &action->rewrite->num_of_actions); if (ret) { mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n"); + kfree(hw_actions); return ret; } @@ -1440,6 +1445,7 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, ret = mlx5dr_ste_alloc_modify_hdr(action); if (ret) { mlx5dr_dbg(dmn, "Failed preparing reformat data\n"); + kfree(hw_actions); return ret; } return 0; From 314ded538e5f22e7610b1bf621402024a180ec80 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 8 Jun 2023 12:00:54 -0700 Subject: [PATCH 826/934] net/mlx5: Free IRQ rmap and notifier on kernel shutdown The kernel IRQ system needs the irq affinity notifier to be clear before attempting to free the irq, see WARN_ON log below. On a normal driver unload we don't have this issue since we do the complete cleanup of the irq resources. To fix this, put the important resources cleanup in a helper function and use it in both normal driver unload and shutdown flows. [ 4497.498434] ------------[ cut here ]------------ [ 4497.498726] WARNING: CPU: 0 PID: 9 at kernel/irq/manage.c:2034 free_irq+0x295/0x340 [ 4497.499193] Modules linked in: [ 4497.499386] CPU: 0 PID: 9 Comm: kworker/0:1 Tainted: G W 6.4.0-rc4+ #10 [ 4497.499876] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 [ 4497.500518] Workqueue: events do_poweroff [ 4497.500849] RIP: 0010:free_irq+0x295/0x340 [ 4497.501132] Code: 85 c0 0f 84 1d ff ff ff 48 89 ef ff d0 0f 1f 00 e9 10 ff ff ff 0f 0b e9 72 ff ff ff 49 8d 7f 28 ff d0 0f 1f 00 e9 df fd ff ff <0f> 0b 48 c7 80 c0 008 [ 4497.502269] RSP: 0018:ffffc90000053da0 EFLAGS: 00010282 [ 4497.502589] RAX: ffff888100949600 RBX: ffff88810330b948 RCX: 0000000000000000 [ 4497.503035] RDX: ffff888100949600 RSI: ffff888100400490 RDI: 0000000000000023 [ 4497.503472] RBP: ffff88810330c7e0 R08: ffff8881004005d0 R09: ffffffff8273a260 [ 4497.503923] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8881009ae000 [ 4497.504359] R13: ffff8881009ae148 R14: 0000000000000000 R15: ffff888100949600 [ 4497.504804] FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 [ 4497.505302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4497.505671] CR2: 00007fce98806298 CR3: 000000000262e005 CR4: 0000000000370ef0 [ 4497.506104] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 4497.506540] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 4497.507002] Call Trace: [ 4497.507158] [ 4497.507299] ? free_irq+0x295/0x340 [ 4497.507522] ? __warn+0x7c/0x130 [ 4497.507740] ? free_irq+0x295/0x340 [ 4497.507963] ? report_bug+0x171/0x1a0 [ 4497.508197] ? handle_bug+0x3c/0x70 [ 4497.508417] ? exc_invalid_op+0x17/0x70 [ 4497.508662] ? asm_exc_invalid_op+0x1a/0x20 [ 4497.508926] ? free_irq+0x295/0x340 [ 4497.509146] mlx5_irq_pool_free_irqs+0x48/0x90 [ 4497.509421] mlx5_irq_table_free_irqs+0x38/0x50 [ 4497.509714] mlx5_core_eq_free_irqs+0x27/0x40 [ 4497.509984] shutdown+0x7b/0x100 [ 4497.510184] pci_device_shutdown+0x30/0x60 [ 4497.510440] device_shutdown+0x14d/0x240 [ 4497.510698] kernel_power_off+0x30/0x70 [ 4497.510938] process_one_work+0x1e6/0x3e0 [ 4497.511183] worker_thread+0x49/0x3b0 [ 4497.511407] ? __pfx_worker_thread+0x10/0x10 [ 4497.511679] kthread+0xe0/0x110 [ 4497.511879] ? __pfx_kthread+0x10/0x10 [ 4497.512114] ret_from_fork+0x29/0x50 [ 4497.512342] Fixes: 9c2d08010963 ("net/mlx5: Free irqs only on shutdown callback") Signed-off-by: Saeed Mahameed Reviewed-by: Shay Drory --- .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 33b9359de53d..98412bd5a696 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -126,14 +126,22 @@ out: return ret; } -static void irq_release(struct mlx5_irq *irq) +/* mlx5_system_free_irq - Free an IRQ + * @irq: IRQ to free + * + * Free the IRQ and other resources such as rmap from the system. + * BUT doesn't free or remove reference from mlx5. + * This function is very important for the shutdown flow, where we need to + * cleanup system resoruces but keep mlx5 objects alive, + * see mlx5_irq_table_free_irqs(). + */ +static void mlx5_system_free_irq(struct mlx5_irq *irq) { struct mlx5_irq_pool *pool = irq->pool; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif - xa_erase(&pool->irqs, irq->pool_index); /* free_irq requires that affinity_hint and rmap will be cleared before * calling it. To satisfy this requirement, we call * irq_cpu_rmap_remove() to remove the notifier @@ -145,10 +153,18 @@ static void irq_release(struct mlx5_irq *irq) irq_cpu_rmap_remove(rmap, irq->map.virq); #endif - free_cpumask_var(irq->mask); free_irq(irq->map.virq, &irq->nh); if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev)) pci_msix_free_irq(pool->dev->pdev, irq->map); +} + +static void irq_release(struct mlx5_irq *irq) +{ + struct mlx5_irq_pool *pool = irq->pool; + + xa_erase(&pool->irqs, irq->pool_index); + mlx5_system_free_irq(irq); + free_cpumask_var(irq->mask); kfree(irq); } @@ -705,7 +721,8 @@ static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool) unsigned long index; xa_for_each(&pool->irqs, index, irq) - free_irq(irq->map.virq, &irq->nh); + mlx5_system_free_irq(irq); + } static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table) From cf5bb02320d4c4cf4e827efc0314b6ca4082799c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Jun 2023 11:09:49 +0300 Subject: [PATCH 827/934] net/mlx5e: Don't delay release of hardware objects XFRM core provides two callbacks to release resources, one is .xdo_dev_policy_delete() and another is .xdo_dev_policy_free(). This separation allows delayed release so "ip xfrm policy free" commands won't starve. Unfortunately, mlx5 command interface can't run in .xdo_dev_policy_free() callbacks as the latter runs in ATOMIC context. BUG: scheduling while atomic: swapper/7/0/0x00000100 Modules linked in: act_mirred act_tunnel_key cls_flower sch_ingress vxlan mlx5_vdpa vringh vhost_iotlb vdpa rpcrdma rdma_ucm ib_iser libiscsi ib_umad scsi_transport_iscsi rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_core zram zsmalloc fuse CPU: 7 PID: 0 Comm: swapper/7 Not tainted 6.3.0+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x33/0x50 __schedule_bug+0x4e/0x60 __schedule+0x5d5/0x780 ? __mod_timer+0x286/0x3d0 schedule+0x50/0x90 schedule_timeout+0x7c/0xf0 ? __bpf_trace_tick_stop+0x10/0x10 __wait_for_common+0x88/0x190 ? usleep_range_state+0x90/0x90 cmd_exec+0x42e/0xb40 [mlx5_core] mlx5_cmd_do+0x1e/0x40 [mlx5_core] mlx5_cmd_exec+0x18/0x30 [mlx5_core] mlx5_cmd_delete_fte+0xa8/0xd0 [mlx5_core] del_hw_fte+0x60/0x120 [mlx5_core] mlx5_del_flow_rules+0xec/0x270 [mlx5_core] ? default_send_IPI_single_phys+0x26/0x30 mlx5e_accel_ipsec_fs_del_pol+0x1a/0x60 [mlx5_core] mlx5e_xfrm_free_policy+0x15/0x20 [mlx5_core] xfrm_policy_destroy+0x5a/0xb0 xfrm4_dst_destroy+0x7b/0x100 dst_destroy+0x37/0x120 rcu_core+0x2d6/0x540 __do_softirq+0xcd/0x273 irq_exit_rcu+0x82/0xb0 sysvec_apic_timer_interrupt+0x72/0x90 asm_sysvec_apic_timer_interrupt+0x16/0x20 RIP: 0010:default_idle+0x13/0x20 Code: c0 08 00 00 00 4d 29 c8 4c 01 c7 4c 29 c2 e9 72 ff ff ff cc cc cc cc 8b 05 7a 4d ee 00 85 c0 7e 07 0f 00 2d 2f 98 2e 00 fb f4 c3 66 66 2e 0f 1f 84 00 00 00 00 00 65 48 8b 04 25 40 b4 02 00 RSP: 0018:ffff888100843ee0 EFLAGS: 00000242 RAX: 0000000000000001 RBX: ffff888100812b00 RCX: 4000000000000000 RDX: 0000000000000001 RSI: 0000000000000083 RDI: 000000000002d2ec RBP: 0000000000000007 R08: 00000021daeded59 R09: 0000000000000001 R10: 0000000000000000 R11: 000000000000000f R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 default_idle_call+0x30/0xb0 do_idle+0x1c1/0x1d0 cpu_startup_entry+0x19/0x20 start_secondary+0xfe/0x120 secondary_startup_64_no_verify+0xf3/0xfb bad: scheduling from the idle thread! Fixes: a5b8ca9471d3 ("net/mlx5e: Add XFRM policy offload logic") Signed-off-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 55b38544422f..d1c801723d35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -1040,11 +1040,17 @@ err_fs: return err; } -static void mlx5e_xfrm_free_policy(struct xfrm_policy *x) +static void mlx5e_xfrm_del_policy(struct xfrm_policy *x) { struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); mlx5e_accel_ipsec_fs_del_pol(pol_entry); +} + +static void mlx5e_xfrm_free_policy(struct xfrm_policy *x) +{ + struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); + kfree(pol_entry); } @@ -1065,6 +1071,7 @@ static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = { .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, .xdo_dev_policy_add = mlx5e_xfrm_add_policy, + .xdo_dev_policy_delete = mlx5e_xfrm_del_policy, .xdo_dev_policy_free = mlx5e_xfrm_free_policy, }; From fef06678931ff67b158d337b581e5cf5ca40a3a3 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 5 Jun 2023 11:09:50 +0300 Subject: [PATCH 828/934] net/mlx5e: Fix ESN update kernel panic Previously during mlx5e_ipsec_handle_event the driver tried to execute an operation that could sleep, while holding a spinlock, which caused the kernel panic mentioned below. Move the function call that can sleep outside of the spinlock context. Call Trace: dump_stack_lvl+0x49/0x6c __schedule_bug.cold+0x42/0x4e schedule_debug.constprop.0+0xe0/0x118 __schedule+0x59/0x58a ? __mod_timer+0x2a1/0x3ef schedule+0x5e/0xd4 schedule_timeout+0x99/0x164 ? __pfx_process_timeout+0x10/0x10 __wait_for_common+0x90/0x1da ? __pfx_schedule_timeout+0x10/0x10 wait_func+0x34/0x142 [mlx5_core] mlx5_cmd_invoke+0x1f3/0x313 [mlx5_core] cmd_exec+0x1fe/0x325 [mlx5_core] mlx5_cmd_do+0x22/0x50 [mlx5_core] mlx5_cmd_exec+0x1c/0x40 [mlx5_core] mlx5_modify_ipsec_obj+0xb2/0x17f [mlx5_core] mlx5e_ipsec_update_esn_state+0x69/0xf0 [mlx5_core] ? wake_affine+0x62/0x1f8 mlx5e_ipsec_handle_event+0xb1/0xc0 [mlx5_core] process_one_work+0x1e2/0x3e6 ? __pfx_worker_thread+0x10/0x10 worker_thread+0x54/0x3ad ? __pfx_worker_thread+0x10/0x10 kthread+0xda/0x101 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x29/0x37 BUG: workqueue leaked lock or atomic: kworker/u256:4/0x7fffffff/189754#012 last function: mlx5e_ipsec_handle_event [mlx5_core] CPU: 66 PID: 189754 Comm: kworker/u256:4 Kdump: loaded Tainted: G W 6.2.0-2596.20230309201517_5.el8uek.rc1.x86_64 #2 Hardware name: Oracle Corporation ORACLE SERVER X9-2/ASMMBX9-2, BIOS 61070300 08/17/2022 Workqueue: mlx5e_ipsec: eth%d mlx5e_ipsec_handle_event [mlx5_core] Call Trace: dump_stack_lvl+0x49/0x6c process_one_work.cold+0x2b/0x3c ? __pfx_worker_thread+0x10/0x10 worker_thread+0x54/0x3ad ? __pfx_worker_thread+0x10/0x10 kthread+0xda/0x101 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x29/0x37 BUG: scheduling while atomic: kworker/u256:4/189754/0x00000000 Fixes: cee137a63431 ("net/mlx5e: Handle ESN update events") Signed-off-by: Patrisious Haddad Signed-off-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/ipsec_offload.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index df90e19066bc..ca16cb9807ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -305,7 +305,17 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, } mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); + + /* It is safe to execute the modify below unlocked since the only flows + * that could affect this HW object, are create, destroy and this work. + * + * Creation flow can't co-exist with this modify work, the destruction + * flow would cancel this work, and this work is a single entity that + * can't conflict with it self. + */ + spin_unlock_bh(&sa_entry->x->lock); mlx5_accel_esp_modify_xfrm(sa_entry, &attrs); + spin_lock_bh(&sa_entry->x->lock); data.data_offset_condition_operand = MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; @@ -431,7 +441,7 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) aso = sa_entry->ipsec->aso; attrs = &sa_entry->attrs; - spin_lock(&sa_entry->x->lock); + spin_lock_bh(&sa_entry->x->lock); ret = mlx5e_ipsec_aso_query(sa_entry, NULL); if (ret) goto unlock; @@ -447,7 +457,7 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) mlx5e_ipsec_handle_limits(sa_entry); unlock: - spin_unlock(&sa_entry->x->lock); + spin_unlock_bh(&sa_entry->x->lock); kfree(work); } From c75b94255aaa45d9e531df2763baa67020bb6fa9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Jun 2023 11:09:51 +0300 Subject: [PATCH 829/934] net/mlx5e: Drop XFRM state lock when modifying flow steering XFRM state which is changed to be XFRM_STATE_EXPIRED doesn't really need to hold lock while modifying flow steering rules to drop traffic. That state can be deleted only and as such mlx5e_ipsec_handle_tx_limit() work will be canceled anyway and won't run in parallel. Fixes: b2f7b01d36a9 ("net/mlx5e: Simulate missing IPsec TX limits hardware functionality") Signed-off-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index d1c801723d35..891d39b4bfd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -61,16 +61,19 @@ static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work) struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry; struct xfrm_state *x = sa_entry->x; - spin_lock(&x->lock); + if (sa_entry->attrs.drop) + return; + + spin_lock_bh(&x->lock); xfrm_state_check_expire(x); if (x->km.state == XFRM_STATE_EXPIRED) { sa_entry->attrs.drop = true; - mlx5e_accel_ipsec_fs_modify(sa_entry); - } - spin_unlock(&x->lock); + spin_unlock_bh(&x->lock); - if (sa_entry->attrs.drop) + mlx5e_accel_ipsec_fs_modify(sa_entry); return; + } + spin_unlock_bh(&x->lock); queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, MLX5_IPSEC_RESCHED); From a128f9d4c1227dfcf7f2328070760cb7ed1ec08d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Jun 2023 11:09:52 +0300 Subject: [PATCH 830/934] net/mlx5e: Fix scheduling of IPsec ASO query while in atomic ASO query can be scheduled in atomic context as such it can't use usleep. Use udelay as recommended in Documentation/timers/timers-howto.rst. Fixes: 76e463f6508b ("net/mlx5e: Overcome slow response for first IPsec ASO WQE") Signed-off-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index ca16cb9807ea..a3554bde3e07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -606,7 +606,8 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, do { ret = mlx5_aso_poll_cq(aso->aso, false); if (ret) - usleep_range(2, 10); + /* We are in atomic context */ + udelay(10); } while (ret && time_is_after_jiffies(expires)); spin_unlock_bh(&aso->lock); return ret; From cd9125030689dda69f73f6c2843d63135cb383f0 Mon Sep 17 00:00:00 2001 From: Azeem Shaikh Date: Tue, 13 Jun 2023 00:33:25 +0000 Subject: [PATCH 831/934] ieee802154: Replace strlcpy with strscpy strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated [1]. In an effort to remove strlcpy() completely [2], replace strlcpy() here with strscpy(). Direct replacement is safe here since the return values from the helper macros are ignored by the callers. [1] https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [2] https://github.com/KSPP/linux/issues/89 Signed-off-by: Azeem Shaikh Reviewed-by: Kees Cook Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230613003326.3538391-1-azeemshaikh38@gmail.com Signed-off-by: Stefan Schmidt --- net/ieee802154/trace.h | 2 +- net/mac802154/trace.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index e5d8439b9e45..c16db0b326fa 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -13,7 +13,7 @@ #define MAXNAME 32 #define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME) -#define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \ +#define WPAN_PHY_ASSIGN strscpy(__entry->wpan_phy_name, \ wpan_phy_name(wpan_phy), \ MAXNAME) #define WPAN_PHY_PR_FMT "%s" diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h index 689396d6c76a..1574ecc48075 100644 --- a/net/mac802154/trace.h +++ b/net/mac802154/trace.h @@ -14,7 +14,7 @@ #define MAXNAME 32 #define LOCAL_ENTRY __array(char, wpan_phy_name, MAXNAME) -#define LOCAL_ASSIGN strlcpy(__entry->wpan_phy_name, \ +#define LOCAL_ASSIGN strscpy(__entry->wpan_phy_name, \ wpan_phy_name(local->hw.phy), MAXNAME) #define LOCAL_PR_FMT "%s" #define LOCAL_PR_ARG __entry->wpan_phy_name From ba00b190670809c1a89326d80de96d714f6004f2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 16 Jun 2023 22:39:39 +0100 Subject: [PATCH 832/934] afs: Fix vlserver probe RTT handling In the same spirit as commit ca57f02295f1 ("afs: Fix fileserver probe RTT handling"), don't rule out using a vlserver just because there haven't been enough packets yet to calculate a real rtt. Always set the server's probe rtt from the estimate provided by rxrpc_kernel_get_srtt, which is capped at 1 second. This could lead to EDESTADDRREQ errors when accessing a cell for the first time, even though the vl servers are known and have responded to a probe. Fixes: 1d4adfaf6574 ("rxrpc: Make rxrpc_kernel_get_srtt() indicate validity") Signed-off-by: Marc Dionne Signed-off-by: David Howells cc: linux-afs@lists.infradead.org Link: http://lists.infradead.org/pipermail/linux-afs/2023-June/006746.html Signed-off-by: Linus Torvalds --- fs/afs/vl_probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index d1c7068b4346..58452b86e672 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -115,8 +115,8 @@ responded: } } - if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && - rtt_us < server->probe.rtt) { + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; From 2b9b8f3b68edb3d67d79962f02e26dbb5ae3808d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 5 Jun 2023 01:57:34 +0900 Subject: [PATCH 833/934] ksmbd: validate command payload size ->StructureSize2 indicates command payload size. ksmbd should validate this size with rfc1002 length before accessing it. This patch remove unneeded check and add the validation for this. [ 8.912583] BUG: KASAN: slab-out-of-bounds in ksmbd_smb2_check_message+0x12a/0xc50 [ 8.913051] Read of size 2 at addr ffff88800ac7d92c by task kworker/0:0/7 ... [ 8.914967] Call Trace: [ 8.915126] [ 8.915267] dump_stack_lvl+0x33/0x50 [ 8.915506] print_report+0xcc/0x620 [ 8.916558] kasan_report+0xae/0xe0 [ 8.917080] kasan_check_range+0x35/0x1b0 [ 8.917334] ksmbd_smb2_check_message+0x12a/0xc50 [ 8.917935] ksmbd_verify_smb_message+0xae/0xd0 [ 8.918223] handle_ksmbd_work+0x192/0x820 [ 8.918478] process_one_work+0x419/0x760 [ 8.918727] worker_thread+0x2a2/0x6f0 [ 8.919222] kthread+0x187/0x1d0 [ 8.919723] ret_from_fork+0x1f/0x30 [ 8.919954] Cc: stable@vger.kernel.org Reported-by: Chih-Yen Chang Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2misc.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/smb/server/smb2misc.c b/fs/smb/server/smb2misc.c index 0ffe663b7590..57749f41b991 100644 --- a/fs/smb/server/smb2misc.c +++ b/fs/smb/server/smb2misc.c @@ -351,6 +351,7 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) int command; __u32 clc_len; /* calculated length */ __u32 len = get_rfc1002_len(work->request_buf); + __u32 req_struct_size; if (le32_to_cpu(hdr->NextCommand) > 0) len = le32_to_cpu(hdr->NextCommand); @@ -373,17 +374,9 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) } if (smb2_req_struct_sizes[command] != pdu->StructureSize2) { - if (command != SMB2_OPLOCK_BREAK_HE && - (hdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2_LE)) { - /* error packets have 9 byte structure size */ - ksmbd_debug(SMB, - "Illegal request size %u for command %d\n", - le16_to_cpu(pdu->StructureSize2), command); - return 1; - } else if (command == SMB2_OPLOCK_BREAK_HE && - hdr->Status == 0 && - le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 && - le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) { + if (command == SMB2_OPLOCK_BREAK_HE && + le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 && + le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) { /* special case for SMB2.1 lease break message */ ksmbd_debug(SMB, "Illegal request size %d for oplock break\n", @@ -392,6 +385,14 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) } } + req_struct_size = le16_to_cpu(pdu->StructureSize2) + + __SMB2_HEADER_STRUCTURE_SIZE; + if (command == SMB2_LOCK_HE) + req_struct_size -= sizeof(struct smb2_lock_element); + + if (req_struct_size > len + 1) + return 1; + if (smb2_calc_size(hdr, &clc_len)) return 1; From 40b268d384a22276dca1450549f53eed60e21deb Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 15 Jun 2023 15:56:32 +0900 Subject: [PATCH 834/934] ksmbd: add mnt_want_write to ksmbd vfs functions ksmbd is doing write access using vfs helpers. There are the cases that mnt_want_write() is not called in vfs helper. This patch add missing mnt_want_write() to ksmbd vfs functions. Cc: stable@vger.kernel.org Cc: Amir Goldstein Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 26 ++++----- fs/smb/server/smbacl.c | 10 ++-- fs/smb/server/vfs.c | 117 ++++++++++++++++++++++++++++++-------- fs/smb/server/vfs.h | 17 +++--- fs/smb/server/vfs_cache.c | 2 +- 5 files changed, 117 insertions(+), 55 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 25c0ba04c59d..ccecdb71d2bc 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2249,7 +2249,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, /* delete the EA only when it exits */ if (rc > 0) { rc = ksmbd_vfs_remove_xattr(idmap, - path->dentry, + path, attr_name); if (rc < 0) { @@ -2263,8 +2263,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, /* if the EA doesn't exist, just do nothing. */ rc = 0; } else { - rc = ksmbd_vfs_setxattr(idmap, - path->dentry, attr_name, value, + rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value, le16_to_cpu(eabuf->EaValueLength), 0); if (rc < 0) { ksmbd_debug(SMB, @@ -2321,8 +2320,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path, return -EBADF; } - rc = ksmbd_vfs_setxattr(idmap, path->dentry, - xattr_stream_name, NULL, 0, 0); + rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0); if (rc < 0) pr_err("Failed to store XATTR stream name :%d\n", rc); return 0; @@ -2350,7 +2348,7 @@ static int smb2_remove_smb_xattrs(const struct path *path) if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) { - err = ksmbd_vfs_remove_xattr(idmap, path->dentry, + err = ksmbd_vfs_remove_xattr(idmap, path, name); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", @@ -2397,8 +2395,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path * da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), - path->dentry, &da); + rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da); if (rc) ksmbd_debug(SMB, "failed to store file attribute into xattr\n"); } @@ -2972,7 +2969,7 @@ int smb2_open(struct ksmbd_work *work) struct inode *inode = d_inode(path.dentry); posix_acl_rc = ksmbd_vfs_inherit_posix_acl(idmap, - path.dentry, + &path, d_inode(path.dentry->d_parent)); if (posix_acl_rc) ksmbd_debug(SMB, "inherit posix acl failed : %d\n", posix_acl_rc); @@ -2988,7 +2985,7 @@ int smb2_open(struct ksmbd_work *work) if (rc) { if (posix_acl_rc) ksmbd_vfs_set_init_posix_acl(idmap, - path.dentry); + &path); if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) { @@ -3028,7 +3025,7 @@ int smb2_open(struct ksmbd_work *work) rc = ksmbd_vfs_set_sd_xattr(conn, idmap, - path.dentry, + &path, pntsd, pntsd_size); kfree(pntsd); @@ -5464,7 +5461,7 @@ static int smb2_rename(struct ksmbd_work *work, goto out; rc = ksmbd_vfs_setxattr(file_mnt_idmap(fp->filp), - fp->filp->f_path.dentry, + &fp->filp->f_path, xattr_stream_name, NULL, 0, 0); if (rc < 0) { @@ -5629,8 +5626,7 @@ static int set_file_basic_info(struct ksmbd_file *fp, da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME | XATTR_DOSINFO_ITIME; - rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, - filp->f_path.dentry, &da); + rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da); if (rc) ksmbd_debug(SMB, "failed to restore file attribute in EA\n"); @@ -7485,7 +7481,7 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id, da.attr = le32_to_cpu(fp->f_ci->m_fattr); ret = ksmbd_vfs_set_dos_attrib_xattr(idmap, - fp->filp->f_path.dentry, &da); + &fp->filp->f_path, &da); if (ret) fp->f_ci->m_fattr = old_fattr; } diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c index 0a5862a61c77..ad919a4239d0 100644 --- a/fs/smb/server/smbacl.c +++ b/fs/smb/server/smbacl.c @@ -1162,8 +1162,7 @@ pass: pntsd_size += sizeof(struct smb_acl) + nt_size; } - ksmbd_vfs_set_sd_xattr(conn, idmap, - path->dentry, pntsd, pntsd_size); + ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size); kfree(pntsd); } @@ -1383,7 +1382,7 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, newattrs.ia_valid |= ATTR_MODE; newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777); - ksmbd_vfs_remove_acl_xattrs(idmap, path->dentry); + ksmbd_vfs_remove_acl_xattrs(idmap, path); /* Update posix acls */ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) { rc = set_posix_acl(idmap, path->dentry, @@ -1414,9 +1413,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon, if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) { /* Update WinACL in xattr */ - ksmbd_vfs_remove_sd_xattrs(idmap, path->dentry); - ksmbd_vfs_set_sd_xattr(conn, idmap, - path->dentry, pntsd, ntsd_len); + ksmbd_vfs_remove_sd_xattrs(idmap, path); + ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len); } out: diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index f9fb778247e7..81489fdedd8e 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -170,6 +170,10 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) return err; } + err = mnt_want_write(path.mnt); + if (err) + goto out_err; + mode |= S_IFREG; err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry), dentry, mode, true); @@ -179,6 +183,9 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) } else { pr_err("File(%s): creation failed (err:%d)\n", name, err); } + mnt_drop_write(path.mnt); + +out_err: done_path_create(&path, dentry); return err; } @@ -209,30 +216,35 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) return err; } + err = mnt_want_write(path.mnt); + if (err) + goto out_err2; + idmap = mnt_idmap(path.mnt); mode |= S_IFDIR; err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); - if (err) { - goto out; - } else if (d_unhashed(dentry)) { + if (!err && d_unhashed(dentry)) { struct dentry *d; d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, dentry->d_name.len); if (IS_ERR(d)) { err = PTR_ERR(d); - goto out; + goto out_err1; } if (unlikely(d_is_negative(d))) { dput(d); err = -ENOENT; - goto out; + goto out_err1; } ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); dput(d); } -out: + +out_err1: + mnt_drop_write(path.mnt); +out_err2: done_path_create(&path, dentry); if (err) pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); @@ -443,7 +455,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, memcpy(&stream_buf[*pos], buf, count); err = ksmbd_vfs_setxattr(idmap, - fp->filp->f_path.dentry, + &fp->filp->f_path, fp->stream.name, (void *)stream_buf, size, @@ -589,6 +601,10 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) goto out_err; } + err = mnt_want_write(path->mnt); + if (err) + goto out_err; + idmap = mnt_idmap(path->mnt); if (S_ISDIR(d_inode(path->dentry)->i_mode)) { err = vfs_rmdir(idmap, d_inode(parent), path->dentry); @@ -599,6 +615,7 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) if (err) ksmbd_debug(VFS, "unlink failed, err %d\n", err); } + mnt_drop_write(path->mnt); out_err: ksmbd_revert_fsids(work); @@ -644,11 +661,16 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname, goto out3; } + err = mnt_want_write(newpath.mnt); + if (err) + goto out3; + err = vfs_link(oldpath.dentry, mnt_idmap(newpath.mnt), d_inode(newpath.dentry), dentry, NULL); if (err) ksmbd_debug(VFS, "vfs_link failed err %d\n", err); + mnt_drop_write(newpath.mnt); out3: done_path_create(&newpath, dentry); @@ -694,6 +716,10 @@ retry: goto out2; } + err = mnt_want_write(old_path->mnt); + if (err) + goto out2; + trap = lock_rename_child(old_child, new_path.dentry); old_parent = dget(old_child->d_parent); @@ -757,6 +783,7 @@ out4: out3: dput(old_parent); unlock_rename(old_parent, new_path.dentry); + mnt_drop_write(old_path->mnt); out2: path_put(&new_path); @@ -897,19 +924,24 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap, * Return: 0 on success, otherwise error */ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *attr_name, + const struct path *path, const char *attr_name, void *attr_value, size_t attr_size, int flags) { int err; + err = mnt_want_write(path->mnt); + if (err) + return err; + err = vfs_setxattr(idmap, - dentry, + path->dentry, attr_name, attr_value, attr_size, flags); if (err) ksmbd_debug(VFS, "setxattr failed, err %d\n", err); + mnt_drop_write(path->mnt); return err; } @@ -1013,9 +1045,18 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, } int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - struct dentry *dentry, char *attr_name) + const struct path *path, char *attr_name) { - return vfs_removexattr(idmap, dentry, attr_name); + int err; + + err = mnt_want_write(path->mnt); + if (err) + return err; + + err = vfs_removexattr(idmap, path->dentry, attr_name); + mnt_drop_write(path->mnt); + + return err; } int ksmbd_vfs_unlink(struct file *filp) @@ -1024,6 +1065,10 @@ int ksmbd_vfs_unlink(struct file *filp) struct dentry *dir, *dentry = filp->f_path.dentry; struct mnt_idmap *idmap = file_mnt_idmap(filp); + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + dir = dget_parent(dentry); err = ksmbd_vfs_lock_parent(dir, dentry); if (err) @@ -1041,6 +1086,7 @@ int ksmbd_vfs_unlink(struct file *filp) ksmbd_debug(VFS, "failed to delete, err %d\n", err); out: dput(dir); + mnt_drop_write(filp->f_path.mnt); return err; } @@ -1244,13 +1290,13 @@ struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work, } int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, - struct dentry *dentry) + const struct path *path) { char *name, *xattr_list = NULL; ssize_t xattr_list_len; int err = 0; - xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list); + xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list); if (xattr_list_len < 0) { goto out; } else if (!xattr_list_len) { @@ -1258,6 +1304,10 @@ int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, goto out; } + err = mnt_want_write(path->mnt); + if (err) + goto out; + for (name = xattr_list; name - xattr_list < xattr_list_len; name += strlen(name) + 1) { ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); @@ -1266,25 +1316,26 @@ int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) || !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) { - err = vfs_remove_acl(idmap, dentry, name); + err = vfs_remove_acl(idmap, path->dentry, name); if (err) ksmbd_debug(SMB, "remove acl xattr failed : %s\n", name); } } + mnt_drop_write(path->mnt); + out: kvfree(xattr_list); return err; } -int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, - struct dentry *dentry) +int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path) { char *name, *xattr_list = NULL; ssize_t xattr_list_len; int err = 0; - xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list); + xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list); if (xattr_list_len < 0) { goto out; } else if (!xattr_list_len) { @@ -1297,7 +1348,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) { - err = ksmbd_vfs_remove_xattr(idmap, dentry, name); + err = ksmbd_vfs_remove_xattr(idmap, path, name); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", name); } @@ -1374,13 +1425,14 @@ out: int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, - struct dentry *dentry, + const struct path *path, struct smb_ntsd *pntsd, int len) { int rc; struct ndr sd_ndr = {0}, acl_ndr = {0}; struct xattr_ntacl acl = {0}; struct xattr_smb_acl *smb_acl, *def_smb_acl = NULL; + struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); acl.version = 4; @@ -1432,7 +1484,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, goto out; } - rc = ksmbd_vfs_setxattr(idmap, dentry, + rc = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_SD, sd_ndr.data, sd_ndr.offset, 0); if (rc < 0) @@ -1522,7 +1574,7 @@ free_n_data: } int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, - struct dentry *dentry, + const struct path *path, struct xattr_dos_attrib *da) { struct ndr n; @@ -1532,7 +1584,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, if (err) return err; - err = ksmbd_vfs_setxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE, + err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE, (void *)n.data, n.offset, 0); if (err) ksmbd_debug(SMB, "failed to store dos attribute in xattr\n"); @@ -1769,10 +1821,11 @@ void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock) } int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, - struct dentry *dentry) + struct path *path) { struct posix_acl_state acl_state; struct posix_acl *acls; + struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); int rc; @@ -1802,6 +1855,11 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, return -ENOMEM; } posix_state_to_acl(&acl_state, acls->a_entries); + + rc = mnt_want_write(path->mnt); + if (rc) + goto out_err; + rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", @@ -1813,16 +1871,20 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); } + mnt_drop_write(path->mnt); + +out_err: free_acl_state(&acl_state); posix_acl_release(acls); return rc; } int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, - struct dentry *dentry, struct inode *parent_inode) + struct path *path, struct inode *parent_inode) { struct posix_acl *acls; struct posix_acl_entry *pace; + struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); int rc, i; @@ -1841,6 +1903,10 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, } } + rc = mnt_want_write(path->mnt); + if (rc) + goto out_err; + rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls); if (rc < 0) ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n", @@ -1852,6 +1918,9 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n", rc); } + mnt_drop_write(path->mnt); + +out_err: posix_acl_release(acls); return rc; } diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index a4ae89f3230d..8c0931d4d531 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -108,12 +108,12 @@ ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap, struct dentry *dentry, char *attr_name, int attr_name_len); int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *attr_name, + const struct path *path, const char *attr_name, void *attr_value, size_t attr_size, int flags); int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - struct dentry *dentry, char *attr_name); + const struct path *path, char *attr_name); int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, unsigned int flags, struct path *path, bool caseless); @@ -139,26 +139,25 @@ void ksmbd_vfs_posix_lock_wait(struct file_lock *flock); int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout); void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock); int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap, - struct dentry *dentry); -int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, - struct dentry *dentry); + const struct path *path); +int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path); int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, - struct dentry *dentry, + const struct path *path, struct smb_ntsd *pntsd, int len); int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, struct mnt_idmap *idmap, struct dentry *dentry, struct smb_ntsd **pntsd); int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap, - struct dentry *dentry, + const struct path *path, struct xattr_dos_attrib *da); int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_dos_attrib *da); int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap, - struct dentry *dentry); + struct path *path); int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap, - struct dentry *dentry, + struct path *path, struct inode *parent_inode); #endif /* __KSMBD_VFS_H__ */ diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 2d0138e72d78..f41f8d6108ce 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -252,7 +252,7 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) if (ksmbd_stream_fd(fp) && (ci->m_flags & S_DEL_ON_CLS_STREAM)) { ci->m_flags &= ~S_DEL_ON_CLS_STREAM; err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp), - filp->f_path.dentry, + &filp->f_path, fp->stream.name); if (err) pr_err("remove xattr failed : %s\n", From 5fe7f7b78290638806211046a99f031ff26164e1 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 15 Jun 2023 22:04:40 +0900 Subject: [PATCH 835/934] ksmbd: fix out-of-bound read in smb2_write ksmbd_smb2_check_message doesn't validate hdr->NextCommand. If ->NextCommand is bigger than Offset + Length of smb2 write, It will allow oversized smb2 write length. It will cause OOB read in smb2_write. Cc: stable@vger.kernel.org Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-21164 Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2misc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/smb2misc.c b/fs/smb/server/smb2misc.c index 57749f41b991..33b7e6c4ceff 100644 --- a/fs/smb/server/smb2misc.c +++ b/fs/smb/server/smb2misc.c @@ -351,10 +351,16 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) int command; __u32 clc_len; /* calculated length */ __u32 len = get_rfc1002_len(work->request_buf); - __u32 req_struct_size; + __u32 req_struct_size, next_cmd = le32_to_cpu(hdr->NextCommand); - if (le32_to_cpu(hdr->NextCommand) > 0) - len = le32_to_cpu(hdr->NextCommand); + if ((u64)work->next_smb2_rcv_hdr_off + next_cmd > len) { + pr_err("next command(%u) offset exceeds smb msg size\n", + next_cmd); + return 1; + } + + if (next_cmd > 0) + len = next_cmd; else if (work->next_smb2_rcv_hdr_off) len -= work->next_smb2_rcv_hdr_off; From 5005bcb4219156f1bf7587b185080ec1da08518e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 15 Jun 2023 22:05:29 +0900 Subject: [PATCH 836/934] ksmbd: validate session id and tree id in the compound request This patch validate session id and tree id in compound request. If first operation in the compound is SMB2 ECHO request, ksmbd bypass session and tree validation. So work->sess and work->tcon could be NULL. If secound request in the compound access work->sess or tcon, It cause NULL pointer dereferecing error. Cc: stable@vger.kernel.org Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-21165 Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/server.c | 33 +++++++++++++++++++------------ fs/smb/server/smb2pdu.c | 44 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index f9b2e0f19b03..ced7a9e916f0 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -185,24 +185,31 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, goto send; } - if (conn->ops->check_user_session) { - rc = conn->ops->check_user_session(work); - if (rc < 0) { - command = conn->ops->get_cmd_val(work); - conn->ops->set_rsp_status(work, - STATUS_USER_SESSION_DELETED); - goto send; - } else if (rc > 0) { - rc = conn->ops->get_ksmbd_tcon(work); + do { + if (conn->ops->check_user_session) { + rc = conn->ops->check_user_session(work); if (rc < 0) { - conn->ops->set_rsp_status(work, - STATUS_NETWORK_NAME_DELETED); + if (rc == -EINVAL) + conn->ops->set_rsp_status(work, + STATUS_INVALID_PARAMETER); + else + conn->ops->set_rsp_status(work, + STATUS_USER_SESSION_DELETED); goto send; + } else if (rc > 0) { + rc = conn->ops->get_ksmbd_tcon(work); + if (rc < 0) { + if (rc == -EINVAL) + conn->ops->set_rsp_status(work, + STATUS_INVALID_PARAMETER); + else + conn->ops->set_rsp_status(work, + STATUS_NETWORK_NAME_DELETED); + goto send; + } } } - } - do { rc = __process_request(work, conn, &command); if (rc == SERVER_HANDLER_ABORT) break; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index ccecdb71d2bc..da1787c68ba0 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -91,7 +91,6 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work) unsigned int cmd = le16_to_cpu(req_hdr->Command); int tree_id; - work->tcon = NULL; if (cmd == SMB2_TREE_CONNECT_HE || cmd == SMB2_CANCEL_HE || cmd == SMB2_LOGOFF_HE) { @@ -105,10 +104,28 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work) } tree_id = le32_to_cpu(req_hdr->Id.SyncId.TreeId); + + /* + * If request is not the first in Compound request, + * Just validate tree id in header with work->tcon->id. + */ + if (work->next_smb2_rcv_hdr_off) { + if (!work->tcon) { + pr_err("The first operation in the compound does not have tcon\n"); + return -EINVAL; + } + if (work->tcon->id != tree_id) { + pr_err("tree id(%u) is different with id(%u) in first operation\n", + tree_id, work->tcon->id); + return -EINVAL; + } + return 1; + } + work->tcon = ksmbd_tree_conn_lookup(work->sess, tree_id); if (!work->tcon) { pr_err("Invalid tid %d\n", tree_id); - return -EINVAL; + return -ENOENT; } return 1; @@ -547,7 +564,6 @@ int smb2_check_user_session(struct ksmbd_work *work) unsigned int cmd = conn->ops->get_cmd_val(work); unsigned long long sess_id; - work->sess = NULL; /* * SMB2_ECHO, SMB2_NEGOTIATE, SMB2_SESSION_SETUP command do not * require a session id, so no need to validate user session's for @@ -558,15 +574,33 @@ int smb2_check_user_session(struct ksmbd_work *work) return 0; if (!ksmbd_conn_good(conn)) - return -EINVAL; + return -EIO; sess_id = le64_to_cpu(req_hdr->SessionId); + + /* + * If request is not the first in Compound request, + * Just validate session id in header with work->sess->id. + */ + if (work->next_smb2_rcv_hdr_off) { + if (!work->sess) { + pr_err("The first operation in the compound does not have sess\n"); + return -EINVAL; + } + if (work->sess->id != sess_id) { + pr_err("session id(%llu) is different with the first operation(%lld)\n", + sess_id, work->sess->id); + return -EINVAL; + } + return 1; + } + /* Check for validity of user session */ work->sess = ksmbd_session_lookup_all(conn, sess_id); if (work->sess) return 1; ksmbd_debug(SMB, "Invalid user session, Uid %llu\n", sess_id); - return -EINVAL; + return -ENOENT; } static void destroy_previous_session(struct ksmbd_conn *conn, From b5b2a02bcaac7c287694aa0db4837a07bf178626 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 15 Jun 2023 00:00:02 +0200 Subject: [PATCH 837/934] parisc: Delete redundant register definitions in We define sp and ipsw in using ".reg", and when using current binutils (snapshot 2.40.50.20230611) the definitions in using "=" conflict with those: arch/parisc/include/asm/assembly.h: Assembler messages: arch/parisc/include/asm/assembly.h:93: Error: symbol `sp' is already defined arch/parisc/include/asm/assembly.h:95: Error: symbol `ipsw' is already defined Delete the duplicate definitions in . Also delete the definition of gp, which isn't used anywhere. Signed-off-by: Ben Hutchings Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/assembly.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 0f0d4a496fef..75677b526b2b 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -90,10 +90,6 @@ #include #include - sp = 30 - gp = 27 - ipsw = 22 - /* * We provide two versions of each macro to convert from physical * to virtual and vice versa. The "_r1" versions take one argument From 4aaf2c52834b7f95acdf9fb0211a1b60adbf421b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= Date: Thu, 15 Jun 2023 10:49:29 +0200 Subject: [PATCH 838/934] sfc: use budget for TX completions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running workloads heavy unbalanced towards TX (high TX, low RX traffic), sfc driver can retain the CPU during too long times. Although in many cases this is not enough to be visible, it can affect performance and system responsiveness. A way to reproduce it is to use a debug kernel and run some parallel netperf TX tests. In some systems, this will lead to this message being logged: kernel:watchdog: BUG: soft lockup - CPU#12 stuck for 22s! The reason is that sfc driver doesn't account any NAPI budget for the TX completion events work. With high-TX/low-RX traffic, this makes that the CPU is held for long time for NAPI poll. Documentations says "drivers can process completions for any number of Tx packets but should only process up to budget number of Rx packets". However, many drivers do limit the amount of TX completions that they process in a single NAPI poll. In the same way, this patch adds a limit for the TX work in sfc. With the patch applied, the watchdog warning never appears. Tested with netperf in different combinations: single process / parallel processes, TCP / UDP and different sizes of UDP messages. Repeated the tests before and after the patch, without any noticeable difference in network or CPU performance. Test hardware: Intel(R) Xeon(R) CPU E5-1620 v4 @ 3.50GHz (4 cores, 2 threads/core) Solarflare Communications XtremeScale X2522-25G Network Adapter Fixes: 5227ecccea2d ("sfc: remove tx and MCDI handling from NAPI budget consideration") Fixes: d19a53721863 ("sfc_ef100: TX path for EF100 NICs") Reported-by: Fei Liu Signed-off-by: Íñigo Huguet Acked-by: Martin Habets Link: https://lore.kernel.org/r/20230615084929.10506-1-ihuguet@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef10.c | 25 ++++++++++++++++++------- drivers/net/ethernet/sfc/ef100_nic.c | 7 ++++++- drivers/net/ethernet/sfc/ef100_tx.c | 4 ++-- drivers/net/ethernet/sfc/ef100_tx.h | 2 +- drivers/net/ethernet/sfc/tx_common.c | 4 +++- drivers/net/ethernet/sfc/tx_common.h | 2 +- 6 files changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index d30459dbfe8f..b63e47af6365 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2950,7 +2950,7 @@ static u32 efx_ef10_extract_event_ts(efx_qword_t *event) return tstamp; } -static void +static int efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) { struct efx_nic *efx = channel->efx; @@ -2958,13 +2958,14 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) unsigned int tx_ev_desc_ptr; unsigned int tx_ev_q_label; unsigned int tx_ev_type; + int work_done; u64 ts_part; if (unlikely(READ_ONCE(efx->reset_pending))) - return; + return 0; if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT))) - return; + return 0; /* Get the transmit queue */ tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL); @@ -2973,8 +2974,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) if (!tx_queue->timestamping) { /* Transmit completion */ tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX); - efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask); - return; + return efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask); } /* Transmit timestamps are only available for 8XXX series. They result @@ -3000,6 +3000,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) * fields in the event. */ tx_ev_type = EFX_QWORD_FIELD(*event, ESF_EZ_TX_SOFT1); + work_done = 0; switch (tx_ev_type) { case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION: @@ -3016,6 +3017,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) tx_queue->completed_timestamp_major = ts_part; efx_xmit_done_single(tx_queue); + work_done = 1; break; default: @@ -3026,6 +3028,8 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) EFX_QWORD_VAL(*event)); break; } + + return work_done; } static void @@ -3081,13 +3085,16 @@ static void efx_ef10_handle_driver_generated_event(struct efx_channel *channel, } } +#define EFX_NAPI_MAX_TX 512 + static int efx_ef10_ev_process(struct efx_channel *channel, int quota) { struct efx_nic *efx = channel->efx; efx_qword_t event, *p_event; unsigned int read_ptr; - int ev_code; + int spent_tx = 0; int spent = 0; + int ev_code; if (quota <= 0) return spent; @@ -3126,7 +3133,11 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota) } break; case ESE_DZ_EV_CODE_TX_EV: - efx_ef10_handle_tx_event(channel, &event); + spent_tx += efx_ef10_handle_tx_event(channel, &event); + if (spent_tx >= EFX_NAPI_MAX_TX) { + spent = quota; + goto out; + } break; case ESE_DZ_EV_CODE_DRIVER_EV: efx_ef10_handle_driver_event(channel, &event); diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 4dc643b0d2db..7adde9639c8a 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -253,6 +253,8 @@ static void ef100_ev_read_ack(struct efx_channel *channel) efx_reg(channel->efx, ER_GZ_EVQ_INT_PRIME)); } +#define EFX_NAPI_MAX_TX 512 + static int ef100_ev_process(struct efx_channel *channel, int quota) { struct efx_nic *efx = channel->efx; @@ -260,6 +262,7 @@ static int ef100_ev_process(struct efx_channel *channel, int quota) bool evq_phase, old_evq_phase; unsigned int read_ptr; efx_qword_t *p_event; + int spent_tx = 0; int spent = 0; bool ev_phase; int ev_type; @@ -295,7 +298,9 @@ static int ef100_ev_process(struct efx_channel *channel, int quota) efx_mcdi_process_event(channel, p_event); break; case ESE_GZ_EF100_EV_TX_COMPLETION: - ef100_ev_tx(channel, p_event); + spent_tx += ef100_ev_tx(channel, p_event); + if (spent_tx >= EFX_NAPI_MAX_TX) + spent = quota; break; case ESE_GZ_EF100_EV_DRIVER: netif_info(efx, drv, efx->net_dev, diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c index 29ffaf35559d..849e5555bd12 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.c +++ b/drivers/net/ethernet/sfc/ef100_tx.c @@ -346,7 +346,7 @@ void ef100_tx_write(struct efx_tx_queue *tx_queue) ef100_tx_push_buffers(tx_queue); } -void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) +int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) { unsigned int tx_done = EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_NUM_DESC); @@ -357,7 +357,7 @@ void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) unsigned int tx_index = (tx_queue->read_count + tx_done - 1) & tx_queue->ptr_mask; - efx_xmit_done(tx_queue, tx_index); + return efx_xmit_done(tx_queue, tx_index); } /* Add a socket buffer to a TX queue diff --git a/drivers/net/ethernet/sfc/ef100_tx.h b/drivers/net/ethernet/sfc/ef100_tx.h index e9e11540fcde..d9a0819c5a72 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.h +++ b/drivers/net/ethernet/sfc/ef100_tx.h @@ -20,7 +20,7 @@ void ef100_tx_init(struct efx_tx_queue *tx_queue); void ef100_tx_write(struct efx_tx_queue *tx_queue); unsigned int ef100_tx_max_skb_descs(struct efx_nic *efx); -void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event); +int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event); netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb, diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 67e789b96c43..755aa92bf823 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -249,7 +249,7 @@ void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue) } } -void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) +int efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) { unsigned int fill_level, pkts_compl = 0, bytes_compl = 0; unsigned int efv_pkts_compl = 0; @@ -279,6 +279,8 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) } efx_xmit_done_check_empty(tx_queue); + + return pkts_compl + efv_pkts_compl; } /* Remove buffers put into a tx_queue for the current packet. diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h index d87aecbc7bf1..1e9f42938aac 100644 --- a/drivers/net/ethernet/sfc/tx_common.h +++ b/drivers/net/ethernet/sfc/tx_common.h @@ -28,7 +28,7 @@ static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer) } void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue); -void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); +int efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, unsigned int insert_count); From 9636be85cc5bdd8b7a7f6a53405cbcc52161c93c Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 23 May 2023 10:14:21 -0700 Subject: [PATCH 839/934] x86/hyperv: Fix hyperv_pcpu_input_arg handling when CPUs go online/offline These commits a494aef23dfc ("PCI: hv: Replace retarget_msi_interrupt_params with hyperv_pcpu_input_arg") 2c6ba4216844 ("PCI: hv: Enable PCI pass-thru devices in Confidential VMs") update the Hyper-V virtual PCI driver to use the hyperv_pcpu_input_arg because that memory will be correctly marked as decrypted or encrypted for all VM types (CoCo or normal). But problems ensue when CPUs in the VM go online or offline after virtual PCI devices have been configured. When a CPU is brought online, the hyperv_pcpu_input_arg for that CPU is initialized by hv_cpu_init() running under state CPUHP_AP_ONLINE_DYN. But this state occurs after state CPUHP_AP_IRQ_AFFINITY_ONLINE, which may call the virtual PCI driver and fault trying to use the as yet uninitialized hyperv_pcpu_input_arg. A similar problem occurs in a CoCo VM if the MMIO read and write hypercalls are used from state CPUHP_AP_IRQ_AFFINITY_ONLINE. When a CPU is taken offline, IRQs may be reassigned in state CPUHP_TEARDOWN_CPU. Again, the virtual PCI driver may fault trying to use the hyperv_pcpu_input_arg that has already been freed by a higher state. Fix the onlining problem by adding state CPUHP_AP_HYPERV_ONLINE immediately after CPUHP_AP_ONLINE_IDLE (similar to CPUHP_AP_KVM_ONLINE) and before CPUHP_AP_IRQ_AFFINITY_ONLINE. Use this new state for Hyper-V initialization so that hyperv_pcpu_input_arg is allocated early enough. Fix the offlining problem by not freeing hyperv_pcpu_input_arg when a CPU goes offline. Retain the allocated memory, and reuse it if the CPU comes back online later. Signed-off-by: Michael Kelley Reviewed-by: Vitaly Kuznetsov Acked-by: Borislav Petkov (AMD) Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/1684862062-51576-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_init.c | 2 +- drivers/hv/hv_common.c | 48 +++++++++++++++++++------------------- include/linux/cpuhotplug.h | 1 + 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a5f9474f08e1..6c04b52f139b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -416,7 +416,7 @@ void __init hyperv_init(void) goto free_vp_assist_page; } - cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", + cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online", hv_cpu_init, hv_cpu_die); if (cpuhp < 0) goto free_ghcb_page; diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 64f9ceca887b..542a1d53b303 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -364,13 +364,20 @@ int hv_common_cpu_init(unsigned int cpu) flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL; inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); - if (!(*inputarg)) - return -ENOMEM; - if (hv_root_partition) { - outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); - *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; + /* + * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already + * allocated if this CPU was previously online and then taken offline + */ + if (!*inputarg) { + *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); + if (!(*inputarg)) + return -ENOMEM; + + if (hv_root_partition) { + outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); + *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; + } } msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); @@ -385,24 +392,17 @@ int hv_common_cpu_init(unsigned int cpu) int hv_common_cpu_die(unsigned int cpu) { - unsigned long flags; - void **inputarg, **outputarg; - void *mem; - - local_irq_save(flags); - - inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - mem = *inputarg; - *inputarg = NULL; - - if (hv_root_partition) { - outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); - *outputarg = NULL; - } - - local_irq_restore(flags); - - kfree(mem); + /* + * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory + * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg + * may be used by the Hyper-V vPCI driver in reassigning interrupts + * as part of the offlining process. The interrupt reassignment + * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and + * called this function. + * + * If a previously offlined CPU is brought back online again, the + * originally allocated memory is reused in hv_common_cpu_init(). + */ return 0; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0f1001dca0e0..3ceb9dfa0993 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -200,6 +200,7 @@ enum cpuhp_state { /* Online section invoked on the hotplugged CPU from the hotplug thread */ CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_HYPERV_ONLINE, CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, From 52ae076c3a9b366b6fa9f7c7e67aed8b28716ed9 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 23 May 2023 10:14:22 -0700 Subject: [PATCH 840/934] arm64/hyperv: Use CPUHP_AP_HYPERV_ONLINE state to fix CPU online sequencing State CPUHP_AP_HYPERV_ONLINE has been introduced to correctly sequence the initialization of hyperv_pcpu_input_arg. Use this new state for Hyper-V initialization so that hyperv_pcpu_input_arg is allocated early enough. Signed-off-by: Michael Kelley Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/1684862062-51576-2-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- arch/arm64/hyperv/mshyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c index a406454578f0..f1b8a04ee9f2 100644 --- a/arch/arm64/hyperv/mshyperv.c +++ b/arch/arm64/hyperv/mshyperv.c @@ -67,7 +67,7 @@ static int __init hyperv_init(void) if (ret) return ret; - ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online", + ret = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "arm64/hyperv_init:online", hv_common_cpu_init, hv_common_cpu_die); if (ret < 0) { hv_common_free(); From ef7dfac51d8ed961b742218f526bd589f3900a59 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 17 Jun 2023 19:50:24 -0600 Subject: [PATCH 841/934] io_uring/poll: serialize poll linked timer start with poll removal We selectively grab the ctx->uring_lock for poll update/removal, but we really should grab it from the start to fully synchronize with linked timeouts. Normally this is indeed the case, but if requests are forced async by the application, we don't fully cover removal and timer disarm within the uring_lock. Make this simpler by having consistent locking state for poll removal. Cc: stable@vger.kernel.org # 6.1+ Reported-by: Querijn Voet Signed-off-by: Jens Axboe --- io_uring/poll.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index c90e47dc1e29..a78b8af7d9ab 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -977,8 +977,9 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) struct io_hash_bucket *bucket; struct io_kiocb *preq; int ret2, ret = 0; - struct io_tw_state ts = {}; + struct io_tw_state ts = { .locked = true }; + io_ring_submit_lock(ctx, issue_flags); preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket); ret2 = io_poll_disarm(preq); if (bucket) @@ -990,12 +991,10 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) goto out; } - io_ring_submit_lock(ctx, issue_flags); preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket); ret2 = io_poll_disarm(preq); if (bucket) spin_unlock(&bucket->lock); - io_ring_submit_unlock(ctx, issue_flags); if (ret2) { ret = ret2; goto out; @@ -1019,7 +1018,7 @@ found: if (poll_update->update_user_data) preq->cqe.user_data = poll_update->new_user_data; - ret2 = io_poll_add(preq, issue_flags); + ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED); /* successfully updated, don't complete poll request */ if (!ret2 || ret2 == -EIOCBQUEUED) goto out; @@ -1027,9 +1026,9 @@ found: req_set_fail(preq); io_req_set_res(preq, -ECANCELED, 0); - ts.locked = !(issue_flags & IO_URING_F_UNLOCKED); io_req_task_complete(preq, &ts); out: + io_ring_submit_unlock(ctx, issue_flags); if (ret < 0) { req_set_fail(req); return ret; From 6aa0365a3c8512587fffd42fe438768709ddef8e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 15 Jun 2023 17:18:53 +0900 Subject: [PATCH 842/934] ata: libata-scsi: Avoid deadlock on rescan after device resume When an ATA port is resumed from sleep, the port is reset and a power management request issued to libata EH to reset the port and rescanning the device(s) attached to the port. Device rescanning is done by scheduling an ata_scsi_dev_rescan() work, which will execute scsi_rescan_device(). However, scsi_rescan_device() takes the generic device lock, which is also taken by dpm_resume() when the SCSI device is resumed as well. If a device rescan execution starts before the completion of the SCSI device resume, the rcu locking used to refresh the cached VPD pages of the device, combined with the generic device locking from scsi_rescan_device() and from dpm_resume() can cause a deadlock. Avoid this situation by changing struct ata_port scsi_rescan_task to be a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is modified to check if the SCSI device associated with the ATA device that must be rescanned is not suspended. If the SCSI device is still suspended, ata_scsi_dev_rescan() returns early and reschedule itself for execution after an arbitrary delay of 5ms. Reported-by: Kai-Heng Feng Reported-by: Joe Breuer Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530 Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management") Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Tested-by: Kai-Heng Feng Tested-by: Joe Breuer --- drivers/ata/libata-core.c | 3 ++- drivers/ata/libata-eh.c | 2 +- drivers/ata/libata-scsi.c | 22 +++++++++++++++++++++- include/linux/libata.h | 2 +- 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8bf612bdd61a..b4f246f0cac7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5348,7 +5348,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host) mutex_init(&ap->scsi_scan_mutex); INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug); - INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); + INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); INIT_LIST_HEAD(&ap->eh_done_q); init_waitqueue_head(&ap->eh_wait_q); init_completion(&ap->park_req_pending); @@ -5954,6 +5954,7 @@ static void ata_port_detach(struct ata_port *ap) WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED)); cancel_delayed_work_sync(&ap->hotplug_task); + cancel_delayed_work_sync(&ap->scsi_rescan_task); skip_eh: /* clean up zpodd on port removal */ diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index a6c901811802..6f8d14191593 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2984,7 +2984,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, ehc->i.flags |= ATA_EHI_SETMODE; /* schedule the scsi_rescan_device() here */ - schedule_work(&(ap->scsi_rescan_task)); + schedule_delayed_work(&ap->scsi_rescan_task, 0); } else if (dev->class == ATA_DEV_UNKNOWN && ehc->tries[dev->devno] && ata_class_enabled(ehc->classes[dev->devno])) { diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 8ce90284eb34..551077cea4e4 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4597,10 +4597,11 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, void ata_scsi_dev_rescan(struct work_struct *work) { struct ata_port *ap = - container_of(work, struct ata_port, scsi_rescan_task); + container_of(work, struct ata_port, scsi_rescan_task.work); struct ata_link *link; struct ata_device *dev; unsigned long flags; + bool delay_rescan = false; mutex_lock(&ap->scsi_scan_mutex); spin_lock_irqsave(ap->lock, flags); @@ -4614,6 +4615,21 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (scsi_device_get(sdev)) continue; + /* + * If the rescan work was scheduled because of a resume + * event, the port is already fully resumed, but the + * SCSI device may not yet be fully resumed. In such + * case, executing scsi_rescan_device() may cause a + * deadlock with the PM code on device_lock(). Prevent + * this by giving up and retrying rescan after a short + * delay. + */ + delay_rescan = sdev->sdev_gendev.power.is_suspended; + if (delay_rescan) { + scsi_device_put(sdev); + break; + } + spin_unlock_irqrestore(ap->lock, flags); scsi_rescan_device(&(sdev->sdev_gendev)); scsi_device_put(sdev); @@ -4623,4 +4639,8 @@ void ata_scsi_dev_rescan(struct work_struct *work) spin_unlock_irqrestore(ap->lock, flags); mutex_unlock(&ap->scsi_scan_mutex); + + if (delay_rescan) + schedule_delayed_work(&ap->scsi_rescan_task, + msecs_to_jiffies(5)); } diff --git a/include/linux/libata.h b/include/linux/libata.h index 311cd93377c7..dd5797fb6305 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -836,7 +836,7 @@ struct ata_port { struct mutex scsi_scan_mutex; struct delayed_work hotplug_task; - struct work_struct scsi_rescan_task; + struct delayed_work scsi_rescan_task; unsigned int hsm_task_state; From 440b5e3663271b0ffbd4908115044a6a51fb938b Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 14 Jun 2023 21:44:47 -0700 Subject: [PATCH 843/934] PCI: hv: Fix a race condition bug in hv_pci_query_relations() Since day 1 of the driver, there has been a race between hv_pci_query_relations() and survey_child_resources(): during fast device hotplug, hv_pci_query_relations() may error out due to device-remove and the stack variable 'comp' is no longer valid; however, pci_devices_present_work() -> survey_child_resources() -> complete() may be running on another CPU and accessing the no-longer-valid 'comp'. Fix the race by flushing the workqueue before we exit from hv_pci_query_relations(). Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs") Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Acked-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230615044451.5580-2-decui@microsoft.com Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index bc32662c6bb7..ea8862e656b6 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -3401,6 +3401,24 @@ static int hv_pci_query_relations(struct hv_device *hdev) if (!ret) ret = wait_for_response(hdev, &comp); + /* + * In the case of fast device addition/removal, it's possible that + * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we + * already got a PCI_BUS_RELATIONS* message from the host and the + * channel callback already scheduled a work to hbus->wq, which can be + * running pci_devices_present_work() -> survey_child_resources() -> + * complete(&hbus->survey_event), even after hv_pci_query_relations() + * exits and the stack variable 'comp' is no longer valid; as a result, + * a hang or a page fault may happen when the complete() calls + * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from + * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is + * -ENODEV, there can't be any more work item scheduled to hbus->wq + * after the flush_workqueue(): see vmbus_onoffer_rescind() -> + * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() -> + * channel->rescind = true. + */ + flush_workqueue(hbus->wq); + return ret; } From 2738d5ab7929a845b654cd171a1e275c37eb428e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 14 Jun 2023 21:44:48 -0700 Subject: [PATCH 844/934] PCI: hv: Fix a race condition in hv_irq_unmask() that can cause panic When the host tries to remove a PCI device, the host first sends a PCI_EJECT message to the guest, and the guest is supposed to gracefully remove the PCI device and send a PCI_EJECTION_COMPLETE message to the host; the host then sends a VMBus message CHANNELMSG_RESCIND_CHANNELOFFER to the guest (when the guest receives this message, the device is already unassigned from the guest) and the guest can do some final cleanup work; if the guest fails to respond to the PCI_EJECT message within one minute, the host sends the VMBus message CHANNELMSG_RESCIND_CHANNELOFFER and removes the PCI device forcibly. In the case of fast device addition/removal, it's possible that the PCI device driver is still configuring MSI-X interrupts when the guest receives the PCI_EJECT message; the channel callback calls hv_pci_eject_device(), which sets hpdev->state to hv_pcichild_ejecting, and schedules a work hv_eject_device_work(); if the PCI device driver is calling pci_alloc_irq_vectors() -> ... -> hv_compose_msi_msg(), we can break the while loop in hv_compose_msi_msg() due to the updated hpdev->state, and leave data->chip_data with its default value of NULL; later, when the PCI device driver calls request_irq() -> ... -> hv_irq_unmask(), the guest crashes in hv_arch_irq_unmask() due to data->chip_data being NULL. Fix the issue by not testing hpdev->state in the while loop: when the guest receives PCI_EJECT, the device is still assigned to the guest, and the guest has one minute to finish the device removal gracefully. We don't really need to (and we should not) test hpdev->state in the loop. Fixes: de0aa7b2f97d ("PCI: hv: Fix 2 hang issues in hv_compose_msi_msg()") Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230615044451.5580-3-decui@microsoft.com Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ea8862e656b6..733637d96765 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -635,6 +635,11 @@ static void hv_arch_irq_unmask(struct irq_data *data) pbus = pdev->bus; hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata); int_desc = data->chip_data; + if (!int_desc) { + dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n", + __func__, data->irq); + return; + } local_irq_save(flags); @@ -2004,12 +2009,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) hv_pci_onchannelcallback(hbus); spin_unlock_irqrestore(&channel->sched_lock, flags); - if (hpdev->state == hv_pcichild_ejecting) { - dev_err_once(&hbus->hdev->device, - "the device is being ejected\n"); - goto enable_tasklet; - } - udelay(100); } From add9195e69c94b32e96f78c2f9cea68f0e850b3f Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 14 Jun 2023 21:44:49 -0700 Subject: [PATCH 845/934] PCI: hv: Remove the useless hv_pcichild_state from struct hv_pci_dev The hpdev->state is never really useful. The only use in hv_pci_eject_device() and hv_eject_device_work() is not really necessary. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Acked-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230615044451.5580-4-decui@microsoft.com Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 733637d96765..a826b41c949a 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -545,19 +545,10 @@ struct hv_dr_state { struct hv_pcidev_description func[]; }; -enum hv_pcichild_state { - hv_pcichild_init = 0, - hv_pcichild_requirements, - hv_pcichild_resourced, - hv_pcichild_ejecting, - hv_pcichild_maximum -}; - struct hv_pci_dev { /* List protected by pci_rescan_remove_lock */ struct list_head list_entry; refcount_t refs; - enum hv_pcichild_state state; struct pci_slot *pci_slot; struct hv_pcidev_description desc; bool reported_missing; @@ -2843,8 +2834,6 @@ static void hv_eject_device_work(struct work_struct *work) hpdev = container_of(work, struct hv_pci_dev, wrk); hbus = hpdev->hbus; - WARN_ON(hpdev->state != hv_pcichild_ejecting); - /* * Ejection can come before or after the PCI bus has been set up, so * attempt to find it and tear down the bus state, if it exists. This @@ -2901,7 +2890,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev) return; } - hpdev->state = hv_pcichild_ejecting; get_pcichild(hpdev); INIT_WORK(&hpdev->wrk, hv_eject_device_work); queue_work(hbus->wq, &hpdev->wrk); From a847234e24d03d01a9566d1d9dcce018cc018d67 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 14 Jun 2023 21:44:50 -0700 Subject: [PATCH 846/934] Revert "PCI: hv: Fix a timing issue which causes kdump to fail occasionally" This reverts commit d6af2ed29c7c1c311b96dac989dcb991e90ee195. The statement "the hv_pci_bus_exit() call releases structures of all its child devices" in commit d6af2ed29c7c is not true: in the path hv_pci_probe() -> hv_pci_enter_d0() -> hv_pci_bus_exit(hdev, true): the parameter "keep_devs" is true, so hv_pci_bus_exit() does *not* release the child "struct hv_pci_dev *hpdev" that is created earlier in pci_devices_present_work() -> new_pcichild_device(). The commit d6af2ed29c7c was originally made in July 2020 for RHEL 7.7, where the old version of hv_pci_bus_exit() was used; when the commit was rebased and merged into the upstream, people didn't notice that it's not really necessary. The commit itself doesn't cause any issue, but it makes hv_pci_probe() more complicated. Revert it to facilitate some upcoming changes to hv_pci_probe(). Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Acked-by: Wei Hu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230615044451.5580-5-decui@microsoft.com Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 71 ++++++++++++++--------------- 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index a826b41c949a..1a5296fad1c4 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -3318,8 +3318,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev) struct pci_bus_d0_entry *d0_entry; struct hv_pci_compl comp_pkt; struct pci_packet *pkt; + bool retry = true; int ret; +enter_d0_retry: /* * Tell the host that the bus is ready to use, and moved into the * powered-on state. This includes telling the host which region @@ -3346,6 +3348,38 @@ static int hv_pci_enter_d0(struct hv_device *hdev) if (ret) goto exit; + /* + * In certain case (Kdump) the pci device of interest was + * not cleanly shut down and resource is still held on host + * side, the host could return invalid device status. + * We need to explicitly request host to release the resource + * and try to enter D0 again. + */ + if (comp_pkt.completion_status < 0 && retry) { + retry = false; + + dev_err(&hdev->device, "Retrying D0 Entry\n"); + + /* + * Hv_pci_bus_exit() calls hv_send_resource_released() + * to free up resources of its child devices. + * In the kdump kernel we need to set the + * wslot_res_allocated to 255 so it scans all child + * devices to release resources allocated in the + * normal kernel before panic happened. + */ + hbus->wslot_res_allocated = 255; + + ret = hv_pci_bus_exit(hdev, true); + + if (ret == 0) { + kfree(pkt); + goto enter_d0_retry; + } + dev_err(&hdev->device, + "Retrying D0 failed with ret %d\n", ret); + } + if (comp_pkt.completion_status < 0) { dev_err(&hdev->device, "PCI Pass-through VSP failed D0 Entry with status %x\n", @@ -3591,7 +3625,6 @@ static int hv_pci_probe(struct hv_device *hdev, struct hv_pcibus_device *hbus; u16 dom_req, dom; char *name; - bool enter_d0_retry = true; int ret; bridge = devm_pci_alloc_host_bridge(&hdev->device, 0); @@ -3708,47 +3741,11 @@ static int hv_pci_probe(struct hv_device *hdev, if (ret) goto free_fwnode; -retry: ret = hv_pci_query_relations(hdev); if (ret) goto free_irq_domain; ret = hv_pci_enter_d0(hdev); - /* - * In certain case (Kdump) the pci device of interest was - * not cleanly shut down and resource is still held on host - * side, the host could return invalid device status. - * We need to explicitly request host to release the resource - * and try to enter D0 again. - * Since the hv_pci_bus_exit() call releases structures - * of all its child devices, we need to start the retry from - * hv_pci_query_relations() call, requesting host to send - * the synchronous child device relations message before this - * information is needed in hv_send_resources_allocated() - * call later. - */ - if (ret == -EPROTO && enter_d0_retry) { - enter_d0_retry = false; - - dev_err(&hdev->device, "Retrying D0 Entry\n"); - - /* - * Hv_pci_bus_exit() calls hv_send_resources_released() - * to free up resources of its child devices. - * In the kdump kernel we need to set the - * wslot_res_allocated to 255 so it scans all child - * devices to release resources allocated in the - * normal kernel before panic happened. - */ - hbus->wslot_res_allocated = 255; - ret = hv_pci_bus_exit(hdev, true); - - if (ret == 0) - goto retry; - - dev_err(&hdev->device, - "Retrying D0 failed with ret %d\n", ret); - } if (ret) goto free_irq_domain; From 067d6ec7ed5b49380688e06c1e5f883a71bef4fe Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 14 Jun 2023 21:44:51 -0700 Subject: [PATCH 847/934] PCI: hv: Add a per-bus mutex state_lock In the case of fast device addition/removal, it's possible that hv_eject_device_work() can start to run before create_root_hv_pci_bus() starts to run; as a result, the pci_get_domain_bus_and_slot() in hv_eject_device_work() can return a 'pdev' of NULL, and hv_eject_device_work() can remove the 'hpdev', and immediately send a message PCI_EJECTION_COMPLETE to the host, and the host immediately unassigns the PCI device from the guest; meanwhile, create_root_hv_pci_bus() and the PCI device driver can be probing the dead PCI device and reporting timeout errors. Fix the issue by adding a per-bus mutex 'state_lock' and grabbing the mutex before powering on the PCI bus in hv_pci_enter_d0(): when hv_eject_device_work() starts to run, it's able to find the 'pdev' and call pci_stop_and_remove_bus_device(pdev): if the PCI device driver has loaded, the PCI device driver's probe() function is already called in create_root_hv_pci_bus() -> pci_bus_add_devices(), and now hv_eject_device_work() -> pci_stop_and_remove_bus_device() is able to call the PCI device driver's remove() function and remove the device reliably; if the PCI device driver hasn't loaded yet, the function call hv_eject_device_work() -> pci_stop_and_remove_bus_device() is able to remove the PCI device reliably and the PCI device driver's probe() function won't be called; if the PCI device driver's probe() is already running (e.g., systemd-udev is loading the PCI device driver), it must be holding the per-device lock, and after the probe() finishes and releases the lock, hv_eject_device_work() -> pci_stop_and_remove_bus_device() is able to proceed to remove the device reliably. Fixes: 4daace0d8ce8 ("PCI: hv: Add paravirtual PCI front-end for Microsoft Hyper-V VMs") Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Acked-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230615044451.5580-6-decui@microsoft.com Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 1a5296fad1c4..2d93d0c4f10d 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -489,7 +489,10 @@ struct hv_pcibus_device { struct fwnode_handle *fwnode; /* Protocol version negotiated with the host */ enum pci_protocol_version_t protocol_version; + + struct mutex state_lock; enum hv_pcibus_state state; + struct hv_device *hdev; resource_size_t low_mmio_space; resource_size_t high_mmio_space; @@ -2605,6 +2608,8 @@ static void pci_devices_present_work(struct work_struct *work) if (!dr) return; + mutex_lock(&hbus->state_lock); + /* First, mark all existing children as reported missing. */ spin_lock_irqsave(&hbus->device_list_lock, flags); list_for_each_entry(hpdev, &hbus->children, list_entry) { @@ -2686,6 +2691,8 @@ static void pci_devices_present_work(struct work_struct *work) break; } + mutex_unlock(&hbus->state_lock); + kfree(dr); } @@ -2834,6 +2841,8 @@ static void hv_eject_device_work(struct work_struct *work) hpdev = container_of(work, struct hv_pci_dev, wrk); hbus = hpdev->hbus; + mutex_lock(&hbus->state_lock); + /* * Ejection can come before or after the PCI bus has been set up, so * attempt to find it and tear down the bus state, if it exists. This @@ -2870,6 +2879,8 @@ static void hv_eject_device_work(struct work_struct *work) put_pcichild(hpdev); put_pcichild(hpdev); /* hpdev has been freed. Do not use it any more. */ + + mutex_unlock(&hbus->state_lock); } /** @@ -3636,6 +3647,7 @@ static int hv_pci_probe(struct hv_device *hdev, return -ENOMEM; hbus->bridge = bridge; + mutex_init(&hbus->state_lock); hbus->state = hv_pcibus_init; hbus->wslot_res_allocated = -1; @@ -3745,9 +3757,11 @@ static int hv_pci_probe(struct hv_device *hdev, if (ret) goto free_irq_domain; + mutex_lock(&hbus->state_lock); + ret = hv_pci_enter_d0(hdev); if (ret) - goto free_irq_domain; + goto release_state_lock; ret = hv_pci_allocate_bridge_windows(hbus); if (ret) @@ -3765,12 +3779,15 @@ static int hv_pci_probe(struct hv_device *hdev, if (ret) goto free_windows; + mutex_unlock(&hbus->state_lock); return 0; free_windows: hv_pci_free_bridge_windows(hbus); exit_d0: (void) hv_pci_bus_exit(hdev, true); +release_state_lock: + mutex_unlock(&hbus->state_lock); free_irq_domain: irq_domain_remove(hbus->irq_domain); free_fwnode: @@ -4020,20 +4037,26 @@ static int hv_pci_resume(struct hv_device *hdev) if (ret) goto out; + mutex_lock(&hbus->state_lock); + ret = hv_pci_enter_d0(hdev); if (ret) - goto out; + goto release_state_lock; ret = hv_send_resources_allocated(hdev); if (ret) - goto out; + goto release_state_lock; prepopulate_bars(hbus); hv_pci_restore_msi_state(hbus); hbus->state = hv_pcibus_installed; + mutex_unlock(&hbus->state_lock); return 0; + +release_state_lock: + mutex_unlock(&hbus->state_lock); out: vmbus_close(hdev->channel); return ret; From f593a94b530aee4c7f2511c9e48eb495dff03991 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Fri, 16 Jun 2023 14:18:07 +0200 Subject: [PATCH 848/934] ieee802154/adf7242: Add MODULE_FIRMWARE macro The module loads firmware so add a MODULE_FIRMWARE macro to provide that information via modinfo. Signed-off-by: Juerg Haefliger Signed-off-by: David S. Miller --- drivers/net/ieee802154/adf7242.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index f9972b8140f9..a03490ba2e5b 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1348,3 +1348,5 @@ module_spi_driver(adf7242_driver); MODULE_AUTHOR("Michael Hennerich "); MODULE_DESCRIPTION("ADF7242 IEEE802.15.4 Transceiver Driver"); MODULE_LICENSE("GPL"); + +MODULE_FIRMWARE(FIRMWARE); From eb09fc2d14163c0c217846cfabec3d0cce7c8f8c Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Fri, 16 Jun 2023 14:22:18 +0200 Subject: [PATCH 849/934] nfc: fdp: Add MODULE_FIRMWARE macros The module loads firmware so add MODULE_FIRMWARE macros to provide that information via modinfo. Signed-off-by: Juerg Haefliger Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/fdp/fdp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index f12f903a9dd1..da3e2dce8e70 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -762,3 +762,6 @@ EXPORT_SYMBOL(fdp_nci_remove); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("NFC NCI driver for Intel Fields Peak NFC controller"); MODULE_AUTHOR("Robert Dolca "); + +MODULE_FIRMWARE(FDP_OTP_PATCH_NAME); +MODULE_FIRMWARE(FDP_RAM_PATCH_NAME); From 606c812eb1d5b5fb0dd9e330ca94b52d7c227830 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Sat, 17 Jun 2023 20:47:08 -0400 Subject: [PATCH 850/934] mm/mmap: Fix error path in do_vmi_align_munmap() The error unrolling was leaving the VMAs detached in many cases and leaving the locked_vm statistic altered, and skipping the unrolling entirely in the case of the vma tree write failing. Fix the error path by re-attaching the detached VMAs and adding the necessary goto for the failed vma tree write, and fix the locked_vm statistic by only updating after the vma tree write succeeds. Fixes: 763ecb035029 ("mm: remove the vma linked list") Reported-by: Vegard Nossum Signed-off-by: Liam R. Howlett Signed-off-by: Linus Torvalds --- mm/mmap.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 13678edaa22c..d600404580b2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2318,21 +2318,6 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, return __split_vma(vmi, vma, addr, new_below); } -static inline int munmap_sidetree(struct vm_area_struct *vma, - struct ma_state *mas_detach) -{ - vma_start_write(vma); - mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1); - if (mas_store_gfp(mas_detach, vma, GFP_KERNEL)) - return -ENOMEM; - - vma_mark_detached(vma, true); - if (vma->vm_flags & VM_LOCKED) - vma->vm_mm->locked_vm -= vma_pages(vma); - - return 0; -} - /* * do_vmi_align_munmap() - munmap the aligned region from @start to @end. * @vmi: The vma iterator @@ -2354,6 +2339,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct maple_tree mt_detach; int count = 0; int error = -ENOMEM; + unsigned long locked_vm = 0; MA_STATE(mas_detach, &mt_detach, 0, 0); mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); mt_set_external_lock(&mt_detach, &mm->mmap_lock); @@ -2399,9 +2385,13 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, if (error) goto end_split_failed; } - error = munmap_sidetree(next, &mas_detach); - if (error) - goto munmap_sidetree_failed; + vma_start_write(next); + mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1); + if (mas_store_gfp(&mas_detach, next, GFP_KERNEL)) + goto munmap_gather_failed; + vma_mark_detached(next, true); + if (next->vm_flags & VM_LOCKED) + locked_vm += vma_pages(next); count++; #ifdef CONFIG_DEBUG_VM_MAPLE_TREE @@ -2447,10 +2437,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, } #endif /* Point of no return */ + error = -ENOMEM; vma_iter_set(vmi, start); if (vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL)) - return -ENOMEM; + goto clear_tree_failed; + mm->locked_vm -= locked_vm; mm->map_count -= count; /* * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or @@ -2480,9 +2472,14 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, validate_mm(mm); return downgrade ? 1 : 0; +clear_tree_failed: userfaultfd_error: -munmap_sidetree_failed: +munmap_gather_failed: end_split_failed: + mas_set(&mas_detach, 0); + mas_for_each(&mas_detach, next, end) + vma_mark_detached(next, false); + __mt_destroy(&mt_detach); start_split_failed: map_count_exceeded: From c938ab4da0eb1620ae3243b0b24c572ddfc318fc Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 17 Jun 2023 17:55:00 +0200 Subject: [PATCH 851/934] net: phy: Manual remove LEDs to ensure correct ordering If the core is left to remove the LEDs via devm_, it is performed too late, after the PHY driver is removed from the PHY. This results in dereferencing a NULL pointer when the LED core tries to turn the LED off before destroying the LED. Manually unregister the LEDs at a safe point in phy_remove. Cc: stable@vger.kernel.org Reported-by: Florian Fainelli Suggested-by: Florian Fainelli Fixes: 01e5b728e9e4 ("net: phy: Add a binding for PHY LEDs") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 17d0d0555a79..53598210be6c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3021,6 +3021,15 @@ static int phy_led_blink_set(struct led_classdev *led_cdev, return err; } +static void phy_leds_unregister(struct phy_device *phydev) +{ + struct phy_led *phyled; + + list_for_each_entry(phyled, &phydev->leds, list) { + led_classdev_unregister(&phyled->led_cdev); + } +} + static int of_phy_led(struct phy_device *phydev, struct device_node *led) { @@ -3054,7 +3063,7 @@ static int of_phy_led(struct phy_device *phydev, init_data.fwnode = of_fwnode_handle(led); init_data.devname_mandatory = true; - err = devm_led_classdev_register_ext(dev, cdev, &init_data); + err = led_classdev_register_ext(dev, cdev, &init_data); if (err) return err; @@ -3083,6 +3092,7 @@ static int of_phy_leds(struct phy_device *phydev) err = of_phy_led(phydev, led); if (err) { of_node_put(led); + phy_leds_unregister(phydev); return err; } } @@ -3305,6 +3315,9 @@ static int phy_remove(struct device *dev) cancel_delayed_work_sync(&phydev->state_queue); + if (IS_ENABLED(CONFIG_PHYLIB_LEDS)) + phy_leds_unregister(phydev); + phydev->state = PHY_DOWN; sfp_bus_del_upstream(phydev->sfp_bus); From 45a3e24f65e90a047bef86f927ebdc4c710edaa1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Jun 2023 14:06:27 -0700 Subject: [PATCH 852/934] Linux 6.4-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0d3a9d3e73c1..b68b43c19072 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 92717c2356cb62c89e8a3dc37cbbab2502562524 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 14 Jun 2023 23:06:56 +0200 Subject: [PATCH 853/934] net: qca_spi: Avoid high load if QCA7000 is not available In case the QCA7000 is not available via SPI (e.g. in reset), the driver will cause a high load. The reason for this is that the synchronization is never finished and schedule() is never called. Since the synchronization is not timing critical, it's safe to drop this from the scheduling condition. Signed-off-by: Stefan Wahren Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/qca_spi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index c865a4be05ee..4a1b94e5a8ea 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -582,8 +582,7 @@ qcaspi_spi_thread(void *data) while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); if ((qca->intr_req == qca->intr_svc) && - (qca->txr.skb[qca->txr.head] == NULL) && - (qca->sync == QCASPI_SYNC_READY)) + !qca->txr.skb[qca->txr.head]) schedule(); set_current_state(TASK_RUNNING); From f334ad47683606b682b4166b800d8b372d315436 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 17 Jun 2023 16:53:19 +0800 Subject: [PATCH 854/934] mmc: litex_mmc: set PROBE_PREFER_ASYNCHRONOUS mmc host drivers should have enabled the asynchronous probe option, but it seems like we didn't set it for litex_mmc when introducing litex mmc support, so let's set it now. Tested with linux-on-litex-vexriscv on sipeed tang nano 20K fpga. Signed-off-by: Jisheng Zhang Acked-by: Gabriel Somlo Fixes: 92e099104729 ("mmc: Add driver for LiteX's LiteSDCard interface") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230617085319.2139-1-jszhang@kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/litex_mmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/litex_mmc.c b/drivers/mmc/host/litex_mmc.c index 39c6707fdfdb..9af6b0902efe 100644 --- a/drivers/mmc/host/litex_mmc.c +++ b/drivers/mmc/host/litex_mmc.c @@ -649,6 +649,7 @@ static struct platform_driver litex_mmc_driver = { .driver = { .name = "litex-mmc", .of_match_table = litex_match, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; module_platform_driver(litex_mmc_driver); From 71150ac12558bcd9d75e6e24cf7c872c2efd80f3 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:11 +0300 Subject: [PATCH 855/934] mmc: bcm2835: fix deferred probing The driver overrides the error codes and IRQ0 returned by platform_get_irq() to -EINVAL, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Since commit ce753ad1549c ("platform: finally disallow IRQ0 in platform_get_irq() and its ilk") IRQ0 is no longer returned by those APIs, so we now can safely ignore it... Fixes: 660fc733bd74 ("mmc: bcm2835: Add new driver for the sdhost controller.") Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20230617203622.6812-2-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/bcm2835.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c index 8648f7e63ca1..eea208856ce0 100644 --- a/drivers/mmc/host/bcm2835.c +++ b/drivers/mmc/host/bcm2835.c @@ -1403,8 +1403,8 @@ static int bcm2835_probe(struct platform_device *pdev) host->max_clk = clk_get_rate(clk); host->irq = platform_get_irq(pdev, 0); - if (host->irq <= 0) { - ret = -EINVAL; + if (host->irq < 0) { + ret = host->irq; goto err; } From b8ada54fa1b83f3b6480d4cced71354301750153 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:12 +0300 Subject: [PATCH 856/934] mmc: meson-gx: fix deferred probing The driver overrides the error codes and IRQ0 returned by platform_get_irq() to -EINVAL, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Since commit ce753ad1549c ("platform: finally disallow IRQ0 in platform_get_irq() and its ilk") IRQ0 is no longer returned by those APIs, so we now can safely ignore it... Fixes: cbcaac6d7dd2 ("mmc: meson-gx-mmc: Fix platform_get_irq's error checking") Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Sergey Shtylyov Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20230617203622.6812-3-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index f90b0fd8d8b0..ee9a25b900ae 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -1186,8 +1186,8 @@ static int meson_mmc_probe(struct platform_device *pdev) return PTR_ERR(host->regs); host->irq = platform_get_irq(pdev, 0); - if (host->irq <= 0) - return -EINVAL; + if (host->irq < 0) + return host->irq; cd_irq = platform_get_irq_optional(pdev, 1); mmc_gpio_set_cd_irq(mmc, cd_irq); From 0c4dc0f054891a2cbde0426b0c0fdf232d89f47f Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:13 +0300 Subject: [PATCH 857/934] mmc: mtk-sd: fix deferred probing The driver overrides the error codes returned by platform_get_irq() to -EINVAL, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Fixes: 208489032bdd ("mmc: mediatek: Add Mediatek MMC driver") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20230617203622.6812-4-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index edade0e54a0c..9785ec91654f 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2680,7 +2680,7 @@ static int msdc_drv_probe(struct platform_device *pdev) host->irq = platform_get_irq(pdev, 0); if (host->irq < 0) { - ret = -EINVAL; + ret = host->irq; goto host_free; } From 8d84064da0d4672e74f984e8710f27881137472c Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:14 +0300 Subject: [PATCH 858/934] mmc: mvsdio: fix deferred probing The driver overrides the error codes returned by platform_get_irq() to -ENXIO, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20230617203622.6812-5-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/mvsdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c index 629efbe639c4..b4f6a0a2fcb5 100644 --- a/drivers/mmc/host/mvsdio.c +++ b/drivers/mmc/host/mvsdio.c @@ -704,7 +704,7 @@ static int mvsd_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENXIO; + return irq; mmc = mmc_alloc_host(sizeof(struct mvsd_host), &pdev->dev); if (!mmc) { From aedf4ba1ad00aaa94c1b66c73ecaae95e2564b95 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:15 +0300 Subject: [PATCH 859/934] mmc: omap: fix deferred probing The driver overrides the error codes returned by platform_get_irq() to -ENXIO, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20230617203622.6812-6-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index ce78edfb402b..86454f1182bb 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -1343,7 +1343,7 @@ static int mmc_omap_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENXIO; + return irq; host->virt_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(host->virt_base)) From fb51b74a57859b707c3e8055ed0c25a7ca4f6a29 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:16 +0300 Subject: [PATCH 860/934] mmc: omap_hsmmc: fix deferred probing The driver overrides the error codes returned by platform_get_irq() to -ENXIO, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20230617203622.6812-7-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/omap_hsmmc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 517dde777413..1e0f2d7774bd 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1791,9 +1791,11 @@ static int omap_hsmmc_probe(struct platform_device *pdev) } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - irq = platform_get_irq(pdev, 0); - if (res == NULL || irq < 0) + if (!res) return -ENXIO; + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(base)) From 3c482e1e830d79b9be8afb900a965135c01f7893 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:17 +0300 Subject: [PATCH 861/934] mmc: owl: fix deferred probing The driver overrides the error codes returned by platform_get_irq() to -EINVAL, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Fixes: ff65ffe46d28 ("mmc: Add Actions Semi Owl SoCs SD/MMC driver") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20230617203622.6812-8-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/owl-mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/owl-mmc.c b/drivers/mmc/host/owl-mmc.c index 6f9d31a886ba..1bf22b08b373 100644 --- a/drivers/mmc/host/owl-mmc.c +++ b/drivers/mmc/host/owl-mmc.c @@ -637,7 +637,7 @@ static int owl_mmc_probe(struct platform_device *pdev) owl_host->irq = platform_get_irq(pdev, 0); if (owl_host->irq < 0) { - ret = -EINVAL; + ret = owl_host->irq; goto err_release_channel; } From b465dea5e1540c7d7b5211adaf94926980d3014b Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:18 +0300 Subject: [PATCH 862/934] mmc: sdhci-acpi: fix deferred probing The driver overrides the error codes returned by platform_get_irq() to -EINVAL, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Fixes: 1b7ba57ecc86 ("mmc: sdhci-acpi: Handle return value of platform_get_irq") Signed-off-by: Sergey Shtylyov Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20230617203622.6812-9-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 8f0e639236b1..edf2e6c14dc6 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -829,7 +829,7 @@ static int sdhci_acpi_probe(struct platform_device *pdev) host->ops = &sdhci_acpi_ops_dflt; host->irq = platform_get_irq(pdev, 0); if (host->irq < 0) { - err = -EINVAL; + err = host->irq; goto err_free; } From 8d0caeedcd05a721f3cc2537b0ea212ec4027307 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:19 +0300 Subject: [PATCH 863/934] mmc: sdhci-spear: fix deferred probing The driver overrides the error codes and IRQ0 returned by platform_get_irq() to -EINVAL, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Since commit ce753ad1549c ("platform: finally disallow IRQ0 in platform_get_irq() and its ilk") IRQ0 is no longer returned by those APIs, so we now can safely ignore it... Fixes: 682798a596a6 ("mmc: sdhci-spear: Handle return value of platform_get_irq") Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Sergey Shtylyov Acked-by: Viresh Kumar Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20230617203622.6812-10-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-spear.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c index d463e2fd5b1a..c79035727b20 100644 --- a/drivers/mmc/host/sdhci-spear.c +++ b/drivers/mmc/host/sdhci-spear.c @@ -65,8 +65,8 @@ static int sdhci_probe(struct platform_device *pdev) host->hw_name = "sdhci"; host->ops = &sdhci_pltfm_ops; host->irq = platform_get_irq(pdev, 0); - if (host->irq <= 0) { - ret = -EINVAL; + if (host->irq < 0) { + ret = host->irq; goto err_host; } host->quirks = SDHCI_QUIRK_BROKEN_ADMA; From 5b067d7f855c61df7f8e2e8ccbcee133c282415e Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:20 +0300 Subject: [PATCH 864/934] mmc: sh_mmcif: fix deferred probing The driver overrides the error codes returned by platform_get_irq() to -ENXIO, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20230617203622.6812-11-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mmcif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 0fd4c9d644dd..5cf53348372a 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -1400,7 +1400,7 @@ static int sh_mmcif_probe(struct platform_device *pdev) irq[0] = platform_get_irq(pdev, 0); irq[1] = platform_get_irq_optional(pdev, 1); if (irq[0] < 0) - return -ENXIO; + return irq[0]; reg = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(reg)) From c2df53c5806cfd746dae08e07bc8c4ad247c3b70 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:21 +0300 Subject: [PATCH 865/934] mmc: sunxi: fix deferred probing The driver overrides the error codes and IRQ0 returned by platform_get_irq() to -EINVAL, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating the error codes upstream. Since commit ce753ad1549c ("platform: finally disallow IRQ0 in platform_get_irq() and its ilk") IRQ0 is no longer returned by those APIs, so we now can safely ignore it... Fixes: 2408a08583d2 ("mmc: sunxi-mmc: Handle return value of platform_get_irq") Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Sergey Shtylyov Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20230617203622.6812-12-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index 3db9f32d6a7b..69dcb8805e05 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -1350,8 +1350,8 @@ static int sunxi_mmc_resource_request(struct sunxi_mmc_host *host, return ret; host->irq = platform_get_irq(pdev, 0); - if (host->irq <= 0) { - ret = -EINVAL; + if (host->irq < 0) { + ret = host->irq; goto error_disable_mmc; } From 413db499730248431c1005b392e8ed82c4fa19bf Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 17 Jun 2023 23:36:22 +0300 Subject: [PATCH 866/934] mmc: usdhi60rol0: fix deferred probing The driver overrides the error codes returned by platform_get_irq_byname() to -ENODEV, so if it returns -EPROBE_DEFER, the driver will fail the probe permanently instead of the deferred probing. Switch to propagating error codes upstream. Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq") Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20230617203622.6812-13-s.shtylyov@omp.ru Signed-off-by: Ulf Hansson --- drivers/mmc/host/usdhi6rol0.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c index 2f59917b105e..2e17903658fc 100644 --- a/drivers/mmc/host/usdhi6rol0.c +++ b/drivers/mmc/host/usdhi6rol0.c @@ -1757,8 +1757,10 @@ static int usdhi6_probe(struct platform_device *pdev) irq_cd = platform_get_irq_byname(pdev, "card detect"); irq_sd = platform_get_irq_byname(pdev, "data"); irq_sdio = platform_get_irq_byname(pdev, "SDIO"); - if (irq_sd < 0 || irq_sdio < 0) - return -ENODEV; + if (irq_sd < 0) + return irq_sd; + if (irq_sdio < 0) + return irq_sdio; mmc = mmc_alloc_host(sizeof(struct usdhi6_host), dev); if (!mmc) From 1f1ef7e5bbe2de125da413e238915f6047ea4ba2 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 14 Jun 2023 15:21:16 +0100 Subject: [PATCH 867/934] ASoC: intel: sof_sdw: Fixup typo in device link checking The loop checking for multiple different devices on a single sdw link contains a typo accidentally using i twice instead of j. Correct to the correct index variable. Fixes: dc5a3e60a4b5 ("ASoC: Intel: sof_sdw: append codec type to dai link name") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20230614142116.1059677-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 6faf4a43eaf5..144f082c63fd 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -1347,7 +1347,7 @@ static int sof_card_dai_links_create(struct device *dev, if ((SDW_PART_ID(adr_link->adr_d[i].adr) != SDW_PART_ID(adr_link->adr_d[j].adr)) || (SDW_MFG_ID(adr_link->adr_d[i].adr) != - SDW_MFG_ID(adr_link->adr_d[i].adr))) { + SDW_MFG_ID(adr_link->adr_d[j].adr))) { append_codec_type = true; goto out; } From 555434fd5c6b3589d9511ab6e88faf50346e19da Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Mon, 19 Jun 2023 18:03:20 +1200 Subject: [PATCH 868/934] ALSA: hda/realtek: Add quirk for ASUS ROG G634Z Adds the required quirk to enable the Cirrus amp and correct pins on the ASUS ROG G634Z series. While this works if the related _DSD properties are made available, these aren't included in the ACPI of these laptops (yet). Signed-off-by: Luke D. Jones Link: https://lore.kernel.org/r/20230619060320.1336455-1-luke@ljones.dev Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 308ec7034cc9..17d5bdd4be6f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9552,6 +9552,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), + SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), From ff7a1790fbf92f1bdd0966d3f0da3ea808ede876 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 19 Jun 2023 10:56:07 +0200 Subject: [PATCH 869/934] gpiolib: Fix irq_domain resource tracking for gpiochip_irqchip_add_domain() Up until commit 6a45b0e2589f ("gpiolib: Introduce gpiochip_irqchip_add_domain()") all irq_domains were allocated by gpiolib itself and thus gpiolib also takes care of freeing it. With gpiochip_irqchip_add_domain() a user of gpiolib can associate an irq_domain with the gpio_chip. This irq_domain is not managed by gpiolib and therefore must not be freed by gpiolib. Fixes: 6a45b0e2589f ("gpiolib: Introduce gpiochip_irqchip_add_domain()") Reported-by: Jiawen Wu Signed-off-by: Michael Walle Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 3 ++- include/linux/gpio/driver.h | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 9ecf93cbd801..5be8ad61523e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1745,7 +1745,7 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gc) } /* Remove all IRQ mappings and delete the domain */ - if (gc->irq.domain) { + if (!gc->irq.domain_is_allocated_externally && gc->irq.domain) { unsigned int irq; for (offset = 0; offset < gc->ngpio; offset++) { @@ -1791,6 +1791,7 @@ int gpiochip_irqchip_add_domain(struct gpio_chip *gc, gc->to_irq = gpiochip_to_irq; gc->irq.domain = domain; + gc->irq.domain_is_allocated_externally = true; /* * Using barrier() here to prevent compiler from reordering diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 5c6db5533be6..67b8774eed8f 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -251,6 +251,14 @@ struct gpio_irq_chip { */ bool initialized; + /** + * @domain_is_allocated_externally: + * + * True it the irq_domain was allocated outside of gpiolib, in which + * case gpiolib won't free the irq_domain itself. + */ + bool domain_is_allocated_externally; + /** * @init_hw: optional routine to initialize hardware before * an IRQ chip will be added. This is quite useful when From a2b6f2ab3e144f8e23666aafeba0e4d9ea4b7975 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 7 Jun 2023 13:41:19 -0700 Subject: [PATCH 870/934] afs: Fix dangling folio ref counts in writeback Commit acc8d8588cb7 converted afs_writepages_region() to write back a folio batch. If writeback needs rescheduling, the function exits without dropping the references to the folios in fbatch. This patch fixes that. [DH: Moved the added line before the _leave()] Fixes: acc8d8588cb7 ("afs: convert afs_writepages_region() to use filemap_get_folios_tag()") Signed-off-by: Vishal Moola (Oracle) Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/20230607204120.89416-1-vishal.moola@gmail.com/ --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index c822d6006033..fd433024070e 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -763,6 +763,7 @@ static int afs_writepages_region(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_NONE) { if (skips >= 5 || need_resched()) { *_next = start; + folio_batch_release(&fbatch); _leave(" = 0 [%llx]", *_next); return 0; } From 819da022dd007398d0c42ebcd8dbb1b681acea53 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 7 Jun 2023 13:41:20 -0700 Subject: [PATCH 871/934] afs: Fix waiting for writeback then skipping folio Commit acc8d8588cb7 converted afs_writepages_region() to write back a folio batch. The function waits for writeback to a folio, but then proceeds to the rest of the batch without trying to write that folio again. This patch fixes has it attempt to write the folio again. [DH: Also remove an 'else' that adding a goto makes redundant] Fixes: acc8d8588cb7 ("afs: convert afs_writepages_region() to use filemap_get_folios_tag()") Signed-off-by: Vishal Moola (Oracle) Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/20230607204120.89416-2-vishal.moola@gmail.com/ --- fs/afs/write.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index fd433024070e..8750b99c3f56 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -731,6 +731,7 @@ static int afs_writepages_region(struct address_space *mapping, * (changing page->mapping to NULL), or even swizzled * back from swapper_space to tmpfs file mapping */ +try_again: if (wbc->sync_mode != WB_SYNC_NONE) { ret = folio_lock_killable(folio); if (ret < 0) { @@ -757,9 +758,10 @@ static int afs_writepages_region(struct address_space *mapping, #ifdef CONFIG_AFS_FSCACHE folio_wait_fscache(folio); #endif - } else { - start += folio_size(folio); + goto try_again; } + + start += folio_size(folio); if (wbc->sync_mode == WB_SYNC_NONE) { if (skips >= 5 || need_resched()) { *_next = start; From d7fce52fdf96663ddc2eb21afecff3775588612a Mon Sep 17 00:00:00 2001 From: Terin Stock Date: Fri, 9 Jun 2023 22:58:42 +0200 Subject: [PATCH 872/934] ipvs: align inner_mac_header for encapsulation When using encapsulation the original packet's headers are copied to the inner headers. This preserves the space for an inner mac header, which is not used by the inner payloads for the encapsulation types supported by IPVS. If a packet is using GUE or GRE encapsulation and needs to be segmented, flow can be passed to __skb_udp_tunnel_segment() which calculates a negative tunnel header length. A negative tunnel header length causes pskb_may_pull() to fail, dropping the packet. This can be observed by attaching probes to ip_vs_in_hook(), __dev_queue_xmit(), and __skb_udp_tunnel_segment(): perf probe --add '__dev_queue_xmit skb->inner_mac_header \ skb->inner_network_header skb->mac_header skb->network_header' perf probe --add '__skb_udp_tunnel_segment:7 tnl_hlen' perf probe -m ip_vs --add 'ip_vs_in_hook skb->inner_mac_header \ skb->inner_network_header skb->mac_header skb->network_header' These probes the headers and tunnel header length for packets which traverse the IPVS encapsulation path. A TCP packet can be forced into the segmentation path by being smaller than a calculated clamped MSS, but larger than the advertised MSS. probe:ip_vs_in_hook: inner_mac_header=0x0 inner_network_header=0x0 mac_header=0x44 network_header=0x52 probe:ip_vs_in_hook: inner_mac_header=0x44 inner_network_header=0x52 mac_header=0x44 network_header=0x32 probe:dev_queue_xmit: inner_mac_header=0x44 inner_network_header=0x52 mac_header=0x44 network_header=0x32 probe:__skb_udp_tunnel_segment_L7: tnl_hlen=-2 When using veth-based encapsulation, the interfaces are set to be mac-less, which does not preserve space for an inner mac header. This prevents this issue from occurring. In our real-world testing of sending a 32KB file we observed operation time increasing from ~75ms for veth-based encapsulation to over 1.5s using IPVS encapsulation due to retries from dropped packets. This changeset modifies the packet on the encapsulation path in ip_vs_tunnel_xmit() and ip_vs_tunnel_xmit_v6() to remove the inner mac header offset. This fixes UDP segmentation for both encapsulation types, and corrects the inner headers for any IPIP flows that may use it. Fixes: 84c0d5e96f3a ("ipvs: allow tunneling with gue encapsulation") Signed-off-by: Terin Stock Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_xmit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index feb1d7fcb09f..a80b960223e1 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -1207,6 +1207,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->transport_header = skb->network_header; skb_set_inner_ipproto(skb, next_protocol); + skb_set_inner_mac_header(skb, skb_inner_network_offset(skb)); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { bool check = false; @@ -1349,6 +1350,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->transport_header = skb->network_header; skb_set_inner_ipproto(skb, next_protocol); + skb_set_inner_mac_header(skb, skb_inner_network_offset(skb)); if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) { bool check = false; From 7257d930aadcd62d1c7971ab14f3b1126356abdc Mon Sep 17 00:00:00 2001 From: Teresa Remmet Date: Wed, 14 Jun 2023 14:52:40 +0200 Subject: [PATCH 873/934] regulator: pca9450: Fix LDO3OUT and LDO4OUT MASK L3_OUT and L4_OUT Bit fields range from Bit 0:4 and thus the mask should be 0x1F instead of 0x0F. Fixes: 0935ff5f1f0a ("regulator: pca9450: add pca9450 pmic driver") Signed-off-by: Teresa Remmet Reviewed-by: Frieder Schrempf Link: https://lore.kernel.org/r/20230614125240.3946519-1-t.remmet@phytec.de Signed-off-by: Mark Brown --- include/linux/regulator/pca9450.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index 3c01c2bf84f5..505c908dbb81 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -196,11 +196,11 @@ enum { /* PCA9450_REG_LDO3_VOLT bits */ #define LDO3_EN_MASK 0xC0 -#define LDO3OUT_MASK 0x0F +#define LDO3OUT_MASK 0x1F /* PCA9450_REG_LDO4_VOLT bits */ #define LDO4_EN_MASK 0xC0 -#define LDO4OUT_MASK 0x0F +#define LDO4OUT_MASK 0x1F /* PCA9450_REG_LDO5_VOLT bits */ #define LDO5L_EN_MASK 0xC0 From 85d38d5810e285d5aec7fb5283107d1da70c12a9 Mon Sep 17 00:00:00 2001 From: Dheeraj Kumar Srivastava Date: Sat, 17 Jun 2023 02:52:36 +0530 Subject: [PATCH 874/934] x86/apic: Fix kernel panic when booting with intremap=off and x2apic_phys When booting with "intremap=off" and "x2apic_phys" on the kernel command line, the physical x2APIC driver ends up being used even when x2APIC mode is disabled ("intremap=off" disables x2APIC mode). This happens because the first compound condition check in x2apic_phys_probe() is false due to x2apic_mode == 0 and so the following one returns true after default_acpi_madt_oem_check() having already selected the physical x2APIC driver. This results in the following panic: kernel BUG at arch/x86/kernel/apic/io_apic.c:2409! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.4.0-rc2-ver4.1rc2 #2 Hardware name: Dell Inc. PowerEdge R6515/07PXPY, BIOS 2.3.6 07/06/2021 RIP: 0010:setup_IO_APIC+0x9c/0xaf0 Call Trace: ? native_read_msr apic_intr_mode_init x86_late_time_init start_kernel x86_64_start_reservations x86_64_start_kernel secondary_startup_64_no_verify which is: setup_IO_APIC: apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); for_each_ioapic(ioapic) BUG_ON(mp_irqdomain_create(ioapic)); Return 0 to denote that x2APIC has not been enabled when probing the physical x2APIC driver. [ bp: Massage commit message heavily. ] Fixes: 9ebd680bd029 ("x86, apic: Use probe routines to simplify apic selection") Signed-off-by: Dheeraj Kumar Srivastava Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Kishon Vijay Abraham I Reviewed-by: Vasant Hegde Reviewed-by: Cyrill Gorcunov Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230616212236.1389-1-dheerajkumar.srivastava@amd.com --- arch/x86/kernel/apic/x2apic_phys.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 6bde05a86b4e..896bc41cb2ba 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -97,7 +97,10 @@ static void init_x2apic_ldr(void) static int x2apic_phys_probe(void) { - if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) + if (!x2apic_mode) + return 0; + + if (x2apic_phys || x2apic_fadt_phys()) return 1; return apic == &apic_x2apic_phys; From 9a43827e876c9a071826cc81783aa2222b020f1d Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Fri, 16 Jun 2023 14:14:14 +0300 Subject: [PATCH 875/934] net: dpaa2-mac: add 25gbase-r support Layerscape MACs support 25Gbps network speed with dpmac "CAUI" mode. Add the mappings between DPMAC_ETH_IF_* and HY_INTERFACE_MODE_*, as well as the 25000 mac capability. Tested on SolidRun LX2162a Clearfog, serdes 1 protocol 18. Signed-off-by: Josua Mayer Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index b1871e6c4006..00e50bd30189 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -54,6 +54,9 @@ static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode) case DPMAC_ETH_IF_XFI: *if_mode = PHY_INTERFACE_MODE_10GBASER; break; + case DPMAC_ETH_IF_CAUI: + *if_mode = PHY_INTERFACE_MODE_25GBASER; + break; default: return -EINVAL; } @@ -79,6 +82,8 @@ static enum dpmac_eth_if dpmac_eth_if_mode(phy_interface_t if_mode) return DPMAC_ETH_IF_XFI; case PHY_INTERFACE_MODE_1000BASEX: return DPMAC_ETH_IF_1000BASEX; + case PHY_INTERFACE_MODE_25GBASER: + return DPMAC_ETH_IF_CAUI; default: return DPMAC_ETH_IF_MII; } @@ -418,7 +423,7 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) mac->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | MAC_10FD | MAC_100FD | MAC_1000FD | MAC_2500FD | MAC_5000FD | - MAC_10000FD; + MAC_10000FD | MAC_25000FD; dpaa2_mac_set_supported_interfaces(mac); From ff221029a51fd54cacac66e193e0c75e4de940e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 17 Jun 2023 09:26:44 +0300 Subject: [PATCH 876/934] net: dsa: mt7530: set all CPU ports in MT7531_CPU_PMAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MT7531_CPU_PMAP represents the destination port mask for trapped-to-CPU frames (further restricted by PCR_MATRIX). Currently the driver sets the first CPU port as the single port in this bit mask, which works fine regardless of whether the device tree defines port 5, 6 or 5+6 as CPU ports. This is because the logic coincides with DSA's logic of picking the first CPU port as the CPU port that all user ports are affine to, by default. An upcoming change would like to influence DSA's selection of the default CPU port to no longer be the first one, and in that case, this logic needs adaptation. Since there is no observed leakage or duplication of frames if all CPU ports are defined in this bit mask, simply include them all. Suggested-by: Russell King (Oracle) Suggested-by: Vladimir Oltean Signed-off-by: Arınç ÜNAL Reviewed-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 15 ++++++++------- drivers/net/dsa/mt7530.h | 1 + 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 9bc54e1348cb..f8503155f179 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1010,6 +1010,13 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) if (priv->id == ID_MT7621) mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port)); + /* Add the CPU port to the CPU port bitmap for MT7531 and the switch on + * the MT7988 SoC. Trapped frames will be forwarded to the CPU port that + * is affine to the inbound user port. + */ + if (priv->id == ID_MT7531 || priv->id == ID_MT7988) + mt7530_set(priv, MT7531_CFC, MT7531_CPU_PMAP(BIT(port))); + /* CPU port gets connected to all user ports of * the switch. */ @@ -2352,15 +2359,9 @@ static int mt7531_setup_common(struct dsa_switch *ds) { struct mt7530_priv *priv = ds->priv; - struct dsa_port *cpu_dp; int ret, i; - /* BPDU to CPU port */ - dsa_switch_for_each_cpu_port(cpu_dp, ds) { - mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, - BIT(cpu_dp->index)); - break; - } + /* Trap BPDUs to the CPU port(s) */ mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, MT753X_BPDU_CPU_ONLY); diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 5084f48a8869..e590cf43f3ae 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -54,6 +54,7 @@ enum mt753x_id { #define MT7531_MIRROR_PORT_GET(x) (((x) >> 16) & MIRROR_MASK) #define MT7531_MIRROR_PORT_SET(x) (((x) & MIRROR_MASK) << 16) #define MT7531_CPU_PMAP_MASK GENMASK(7, 0) +#define MT7531_CPU_PMAP(x) FIELD_PREP(MT7531_CPU_PMAP_MASK, x) #define MT753X_MIRROR_REG(id) ((((id) == ID_MT7531) || ((id) == ID_MT7988)) ? \ MT7531_CFC : MT7530_MFC) From 4ae90f90e4909e3014e2dc6a0627964617a7b824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 17 Jun 2023 09:26:45 +0300 Subject: [PATCH 877/934] net: dsa: mt7530: fix trapping frames on non-MT7621 SoC MT7530 switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All MT7530 switch IP variants share the MT7530_MFC register, but the current driver only writes it for the switch variant that is integrated in the MT7621 SoC. Modify the code to include all MT7530 derivatives. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Suggested-by: Vladimir Oltean Signed-off-by: Arınç ÜNAL Reviewed-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index f8503155f179..8c9acf109a4e 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1007,7 +1007,7 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) UNU_FFP(BIT(port))); /* Set CPU port number */ - if (priv->id == ID_MT7621) + if (priv->id == ID_MT7530 || priv->id == ID_MT7621) mt7530_rmw(priv, MT7530_MFC, CPU_MASK, CPU_EN | CPU_PORT(port)); /* Add the CPU port to the CPU port bitmap for MT7531 and the switch on From d7c66073559386b836bded7cdc8b66ee5c049129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 17 Jun 2023 09:26:46 +0300 Subject: [PATCH 878/934] net: dsa: mt7530: fix handling of BPDUs on MT7530 switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BPDUs are link-local frames, therefore they must be trapped to the CPU port. Currently, the MT7530 switch treats BPDUs as regular multicast frames, therefore flooding them to user ports. To fix this, set BPDUs to be trapped to the CPU port. Group this on mt7530_setup() and mt7531_setup_common() into mt753x_trap_frames() and call that. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Reviewed-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 8c9acf109a4e..5e4f6965cebd 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -985,6 +985,14 @@ unlock_exit: mutex_unlock(&priv->reg_mutex); } +static void +mt753x_trap_frames(struct mt7530_priv *priv) +{ + /* Trap BPDUs to the CPU port(s) */ + mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, + MT753X_BPDU_CPU_ONLY); +} + static int mt753x_cpu_port_enable(struct dsa_switch *ds, int port) { @@ -2262,6 +2270,8 @@ mt7530_setup(struct dsa_switch *ds) priv->p6_interface = PHY_INTERFACE_MODE_NA; + mt753x_trap_frames(priv); + /* Enable and reset MIB counters */ mt7530_mib_reset(ds); @@ -2361,9 +2371,7 @@ mt7531_setup_common(struct dsa_switch *ds) struct mt7530_priv *priv = ds->priv; int ret, i; - /* Trap BPDUs to the CPU port(s) */ - mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, - MT753X_BPDU_CPU_ONLY); + mt753x_trap_frames(priv); /* Enable and reset MIB counters */ mt7530_mib_reset(ds); From 8332cf6fd7c7087dbc2067115b33979c9851bbc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 17 Jun 2023 09:26:47 +0300 Subject: [PATCH 879/934] net: dsa: mt7530: fix handling of LLDP frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLDP frames are link-local frames, therefore they must be trapped to the CPU port. Currently, the MT753X switches treat LLDP frames as regular multicast frames, therefore flooding them to user ports. To fix this, set LLDP frames to be trapped to the CPU port(s). Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: Arınç ÜNAL Reviewed-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 4 ++++ drivers/net/dsa/mt7530.h | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 5e4f6965cebd..6d6ff293900c 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -991,6 +991,10 @@ mt753x_trap_frames(struct mt7530_priv *priv) /* Trap BPDUs to the CPU port(s) */ mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, MT753X_BPDU_CPU_ONLY); + + /* Trap LLDP frames with :0E MAC DA to the CPU port(s) */ + mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK, + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); } static int diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index e590cf43f3ae..08045b035e6a 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -67,6 +67,11 @@ enum mt753x_id { #define MT753X_BPC 0x24 #define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) +/* Register for :03 and :0E MAC DA frame control */ +#define MT753X_RGAC2 0x2c +#define MT753X_R0E_PORT_FW_MASK GENMASK(18, 16) +#define MT753X_R0E_PORT_FW(x) FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x) + enum mt753x_bpdu_port_fw { MT753X_BPDU_FOLLOW_MFC, MT753X_BPDU_CPU_EXCLUDE = 4, From b79d7c14f48083abb3fb061370c0c64a569edf4c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 17 Jun 2023 09:26:48 +0300 Subject: [PATCH 880/934] net: dsa: introduce preferred_default_local_cpu_port and use on MT7530 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the introduction of the OF bindings, DSA has always had a policy that in case multiple CPU ports are present in the device tree, the numerically smallest one is always chosen. The MT7530 switch family, except the switch on the MT7988 SoC, has 2 CPU ports, 5 and 6, where port 6 is preferable on the MT7531BE switch because it has higher bandwidth. The MT7530 driver developers had 3 options: - to modify DSA when the MT7531 switch support was introduced, such as to prefer the better port - to declare both CPU ports in device trees as CPU ports, and live with the sub-optimal performance resulting from not preferring the better port - to declare just port 6 in the device tree as a CPU port Of course they chose the path of least resistance (3rd option), kicking the can down the road. The hardware description in the device tree is supposed to be stable - developers are not supposed to adopt the strategy of piecemeal hardware description, where the device tree is updated in lockstep with the features that the kernel currently supports. Now, as a result of the fact that they did that, any attempts to modify the device tree and describe both CPU ports as CPU ports would make DSA change its default selection from port 6 to 5, effectively resulting in a performance degradation visible to users with the MT7531BE switch as can be seen below. Without preferring port 6: [ ID][Role] Interval Transfer Bitrate Retr [ 5][TX-C] 0.00-20.00 sec 374 MBytes 157 Mbits/sec 734 sender [ 5][TX-C] 0.00-20.00 sec 373 MBytes 156 Mbits/sec receiver [ 7][RX-C] 0.00-20.00 sec 1.81 GBytes 778 Mbits/sec 0 sender [ 7][RX-C] 0.00-20.00 sec 1.81 GBytes 777 Mbits/sec receiver With preferring port 6: [ ID][Role] Interval Transfer Bitrate Retr [ 5][TX-C] 0.00-20.00 sec 1.99 GBytes 856 Mbits/sec 273 sender [ 5][TX-C] 0.00-20.00 sec 1.99 GBytes 855 Mbits/sec receiver [ 7][RX-C] 0.00-20.00 sec 1.72 GBytes 737 Mbits/sec 15 sender [ 7][RX-C] 0.00-20.00 sec 1.71 GBytes 736 Mbits/sec receiver Using one port for WAN and the other ports for LAN is a very popular use case which is what this test emulates. As such, this change proposes that we retroactively modify stable kernels (which don't support the modification of the CPU port assignments, so as to let user space fix the problem and restore the throughput) to keep the mt7530 driver preferring port 6 even with device trees where the hardware is more fully described. Fixes: c288575f7810 ("net: dsa: mt7530: Add the support of MT7531 switch") Signed-off-by: Vladimir Oltean Signed-off-by: Arınç ÜNAL Reviewed-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 15 +++++++++++++++ include/net/dsa.h | 8 ++++++++ net/dsa/dsa.c | 24 +++++++++++++++++++++++- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 6d6ff293900c..7e773c4ba046 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -399,6 +399,20 @@ static void mt7530_pll_setup(struct mt7530_priv *priv) core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); } +/* If port 6 is available as a CPU port, always prefer that as the default, + * otherwise don't care. + */ +static struct dsa_port * +mt753x_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp = dsa_to_port(ds, 6); + + if (dsa_port_is_cpu(cpu_dp)) + return cpu_dp; + + return NULL; +} + /* Setup port 6 interface mode and TRGMII TX circuit */ static int mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface) @@ -3098,6 +3112,7 @@ static int mt7988_setup(struct dsa_switch *ds) const struct dsa_switch_ops mt7530_switch_ops = { .get_tag_protocol = mtk_get_tag_protocol, .setup = mt753x_setup, + .preferred_default_local_cpu_port = mt753x_preferred_default_local_cpu_port, .get_strings = mt7530_get_strings, .get_ethtool_stats = mt7530_get_ethtool_stats, .get_sset_count = mt7530_get_sset_count, diff --git a/include/net/dsa.h b/include/net/dsa.h index 8903053fa5aa..ab0f0a5b0860 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -958,6 +958,14 @@ struct dsa_switch_ops { struct phy_device *phy); void (*port_disable)(struct dsa_switch *ds, int port); + /* + * Compatibility between device trees defining multiple CPU ports and + * drivers which are not OK to use by default the numerically smallest + * CPU port of a switch for its local ports. This can return NULL, + * meaning "don't know/don't care". + */ + struct dsa_port *(*preferred_default_local_cpu_port)(struct dsa_switch *ds); + /* * Port's MAC EEE settings */ diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ab1afe67fd18..1afed89e03c0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -403,6 +403,24 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return 0; } +static struct dsa_port * +dsa_switch_preferred_default_local_cpu_port(struct dsa_switch *ds) +{ + struct dsa_port *cpu_dp; + + if (!ds->ops->preferred_default_local_cpu_port) + return NULL; + + cpu_dp = ds->ops->preferred_default_local_cpu_port(ds); + if (!cpu_dp) + return NULL; + + if (WARN_ON(!dsa_port_is_cpu(cpu_dp) || cpu_dp->ds != ds)) + return NULL; + + return cpu_dp; +} + /* Perform initial assignment of CPU ports to user ports and DSA links in the * fabric, giving preference to CPU ports local to each switch. Default to * using the first CPU port in the switch tree if the port does not have a CPU @@ -410,12 +428,16 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) */ static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) { - struct dsa_port *cpu_dp, *dp; + struct dsa_port *preferred_cpu_dp, *cpu_dp, *dp; list_for_each_entry(cpu_dp, &dst->ports, list) { if (!dsa_port_is_cpu(cpu_dp)) continue; + preferred_cpu_dp = dsa_switch_preferred_default_local_cpu_port(cpu_dp->ds); + if (preferred_cpu_dp && preferred_cpu_dp != cpu_dp) + continue; + /* Prefer a local CPU port */ dsa_switch_for_each_port(dp, cpu_dp->ds) { /* Prefer the first local CPU port found */ From 94d12d88b4a849765bf7185c876c6c672ad4714e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Sat, 17 Jun 2023 09:26:49 +0300 Subject: [PATCH 881/934] MAINTAINERS: add me as maintainer of MEDIATEK SWITCH DRIVER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add me as a maintainer of the MediaTek MT7530 DSA subdriver. List maintainers in alphabetical order by first name. Signed-off-by: Arınç ÜNAL Reviewed-by: Vladimir Oltean Reviewed-by: Matthias Brugger Reviewed-by: Matthias Brugger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- MAINTAINERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 82203351ea38..5a7f13e6c6c1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13270,10 +13270,11 @@ F: drivers/memory/mtk-smi.c F: include/soc/mediatek/smi.h MEDIATEK SWITCH DRIVER -M: Sean Wang +M: Arınç ÜNAL +M: Daniel Golle M: Landen Chao M: DENG Qingfang -M: Daniel Golle +M: Sean Wang L: netdev@vger.kernel.org S: Maintained F: drivers/net/dsa/mt7530-mdio.c From 8d0d16a3ef44c60d66f937c21f32fa2cf8f2c9ed Mon Sep 17 00:00:00 2001 From: Pranjal Ramajor Asha Kanojiya Date: Wed, 14 Jun 2023 10:15:28 -0600 Subject: [PATCH 882/934] accel/qaic: Call DRM helper function to destroy prime GEM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit smatch warning: drivers/accel/qaic/qaic_data.c:620 qaic_free_object() error: dereferencing freed memory 'obj->import_attach' obj->import_attach is detached and freed using dma_buf_detach(). But used after free to decrease the dmabuf ref count using dma_buf_put(). drm_prime_gem_destroy() handles this issue and performs the proper clean up instead of open coding it in the driver. Fixes: ff13be830333 ("accel/qaic: Add datapath") Reported-by: Sukrut Bellary Closes: https://lore.kernel.org/all/20230610021200.377452-1-sukrut.bellary@linux.com/ Suggested-by: Christian König Signed-off-by: Pranjal Ramajor Asha Kanojiya Reviewed-by: Carl Vanderlip Reviewed-by: Jeffrey Hugo Signed-off-by: Jeffrey Hugo Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230614161528.11710-1-quic_jhugo@quicinc.com --- drivers/accel/qaic/qaic_data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index e42c1f9ffff8..e9a1cb779b30 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -616,8 +617,7 @@ static void qaic_free_object(struct drm_gem_object *obj) if (obj->import_attach) { /* DMABUF/PRIME Path */ - dma_buf_detach(obj->import_attach->dmabuf, obj->import_attach); - dma_buf_put(obj->import_attach->dmabuf); + drm_prime_gem_destroy(obj, NULL); } else { /* Private buffer allocation path */ qaic_free_sgt(bo->sgt); From a7299a18a179a9713651fce9ad00972a633c14a9 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 20 Jun 2023 17:57:31 +0800 Subject: [PATCH 883/934] btrfs: fix u32 overflows when left shifting stripe_nr [BUG] David reported an ASSERT() get triggered during fio load on 8 devices with data/raid6 and metadata/raid1c3: fio --rw=randrw --randrepeat=1 --size=3000m \ --bsrange=512b-64k --bs_unaligned \ --ioengine=libaio --fsync=1024 \ --name=job0 --name=job1 \ The ASSERT() is from rbio_add_bio() of raid56.c: ASSERT(orig_logical >= full_stripe_start && orig_logical + orig_len <= full_stripe_start + rbio->nr_data * BTRFS_STRIPE_LEN); Which is checking if the target rbio is crossing the full stripe boundary. [100.789] assertion failed: orig_logical >= full_stripe_start && orig_logical + orig_len <= full_stripe_start + rbio->nr_data * BTRFS_STRIPE_LEN, in fs/btrfs/raid56.c:1622 [100.795] ------------[ cut here ]------------ [100.796] kernel BUG at fs/btrfs/raid56.c:1622! [100.797] invalid opcode: 0000 [#1] PREEMPT SMP KASAN [100.798] CPU: 1 PID: 100 Comm: kworker/u8:4 Not tainted 6.4.0-rc6-default+ #124 [100.799] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014 [100.802] Workqueue: writeback wb_workfn (flush-btrfs-1) [100.803] RIP: 0010:rbio_add_bio+0x204/0x210 [btrfs] [100.806] RSP: 0018:ffff888104a8f300 EFLAGS: 00010246 [100.808] RAX: 00000000000000a1 RBX: ffff8881075907e0 RCX: ffffed1020951e01 [100.809] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 0000000000000001 [100.811] RBP: 0000000141d20000 R08: 0000000000000001 R09: ffff888104a8f04f [100.813] R10: ffffed1020951e09 R11: 0000000000000003 R12: ffff88810e87f400 [100.815] R13: 0000000041d20000 R14: 0000000144529000 R15: ffff888101524000 [100.817] FS: 0000000000000000(0000) GS:ffff88811ac00000(0000) knlGS:0000000000000000 [100.821] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [100.822] CR2: 000055d54e44c270 CR3: 000000010a9a1006 CR4: 00000000003706a0 [100.824] Call Trace: [100.825] [100.825] ? die+0x32/0x80 [100.826] ? do_trap+0x12d/0x160 [100.827] ? rbio_add_bio+0x204/0x210 [btrfs] [100.827] ? rbio_add_bio+0x204/0x210 [btrfs] [100.829] ? do_error_trap+0x90/0x130 [100.830] ? rbio_add_bio+0x204/0x210 [btrfs] [100.831] ? handle_invalid_op+0x2c/0x30 [100.833] ? rbio_add_bio+0x204/0x210 [btrfs] [100.835] ? exc_invalid_op+0x29/0x40 [100.836] ? asm_exc_invalid_op+0x16/0x20 [100.837] ? rbio_add_bio+0x204/0x210 [btrfs] [100.837] raid56_parity_write+0x64/0x270 [btrfs] [100.838] btrfs_submit_chunk+0x26e/0x800 [btrfs] [100.840] ? btrfs_bio_init+0x80/0x80 [btrfs] [100.841] ? release_pages+0x503/0x6d0 [100.842] ? folio_unlock+0x2f/0x60 [100.844] ? __folio_put+0x60/0x60 [100.845] ? btrfs_do_readpage+0xae0/0xae0 [btrfs] [100.847] btrfs_submit_bio+0x21/0x60 [btrfs] [100.847] submit_one_bio+0x6a/0xb0 [btrfs] [100.849] extent_write_cache_pages+0x395/0x680 [btrfs] [100.850] ? __extent_writepage+0x520/0x520 [btrfs] [100.851] ? mark_usage+0x190/0x190 [100.852] extent_writepages+0xdb/0x130 [btrfs] [100.853] ? extent_write_locked_range+0x480/0x480 [btrfs] [100.854] ? mark_usage+0x190/0x190 [100.854] ? attach_extent_buffer_page+0x220/0x220 [btrfs] [100.855] ? reacquire_held_locks+0x178/0x280 [100.856] ? writeback_sb_inodes+0x245/0x7f0 [100.857] do_writepages+0x102/0x2e0 [100.858] ? page_writeback_cpu_online+0x10/0x10 [100.859] ? __lock_release.isra.0+0x14a/0x4d0 [100.860] ? reacquire_held_locks+0x280/0x280 [100.861] ? __lock_acquired+0x1e9/0x3d0 [100.862] ? do_raw_spin_lock+0x1b0/0x1b0 [100.863] __writeback_single_inode+0x94/0x450 [100.864] writeback_sb_inodes+0x372/0x7f0 [100.864] ? lock_sync+0xd0/0xd0 [100.865] ? do_raw_spin_unlock+0x93/0xf0 [100.866] ? sync_inode_metadata+0xc0/0xc0 [100.867] ? rwsem_optimistic_spin+0x340/0x340 [100.868] __writeback_inodes_wb+0x70/0x130 [100.869] wb_writeback+0x2d1/0x530 [100.869] ? __writeback_inodes_wb+0x130/0x130 [100.870] ? lockdep_hardirqs_on_prepare.part.0+0xf1/0x1c0 [100.870] wb_do_writeback+0x3eb/0x480 [100.871] ? wb_writeback+0x530/0x530 [100.871] ? mark_lock_irq+0xcd0/0xcd0 [100.872] wb_workfn+0xe0/0x3f0< [CAUSE] Commit a97699d1d610 ("btrfs: replace map_lookup->stripe_len by BTRFS_STRIPE_LEN") changes how we calculate the map length, to reduce u64 division. Function btrfs_max_io_len() is to get the length to the stripe boundary. It calculates the full stripe start offset (inside the chunk) by the following code: *full_stripe_start = rounddown(*stripe_nr, nr_data_stripes(map)) << BTRFS_STRIPE_LEN_SHIFT; The calculation itself is fine, but the value returned by rounddown() is dependent on both @stripe_nr (which is u32) and nr_data_stripes() (which returned int). Thus the result is also u32, then we do the left shift, which can overflow u32. If such overflow happens, @full_stripe_start will be a value way smaller than @offset, causing later "full_stripe_len - (offset - *full_stripe_start)" to underflow, thus make later length calculation to have no stripe boundary limit, resulting a write bio to exceed stripe boundary. There are some other locations like this, with a u32 @stripe_nr got left shift, which can lead to a similar overflow. [FIX] Fix all @stripe_nr with left shift with a type cast to u64 before the left shift. Those involved @stripe_nr or similar variables are recording the stripe number inside the chunk, which is small enough to be contained by u32, but their offset inside the chunk can not fit into u32. Thus for those specific left shifts, a type cast to u64 is necessary so this patch does not touch them and the code will be cleaned up in the future to keep the fix minimal. Reported-by: David Sterba Fixes: a97699d1d610 ("btrfs: replace map_lookup->stripe_len by BTRFS_STRIPE_LEN") Tested-by: David Sterba Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 841e799dece5..e60beb14852a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5975,12 +5975,12 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT; /* stripe_offset is the offset of this block in its stripe */ - stripe_offset = offset - (stripe_nr << BTRFS_STRIPE_LEN_SHIFT); + stripe_offset = offset - ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT); stripe_nr_end = round_up(offset + length, BTRFS_STRIPE_LEN) >> BTRFS_STRIPE_LEN_SHIFT; stripe_cnt = stripe_nr_end - stripe_nr; - stripe_end_offset = (stripe_nr_end << BTRFS_STRIPE_LEN_SHIFT) - + stripe_end_offset = ((u64)stripe_nr_end << BTRFS_STRIPE_LEN_SHIFT) - (offset + length); /* * after this, stripe_nr is the number of stripes on this @@ -6023,7 +6023,7 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, for (i = 0; i < *num_stripes; i++) { stripes[i].physical = map->stripes[stripe_index].physical + - stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT); + stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT); stripes[i].dev = map->stripes[stripe_index].dev; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | @@ -6196,9 +6196,11 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op, * not ensured to be power of 2. */ *full_stripe_start = - rounddown(*stripe_nr, nr_data_stripes(map)) << + (u64)rounddown(*stripe_nr, nr_data_stripes(map)) << BTRFS_STRIPE_LEN_SHIFT; + ASSERT(*full_stripe_start + full_stripe_len > offset); + ASSERT(*full_stripe_start <= offset); /* * For writes to RAID56, allow to write a full stripe set, but * no straddling of stripe sets. @@ -6221,7 +6223,7 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup * { dst->dev = map->stripes[stripe_index].dev; dst->physical = map->stripes[stripe_index].physical + - stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT); + stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT); } int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, From 7580e0a78eb29e7bb1a772eba4088250bbb70d41 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 16 Jun 2023 17:45:49 +0100 Subject: [PATCH 884/934] be2net: Extend xmit workaround to BE3 chip We have seen a bug where the NIC incorrectly changes the length in the IP header of a padded packet to include the padding bytes. The driver already has a workaround for this so do the workaround for this NIC too. This resolves the issue. The NIC in question identifies itself as follows: [ 8.828494] be2net 0000:02:00.0: FW version is 10.7.110.31 [ 8.834759] be2net 0000:02:00.0: Emulex OneConnect(be3): PF FLEX10 port 1 02:00.0 Ethernet controller: Emulex Corporation OneConnect 10Gb NIC (be3) (rev 01) Fixes: ca34fe38f06d ("be2net: fix wrong usage of adapter->generation") Signed-off-by: Ross Lagerwall Link: https://lore.kernel.org/r/20230616164549.2863037-1-ross.lagerwall@citrix.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7e408bcc88de..0defd519ba62 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1135,8 +1135,8 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ? VLAN_ETH_HLEN : ETH_HLEN; if (skb->len <= 60 && - (lancer_chip(adapter) || skb_vlan_tag_present(skb)) && - is_ipv4_pkt(skb)) { + (lancer_chip(adapter) || BE3_chip(adapter) || + skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) { ip = (struct iphdr *)ip_hdr(skb); pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len)); } From 4e9f0ec38852c18faa9689322e758575af33e5d4 Mon Sep 17 00:00:00 2001 From: Mukesh Sisodiya Date: Mon, 19 Jun 2023 17:02:34 +0200 Subject: [PATCH 885/934] wifi: iwlwifi: pcie: Handle SO-F device for PCI id 0x7AF0 Add support for AX1690i and AX1690s devices with PCIE id 0x7AF0. Cc: stable@vger.kernel.org # 6.1+ Signed-off-by: Mukesh Sisodiya Signed-off-by: Gregory Greenman Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20230619150233.461290-2-johannes@sipsolutions.net Signed-off-by: Jakub Kicinski --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index dba112394838..79115eb1c285 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -548,6 +548,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x54F0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x7A70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), IWL_DEV_INFO(0x7A70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), + IWL_DEV_INFO(0x7AF0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), + IWL_DEV_INFO(0x7AF0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x271C, 0x0214, iwl9260_2ac_cfg, iwl9260_1_name), IWL_DEV_INFO(0x7E40, 0x1691, iwl_cfg_ma_a0_gf4_a0, iwl_ax411_killer_1690s_name), From 54d217406afe250d7a768783baaa79a035f21d38 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Apr 2023 07:24:46 -0400 Subject: [PATCH 886/934] drm: use mgr->dev in drm_dbg_kms in drm_dp_add_payload_part2 I've been experiencing some intermittent crashes down in the display driver code. The symptoms are ususally a line like this in dmesg: amdgpu 0000:30:00.0: [drm] Failed to create MST payload for port 000000006d3a3885: -5 ...followed by an Oops due to a NULL pointer dereference. Switch to using mgr->dev instead of state->dev since "state" can be NULL in some cases. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2184855 Suggested-by: Jani Nikula Signed-off-by: Jeff Layton Reviewed-by: Jani Nikula Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230419112447.18471-1-jlayton@kernel.org --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 38dab76ae69e..e2e21ce79510 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3404,7 +3404,7 @@ int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr, /* Skip failed payloads */ if (payload->vc_start_slot == -1) { - drm_dbg_kms(state->dev, "Part 1 of payload creation for %s failed, skipping part 2\n", + drm_dbg_kms(mgr->dev, "Part 1 of payload creation for %s failed, skipping part 2\n", payload->port->connector->name); return -EIO; } From c8e796895e2310b6130e7577248da1d771431a77 Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Tue, 20 Jun 2023 13:28:24 -0700 Subject: [PATCH 887/934] regmap: spi-avmm: Fix regmap_bus max_raw_write The max_raw_write member of the regmap_spi_avmm_bus structure is defined as: .max_raw_write = SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT SPI_AVMM_VAL_SIZE == 4 and MAX_WRITE_CNT == 1 so this results in a maximum write transfer size of 4 bytes which provides only enough space to transfer the address of the target register. It provides no space for the value to be transferred. This bug became an issue (divide-by-zero in _regmap_raw_write()) after the following was accepted into mainline: commit 3981514180c9 ("regmap: Account for register length when chunking") Change max_raw_write to include space (4 additional bytes) for both the register address and value: .max_raw_write = SPI_AVMM_REG_SIZE + SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT Fixes: 7f9fb67358a2 ("regmap: add Intel SPI Slave to AVMM Bus Bridge support") Reviewed-by: Matthew Gerlach Signed-off-by: Russ Weight Link: https://lore.kernel.org/r/20230620202824.380313-1-russell.h.weight@intel.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-spi-avmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-spi-avmm.c b/drivers/base/regmap/regmap-spi-avmm.c index 4c2b94b3e30b..6af692844c19 100644 --- a/drivers/base/regmap/regmap-spi-avmm.c +++ b/drivers/base/regmap/regmap-spi-avmm.c @@ -660,7 +660,7 @@ static const struct regmap_bus regmap_spi_avmm_bus = { .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, .val_format_endian_default = REGMAP_ENDIAN_NATIVE, .max_raw_read = SPI_AVMM_VAL_SIZE * MAX_READ_CNT, - .max_raw_write = SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT, + .max_raw_write = SPI_AVMM_REG_SIZE + SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT, .free_context = spi_avmm_bridge_ctx_free, }; From 4bedf9eee016286c835e3d8fa981ddece5338795 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 14:45:22 +0200 Subject: [PATCH 888/934] netfilter: nf_tables: fix chain binding transaction logic Add bound flag to rule and chain transactions as in 6a0a8d10a366 ("netfilter: nf_tables: use-after-free in failing rule with bound set") to skip them in case that the chain is already bound from the abort path. This patch fixes an imbalance in the chain use refcnt that triggers a WARN_ON on the table and chain destroy path. This patch also disallows nested chain bindings, which is not supported from userspace. The logic to deal with chain binding in nft_data_hold() and nft_data_release() is not correct. The NFT_TRANS_PREPARE state needs a special handling in case a chain is bound but next expressions in the same rule fail to initialize as described by 1240eb93f061 ("netfilter: nf_tables: incorrect error path handling with NFT_MSG_NEWRULE"). The chain is left bound if rule construction fails, so the objects stored in this chain (and the chain itself) are released by the transaction records from the abort path, follow up patch ("netfilter: nf_tables: add NFT_TRANS_PREPARE_ERROR to deal with bound set/chain") completes this error handling. When deleting an existing rule, chain bound flag is set off so the rule expression .destroy path releases the objects. Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 21 +++++++- net/netfilter/nf_tables_api.c | 86 +++++++++++++++++++----------- net/netfilter/nft_immediate.c | 87 +++++++++++++++++++++++++++---- 3 files changed, 153 insertions(+), 41 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 83db182decc8..8ba7f81e81a6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1009,7 +1009,10 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) return (void *)&rule->data[rule->dlen]; } -void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule); +void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule); +void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, + enum nft_trans_phase phase); +void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule); static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, struct nft_regs *regs, @@ -1104,6 +1107,7 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, struct nft_set_elem *elem); int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); +int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, @@ -1140,11 +1144,17 @@ int nft_chain_validate_dependency(const struct nft_chain *chain, int nft_chain_validate_hooks(const struct nft_chain *chain, unsigned int hook_flags); +static inline bool nft_chain_binding(const struct nft_chain *chain) +{ + return chain->flags & NFT_CHAIN_BINDING; +} + static inline bool nft_chain_is_bound(struct nft_chain *chain) { return (chain->flags & NFT_CHAIN_BINDING) && chain->bound; } +int nft_chain_add(struct nft_table *table, struct nft_chain *chain); void nft_chain_del(struct nft_chain *chain); void nf_tables_chain_destroy(struct nft_ctx *ctx); @@ -1575,6 +1585,7 @@ struct nft_trans_rule { struct nft_rule *rule; struct nft_flow_rule *flow; u32 rule_id; + bool bound; }; #define nft_trans_rule(trans) \ @@ -1583,6 +1594,8 @@ struct nft_trans_rule { (((struct nft_trans_rule *)trans->data)->flow) #define nft_trans_rule_id(trans) \ (((struct nft_trans_rule *)trans->data)->rule_id) +#define nft_trans_rule_bound(trans) \ + (((struct nft_trans_rule *)trans->data)->bound) struct nft_trans_set { struct nft_set *set; @@ -1607,15 +1620,19 @@ struct nft_trans_set { (((struct nft_trans_set *)trans->data)->gc_int) struct nft_trans_chain { + struct nft_chain *chain; bool update; char *name; struct nft_stats __percpu *stats; u8 policy; + bool bound; u32 chain_id; struct nft_base_chain *basechain; struct list_head hook_list; }; +#define nft_trans_chain(trans) \ + (((struct nft_trans_chain *)trans->data)->chain) #define nft_trans_chain_update(trans) \ (((struct nft_trans_chain *)trans->data)->update) #define nft_trans_chain_name(trans) \ @@ -1624,6 +1641,8 @@ struct nft_trans_chain { (((struct nft_trans_chain *)trans->data)->stats) #define nft_trans_chain_policy(trans) \ (((struct nft_trans_chain *)trans->data)->policy) +#define nft_trans_chain_bound(trans) \ + (((struct nft_trans_chain *)trans->data)->bound) #define nft_trans_chain_id(trans) \ (((struct nft_trans_chain *)trans->data)->chain_id) #define nft_trans_basechain(trans) \ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 69bceefaa5c8..5e4965b98528 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -193,6 +193,48 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) } } +static void nft_chain_trans_bind(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + struct nftables_pernet *nft_net; + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_chain_binding(chain)) + return; + + nft_net = nft_pernet(net); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain(trans) == chain) + nft_trans_chain_bound(trans) = true; + break; + case NFT_MSG_NEWRULE: + if (trans->ctx.chain == chain) + nft_trans_rule_bound(trans) = true; + break; + } + } +} + +int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + if (!nft_chain_binding(chain)) + return 0; + + if (nft_chain_binding(ctx->chain)) + return -EOPNOTSUPP; + + if (chain->bound) + return -EBUSY; + + chain->bound = true; + chain->use++; + nft_chain_trans_bind(ctx, chain); + + return 0; +} + static int nft_netdev_register_hooks(struct net *net, struct list_head *hook_list) { @@ -338,8 +380,9 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } - + nft_trans_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); + return trans; } @@ -357,8 +400,7 @@ static int nft_delchain(struct nft_ctx *ctx) return 0; } -static void nft_rule_expr_activate(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr; @@ -371,9 +413,8 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, } } -static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule, - enum nft_trans_phase phase) +void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr; @@ -2226,7 +2267,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, return 0; } -static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) +int nft_chain_add(struct nft_table *table, struct nft_chain *chain) { int err; @@ -3490,8 +3531,7 @@ err_fill_rule_info: return err; } -static void nf_tables_rule_destroy(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr, *next; @@ -3508,7 +3548,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) +static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); @@ -6638,7 +6678,6 @@ static int nf_tables_newsetelem(struct sk_buff *skb, void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { struct nft_chain *chain; - struct nft_rule *rule; if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { @@ -6646,15 +6685,6 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) case NFT_GOTO: chain = data->verdict.chain; chain->use++; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use++; - list_for_each_entry(rule, &chain->rules, list) - chain->use++; - - nft_chain_add(chain->table, chain); break; } } @@ -9677,7 +9707,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { - if (nft_chain_is_bound(trans->ctx.chain)) { + if (nft_trans_chain_bound(trans)) { nft_trans_destroy(trans); break; } @@ -9700,6 +9730,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: + if (nft_trans_rule_bound(trans)) { + nft_trans_destroy(trans); + break; + } trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, @@ -10263,22 +10297,12 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { struct nft_chain *chain; - struct nft_rule *rule; switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; chain->use--; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use--; - list_for_each_entry(rule, &chain->rules, list) - chain->use--; - - nft_chain_del(chain); break; } } diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index c9d2f7c29f53..0492a04064f1 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -76,11 +76,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx, switch (priv->data.verdict.code) { case NFT_JUMP: case NFT_GOTO: - if (nft_chain_is_bound(chain)) { - err = -EBUSY; - goto err1; - } - chain->bound = true; + err = nf_tables_bind_chain(ctx, chain); + if (err < 0) + return err; break; default: break; @@ -98,6 +96,31 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + const struct nft_data *data = &priv->data; + struct nft_ctx chain_ctx; + struct nft_chain *chain; + struct nft_rule *rule; + + if (priv->dreg == NFT_REG_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; + if (!nft_chain_binding(chain)) + break; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_activate(&chain_ctx, rule); + + nft_clear(ctx->net, chain); + break; + default: + break; + } + } return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); } @@ -107,6 +130,40 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + const struct nft_data *data = &priv->data; + struct nft_ctx chain_ctx; + struct nft_chain *chain; + struct nft_rule *rule; + + if (priv->dreg == NFT_REG_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; + if (!nft_chain_binding(chain)) + break; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_deactivate(&chain_ctx, rule, phase); + + switch (phase) { + case NFT_TRANS_PREPARE: + nft_deactivate_next(ctx->net, chain); + break; + default: + nft_chain_del(chain); + chain->bound = false; + chain->table->use--; + break; + } + break; + default: + break; + } + } if (phase == NFT_TRANS_COMMIT) return; @@ -131,15 +188,27 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, case NFT_GOTO: chain = data->verdict.chain; - if (!nft_chain_is_bound(chain)) + if (!nft_chain_binding(chain)) break; + /* Rule construction failed, but chain is already bound: + * let the transaction records release this chain and its rules. + */ + if (chain->bound) { + chain->use--; + break; + } + + /* Rule has been deleted, release chain and its rules. */ chain_ctx = *ctx; chain_ctx.chain = chain; - list_for_each_entry_safe(rule, n, &chain->rules, list) - nf_tables_rule_release(&chain_ctx, rule); - + chain->use--; + list_for_each_entry_safe(rule, n, &chain->rules, list) { + chain->use--; + list_del(&rule->list); + nf_tables_rule_destroy(&chain_ctx, rule); + } nf_tables_chain_destroy(&chain_ctx); break; default: From 26b5a5712eb85e253724e56a54c17f8519bd8e4e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 14:45:26 +0200 Subject: [PATCH 889/934] netfilter: nf_tables: add NFT_TRANS_PREPARE_ERROR to deal with bound set/chain Add a new state to deal with rule expressions deactivation from the newrule error path, otherwise the anonymous set remains in the list in inactive state for the next generation. Mark the set/chain transaction as unbound so the abort path releases this object, set it as inactive in the next generation so it is not reachable anymore from this transaction and reference counter is dropped. Fixes: 1240eb93f061 ("netfilter: nf_tables: incorrect error path handling with NFT_MSG_NEWRULE") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 45 ++++++++++++++++++++++++++----- net/netfilter/nft_immediate.c | 3 +++ 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8ba7f81e81a6..de2b0130c151 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -901,6 +901,7 @@ struct nft_expr_type { enum nft_trans_phase { NFT_TRANS_PREPARE, + NFT_TRANS_PREPARE_ERROR, NFT_TRANS_ABORT, NFT_TRANS_COMMIT, NFT_TRANS_RELEASE @@ -1108,6 +1109,7 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_elem *elem); int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); +void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5e4965b98528..f8afefc8294f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -169,7 +169,8 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set, + bool bind) { struct nftables_pernet *nft_net; struct net *net = ctx->net; @@ -183,17 +184,28 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) - nft_trans_set_bound(trans) = true; + nft_trans_set_bound(trans) = bind; break; case NFT_MSG_NEWSETELEM: if (nft_trans_elem_set(trans) == set) - nft_trans_elem_set_bound(trans) = true; + nft_trans_elem_set_bound(trans) = bind; break; } } } -static void nft_chain_trans_bind(const struct nft_ctx *ctx, struct nft_chain *chain) +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, true); +} + +static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, false); +} + +static void __nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain, bool bind) { struct nftables_pernet *nft_net; struct net *net = ctx->net; @@ -207,16 +219,22 @@ static void nft_chain_trans_bind(const struct nft_ctx *ctx, struct nft_chain *ch switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: if (nft_trans_chain(trans) == chain) - nft_trans_chain_bound(trans) = true; + nft_trans_chain_bound(trans) = bind; break; case NFT_MSG_NEWRULE: if (trans->ctx.chain == chain) - nft_trans_rule_bound(trans) = true; + nft_trans_rule_bound(trans) = bind; break; } } } +static void nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, true); +} + int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) { if (!nft_chain_binding(chain)) @@ -235,6 +253,11 @@ int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) return 0; } +void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, false); +} + static int nft_netdev_register_hooks(struct net *net, struct list_head *hook_list) { @@ -3884,7 +3907,7 @@ err_destroy_flow_rule: if (flow) nft_flow_rule_destroy(flow); err_release_rule: - nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR); nf_tables_rule_destroy(&ctx, rule); err_release_expr: for (i = 0; i < n; i++) { @@ -5183,6 +5206,13 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nft_set_trans_unbind(ctx, set); + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + + set->use--; + break; case NFT_TRANS_PREPARE: if (nft_set_is_anonymous(set)) nft_deactivate_next(ctx->net, set); @@ -7701,6 +7731,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 0492a04064f1..3d76ebfe8939 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -150,6 +150,9 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, nft_rule_expr_deactivate(&chain_ctx, rule, phase); switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nf_tables_unbind_chain(ctx, chain); + fallthrough; case NFT_TRANS_PREPARE: nft_deactivate_next(ctx->net, chain); break; From 628bd3e49cba1c066228e23d71a852c23e26da73 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 14:51:49 +0200 Subject: [PATCH 890/934] netfilter: nf_tables: drop map element references from preparation phase set .destroy callback releases the references to other objects in maps. This is very late and it results in spurious EBUSY errors. Drop refcount from the preparation phase instead, update set backend not to drop reference counter from set .destroy path. Exceptions: NFT_TRANS_PREPARE_ERROR does not require to drop the reference counter because the transaction abort path releases the map references for each element since the set is unbound. The abort path also deals with releasing reference counter for new elements added to unbound sets. Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +- net/netfilter/nf_tables_api.c | 147 ++++++++++++++++++++++++++---- net/netfilter/nft_set_bitmap.c | 5 +- net/netfilter/nft_set_hash.c | 23 ++++- net/netfilter/nft_set_pipapo.c | 14 ++- net/netfilter/nft_set_rbtree.c | 5 +- 6 files changed, 167 insertions(+), 32 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index de2b0130c151..f84b6daea5c4 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -472,7 +472,8 @@ struct nft_set_ops { int (*init)(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const nla[]); - void (*destroy)(const struct nft_set *set); + void (*destroy)(const struct nft_ctx *ctx, + const struct nft_set *set); void (*gc_init)(const struct nft_set *set); unsigned int elemsize; @@ -809,6 +810,8 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, struct nft_expr *expr_array[]); void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem); /** * struct nft_set_gc_batch_head - nf_tables set garbage collection batch diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f8afefc8294f..e2493f83c48e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -559,6 +559,58 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, return __nft_trans_set_add(ctx, msg_type, set, NULL); } +static void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_deactivate(ctx->net, set, elem); + + return 0; +} + +struct nft_set_elem_catchall { + struct list_head list; + struct rcu_head rcu; + void *elem; +}; + +static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, + struct nft_set *set) +{ + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_elem_catchall *catchall; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + list_for_each_entry(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + if (!nft_set_elem_active(ext, genmask)) + continue; + + elem.priv = catchall->elem; + nft_setelem_data_deactivate(ctx->net, set, &elem); + break; + } +} + +static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_deactivate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); + + nft_map_catchall_deactivate(ctx, set); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -567,6 +619,9 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) if (err < 0) return err; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); ctx->table->use--; @@ -3659,12 +3714,6 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, return 0; } -struct nft_set_elem_catchall { - struct list_head list; - struct rcu_head rcu; - void *elem; -}; - int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); @@ -4997,7 +5046,7 @@ err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); err_set_destroy: - ops->destroy(set); + ops->destroy(&ctx, set); err_set_init: kfree(set->name); err_set_name: @@ -5012,7 +5061,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx, list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { list_del_rcu(&catchall->list); - nft_set_elem_destroy(set, catchall->elem, true); + nf_tables_set_elem_destroy(ctx, set, catchall->elem); kfree_rcu(catchall, rcu); } } @@ -5027,7 +5076,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(ctx, set->exprs[i]); - set->ops->destroy(set); + set->ops->destroy(ctx, set); nft_set_catchall_destroy(ctx, set); kfree(set->name); kvfree(set); @@ -5192,10 +5241,60 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } } +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_activate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_activate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_catchall_activate(const struct nft_ctx *ctx, + struct nft_set *set) +{ + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set_elem_catchall *catchall; + struct nft_set_elem elem; + struct nft_set_ext *ext; + + list_for_each_entry(catchall, &set->catchall_list, list) { + ext = nft_set_elem_ext(set, catchall->elem); + if (!nft_set_elem_active(ext, genmask)) + continue; + + elem.priv = catchall->elem; + nft_setelem_data_activate(ctx->net, set, &elem); + break; + } +} + +static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_activate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); + + nft_map_catchall_activate(ctx, set); +} + void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (nft_set_is_anonymous(set)) + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(ctx, set); + nft_clear(ctx->net, set); + } set->use++; } @@ -5214,13 +5313,20 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, set->use--; break; case NFT_TRANS_PREPARE: - if (nft_set_is_anonymous(set)) - nft_deactivate_next(ctx->net, set); + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); + } set->use--; return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: + if (nft_set_is_anonymous(set) && + set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + set->use--; fallthrough; default: @@ -5973,6 +6079,7 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, __nft_set_elem_expr_destroy(ctx, expr); } +/* Drop references and destroy. Called from gc, dynset and abort path. */ void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr) { @@ -5994,11 +6101,11 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_setelem_data_deactivate() already deals - * with the refcounting from the preparation phase. +/* Destroy element. References have been already dropped in the preparation + * path via nft_setelem_data_deactivate(). */ -static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); @@ -6631,7 +6738,7 @@ err_elem_free: if (obj) obj->use--; err_elem_userdata: - nf_tables_set_elem_destroy(ctx, set, elem.priv); + nft_set_elem_destroy(set, elem.priv, true); err_parse_data: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_release(&elem.data.val, desc.type); @@ -9799,6 +9906,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DESTROYSET: trans->ctx.table->use++; nft_clear(trans->ctx.net, nft_trans_set(trans)); + if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -10568,6 +10678,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table) list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); table->use--; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(&ctx, set); + nft_set_destroy(&ctx, set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 96081ac8d2b4..1e5e7a181e0b 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -271,13 +271,14 @@ static int nft_bitmap_init(const struct nft_set *set, return 0; } -static void nft_bitmap_destroy(const struct nft_set *set) +static void nft_bitmap_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) - nft_set_elem_destroy(set, be, true); + nf_tables_set_elem_destroy(ctx, set, be); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 76de6c8d9865..0b73cb0e752f 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -400,19 +400,31 @@ static int nft_rhash_init(const struct nft_set *set, return 0; } +struct nft_rhash_ctx { + const struct nft_ctx ctx; + const struct nft_set *set; +}; + static void nft_rhash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy(arg, ptr, true); + struct nft_rhash_ctx *rhash_ctx = arg; + + nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr); } -static void nft_rhash_destroy(const struct nft_set *set) +static void nft_rhash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_ctx rhash_ctx = { + .ctx = *ctx, + .set = set, + }; cancel_delayed_work_sync(&priv->gc_work); rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, - (void *)set); + (void *)&rhash_ctx); } /* Number of buckets is stored in u32, so cap our result to 1U<<31 */ @@ -643,7 +655,8 @@ static int nft_hash_init(const struct nft_set *set, return 0; } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; @@ -653,7 +666,7 @@ static void nft_hash_destroy(const struct nft_set *set) for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); - nft_set_elem_destroy(set, he, true); + nf_tables_set_elem_destroy(ctx, set, he); } } } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 15e451dc3fc4..c867b5b772e8 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2148,10 +2148,12 @@ out_scratch: /** * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array + * @ctx: context * @set: nftables API set representation * @m: matching data pointing to key mapping array */ -static void nft_set_pipapo_match_destroy(const struct nft_set *set, +static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo_field *f; @@ -2168,15 +2170,17 @@ static void nft_set_pipapo_match_destroy(const struct nft_set *set, e = f->mt[r].e; - nft_set_elem_destroy(set, e, true); + nf_tables_set_elem_destroy(ctx, set, e); } } /** * nft_pipapo_destroy() - Free private data for set and all committed elements + * @ctx: context * @set: nftables API set representation */ -static void nft_pipapo_destroy(const struct nft_set *set) +static void nft_pipapo_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; @@ -2186,7 +2190,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); @@ -2203,7 +2207,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) m = priv->clone; if (priv->dirty) - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(priv->clone->scratch_aligned); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 2f114aa10f1a..5c05c9b990fb 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -664,7 +664,8 @@ static int nft_rbtree_init(const struct nft_set *set, return 0; } -static void nft_rbtree_destroy(const struct nft_set *set) +static void nft_rbtree_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; @@ -675,7 +676,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe, true); + nf_tables_set_elem_destroy(ctx, set, rbe); } } From 2b84e215f87443c74ac0aa7f76bb172d43a87033 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:20:04 +0200 Subject: [PATCH 891/934] netfilter: nft_set_pipapo: .walk does not deal with generations The .walk callback iterates over the current active set, but it might be useful to iterate over the next generation set. Use the generation mask to determine what set view (either current or next generation) is use for the walk iteration. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index c867b5b772e8..0452ee586c1c 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1974,12 +1974,16 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_pipapo *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->net); struct nft_pipapo_match *m; struct nft_pipapo_field *f; int i, r; rcu_read_lock(); - m = rcu_dereference(priv->match); + if (iter->genmask == nft_genmask_cur(net)) + m = rcu_dereference(priv->match); + else + m = priv->clone; if (unlikely(!m)) goto out; From d6b478666ffa6d2c25386d78bf1c4640d4da305e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:20:08 +0200 Subject: [PATCH 892/934] netfilter: nf_tables: fix underflow in object reference counter Since ("netfilter: nf_tables: drop map element references from preparation phase"), integration with commit protocol is better, therefore drop the workaround that b91d90368837 ("netfilter: nf_tables: fix leaking object reference count") provides. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e2493f83c48e..cf10b3bd5c0f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6668,19 +6668,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (flags) *nft_set_ext_flags(ext) = flags; + if (obj) { + *nft_set_ext_obj(ext) = obj; + obj->use++; + } if (ulen > 0) { if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) { err = -EINVAL; - goto err_elem_userdata; + goto err_elem_free; } udata = nft_set_ext_userdata(ext); udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } - if (obj) { - *nft_set_ext_obj(ext) = obj; - obj->use++; - } err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs); if (err < 0) goto err_elem_free; @@ -6735,9 +6735,6 @@ err_set_full: err_element_clash: kfree(trans); err_elem_free: - if (obj) - obj->use--; -err_elem_userdata: nft_set_elem_destroy(set, elem.priv, true); err_parse_data: if (nla[NFTA_SET_ELEM_DATA] != NULL) From c88c535b592d3baeee74009f3eceeeaf0fdd5e1b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:20:16 +0200 Subject: [PATCH 893/934] netfilter: nf_tables: disallow element updates of bound anonymous sets Anonymous sets come with NFT_SET_CONSTANT from userspace. Although API allows to create anonymous sets without NFT_SET_CONSTANT, it makes no sense to allow to add and to delete elements for bound anonymous sets. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cf10b3bd5c0f..6f26520bd865 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6779,7 +6779,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb, if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); @@ -7053,7 +7054,9 @@ static int nf_tables_delsetelem(struct sk_buff *skb, set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) + + if (!list_empty(&set->bindings) && + (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) return -EBUSY; nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); From 938154b93be8cd611ddfd7bafc1849f3c4355201 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:21:33 +0200 Subject: [PATCH 894/934] netfilter: nf_tables: reject unbound anonymous set before commit phase Add a new list to track set transaction and to check for unbound anonymous sets before entering the commit phase. Bail out at the end of the transaction handling if an anonymous set remains unbound. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ net/netfilter/nf_tables_api.c | 35 ++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f84b6daea5c4..ee47d7143d99 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1573,6 +1573,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) * struct nft_trans - nf_tables object update in transaction * * @list: used internally + * @binding_list: list of objects with possible bindings * @msg_type: message type * @put_net: ctx->net needs to be put * @ctx: transaction context @@ -1580,6 +1581,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) */ struct nft_trans { struct list_head list; + struct list_head binding_list; int msg_type; bool put_net; struct nft_ctx ctx; @@ -1724,6 +1726,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head binding_list; struct list_head module_list; struct list_head notify_list; struct mutex commit_mutex; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6f26520bd865..66da44bff5e4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -151,6 +151,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, return NULL; INIT_LIST_HEAD(&trans->list); + INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -163,9 +164,15 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } -static void nft_trans_destroy(struct nft_trans *trans) +static void nft_trans_list_del(struct nft_trans *trans) { list_del(&trans->list); + list_del(&trans->binding_list); +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + nft_trans_list_del(trans); kfree(trans); } @@ -357,6 +364,14 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr { struct nftables_pernet *nft_net = nft_pernet(net); + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (!nft_trans_set_update(trans) && + nft_set_is_anonymous(nft_trans_set(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + } + list_add_tail(&trans->list, &nft_net->commit_list); } @@ -9111,7 +9126,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) synchronize_rcu(); list_for_each_entry_safe(trans, next, &head, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nft_commit_release(trans); } } @@ -9476,6 +9491,19 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } + list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (!nft_trans_set_update(trans) && + nft_set_is_anonymous(nft_trans_set(trans)) && + !nft_trans_set_bound(trans)) { + pr_warn_once("nftables ruleset with unbound set\n"); + return -EINVAL; + } + break; + } + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN; @@ -9989,7 +10017,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nf_tables_abort_release(trans); } @@ -10765,6 +10793,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); From 62e1e94b246e685d89c3163aaef4b160e42ceb02 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:21:39 +0200 Subject: [PATCH 895/934] netfilter: nf_tables: reject unbound chain set before commit phase Use binding list to track set transaction and to check for unbound chains before entering the commit phase. Bail out if chain binding remain unused before entering the commit step. Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 66da44bff5e4..bab792434a8d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -370,6 +370,11 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr nft_set_is_anonymous(nft_trans_set(trans))) list_add_tail(&trans->binding_list, &nft_net->binding_list); break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; } list_add_tail(&trans->list, &nft_net->commit_list); @@ -9501,6 +9506,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return -EINVAL; } break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans)) && + !nft_trans_chain_bound(trans)) { + pr_warn_once("nftables ruleset with unbound chain\n"); + return -EINVAL; + } + break; } } From b770283c98e0eee9133c47bc03b6cc625dc94723 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:22:01 +0200 Subject: [PATCH 896/934] netfilter: nf_tables: disallow updates of anonymous sets Disallow updates of set timeout and garbage collection parameters for anonymous sets. Fixes: 123b99619cca ("netfilter: nf_tables: honor set timeout and garbage collection updates") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index bab792434a8d..16995b88da2f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4963,6 +4963,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (nft_set_is_anonymous(set)) + return -EOPNOTSUPP; + err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); if (err < 0) return err; From e26d3009efda338f19016df4175f354a9bd0a4ab Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Jun 2023 15:22:18 +0200 Subject: [PATCH 897/934] netfilter: nf_tables: disallow timeout for anonymous sets Never used from userspace, disallow these parameters. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 16995b88da2f..0d293eab310b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4909,6 +4909,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; @@ -4917,6 +4920,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; + + if (flags & NFT_SET_ANONYMOUS) + return -EOPNOTSUPP; + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } From 043d2acf57227db1fdaaa620b2a420acfaa56d6e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 14 Jun 2023 23:20:18 +0200 Subject: [PATCH 898/934] netfilter: nf_tables: drop module reference after updating chain Otherwise the module reference counter is leaked. Fixes b9703ed44ffb ("netfilter: nf_tables: support for adding new devices to an existing netdev chain") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0d293eab310b..c1db2f4b2aa4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2667,6 +2667,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, nft_trans_basechain(trans) = basechain; INIT_LIST_HEAD(&nft_trans_chain_hooks(trans)); list_splice(&hook.list, &nft_trans_chain_hooks(trans)); + if (nla[NFTA_CHAIN_HOOK]) + module_put(hook.type->owner); nft_trans_commit_list_add_tail(ctx->net, trans); From 62f9a68a36d4441a6c412b81faed102594bc6670 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 15 Jun 2023 10:14:25 +0200 Subject: [PATCH 899/934] netfilter: nfnetlink_osf: fix module autoload Move the alias from xt_osf to nfnetlink_osf. Fixes: f9324952088f ("netfilter: nfnetlink_osf: extract nfnetlink_subsystem code from xt_osf.c") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_osf.c | 1 + net/netfilter/xt_osf.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index ee6840bd5933..8f1bfa6ccc2d 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -439,3 +439,4 @@ module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index e1990baf3a3b..dc9485854002 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -71,4 +71,3 @@ MODULE_AUTHOR("Evgeniy Polyakov "); MODULE_DESCRIPTION("Passive OS fingerprint matching."); MODULE_ALIAS("ipt_osf"); MODULE_ALIAS("ip6t_osf"); -MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); From 42e344f01688490cdac4bed8f5ba21817cad26ee Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Fri, 16 Jun 2023 17:56:11 +0200 Subject: [PATCH 900/934] netfilter: nf_tables: Fix for deleting base chains with payload When deleting a base chain, iptables-nft simply submits the whole chain to the kernel, including the NFTA_CHAIN_HOOK attribute. The new code added by fixed commit then turned this into a chain update, destroying the hook but not the chain itself. Detect the situation by checking if the chain type is either netdev or inet/ingress. Fixes: 7d937b107108f ("netfilter: nf_tables: support for deleting devices in an existing netdev chain") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c1db2f4b2aa4..4c7937fd803f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2811,21 +2811,18 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack); } -static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_chain *chain, +static int nft_delchain_hook(struct nft_ctx *ctx, + struct nft_base_chain *basechain, struct netlink_ext_ack *extack) { + const struct nft_chain *chain = &basechain->chain; const struct nlattr * const *nla = ctx->nla; struct nft_chain_hook chain_hook = {}; - struct nft_base_chain *basechain; struct nft_hook *this, *hook; LIST_HEAD(chain_del_list); struct nft_trans *trans; int err; - if (!nft_is_base_chain(chain)) - return -EOPNOTSUPP; - - basechain = nft_base_chain(chain); err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook, ctx->family, chain->flags, extack); if (err < 0) @@ -2910,7 +2907,12 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, if (chain->flags & NFT_CHAIN_HW_OFFLOAD) return -EOPNOTSUPP; - return nft_delchain_hook(&ctx, chain, extack); + if (nft_is_base_chain(chain)) { + struct nft_base_chain *basechain = nft_base_chain(chain); + + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) + return nft_delchain_hook(&ctx, basechain, extack); + } } if (info->nlh->nlmsg_flags & NLM_F_NONREC && From 408c090002c8ca5da3da1417d1d675583379fae6 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Mon, 19 Jun 2023 17:49:48 +0800 Subject: [PATCH 901/934] net: mdio: fix the wrong parameters PHY address and device address are passed in the wrong order. Cc: stable@vger.kernel.org Fixes: 4e4aafcddbbf ("net: mdio: Add dedicated C45 API to MDIO bus drivers") Signed-off-by: Jiawen Wu Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230619094948.84452-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 389f33a12534..8b3618d3da4a 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -1287,7 +1287,7 @@ EXPORT_SYMBOL_GPL(mdiobus_modify_changed); * @mask: bit mask of bits to clear * @set: bit mask of bits to set */ -int mdiobus_c45_modify_changed(struct mii_bus *bus, int devad, int addr, +int mdiobus_c45_modify_changed(struct mii_bus *bus, int addr, int devad, u32 regnum, u16 mask, u16 set) { int err; From a129b41fe0a8b4da828c46b10f5244ca07a3fec3 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Mon, 19 Jun 2023 17:44:35 +0200 Subject: [PATCH 902/934] Revert "net: phy: dp83867: perform soft reset and retain established link" This reverts commit da9ef50f545f86ffe6ff786174d26500c4db737a. This fixes a regression in which the link would come up, but no communication was possible. The reverted commit was also removing a comment about DP83867_PHYCR_FORCE_LINK_GOOD, this is not added back in this commits since it seems that this is unrelated to the original code change. Closes: https://lore.kernel.org/all/ZGuDJos8D7N0J6Z2@francesco-nb.int.toradex.com/ Fixes: da9ef50f545f ("net: phy: dp83867: perform soft reset and retain established link") Signed-off-by: Francesco Dolcini Reviewed-by: Andrew Lunn Reviewed-by: Praneeth Bajjuri Link: https://lore.kernel.org/r/20230619154435.355485-1-francesco@dolcini.it Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83867.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 76f5a2402fb0..e397e7d642d9 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -936,7 +936,7 @@ static int dp83867_phy_reset(struct phy_device *phydev) { int err; - err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART); + err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET); if (err < 0) return err; From afd384f0dbea2229fd11159efb86a5b41051c4a9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 8 Jun 2023 17:42:53 -0400 Subject: [PATCH 903/934] Revert "virtio-blk: support completion batching for the IRQ path" This reverts commit 07b679f70d73483930e8d3c293942416d9cd5c13. This change appears to have broken things... We now see applications hanging during disk accesses. e.g. multi-port virtio-blk device running in h/w (FPGA) Host running a simple 'fio' test. [global] thread=1 direct=1 ioengine=libaio norandommap=1 group_reporting=1 bs=4K rw=read iodepth=128 runtime=1 numjobs=4 time_based [job0] filename=/dev/vda [job1] filename=/dev/vdb [job2] filename=/dev/vdc ... [job15] filename=/dev/vdp i.e. 16 disks; 4 queues per disk; simple burst of 4KB reads This is repeatedly run in a loop. After a few, normally <10 seconds, fio hangs. With 64 queues (16 disks), failure occurs within a few seconds; with 8 queues (2 disks) it may take ~hour before hanging. Last message: fio-3.19 Starting 8 threads Jobs: 1 (f=1): [_(7),R(1)][68.3%][eta 03h:11m:06s] I think this means at the end of the run 1 queue was left incomplete. 'diskstats' (run while fio is hung) shows no outstanding transactions. e.g. $ cat /proc/diskstats ... 252 0 vda 1843140071 0 14745120568 712568645 0 0 0 0 0 3117947 712568645 0 0 0 0 0 0 252 16 vdb 1816291511 0 14530332088 704905623 0 0 0 0 0 3117711 704905623 0 0 0 0 0 0 ... Other stats (in the h/w, and added to the virtio-blk driver ([a]virtio_queue_rq(), [b]virtblk_handle_req(), [c]virtblk_request_done()) all agree, and show every request had a completion, and that virtblk_request_done() never gets called. e.g. PF= 0 vq=0 1 2 3 [a]request_count - 839416590 813148916 105586179 84988123 [b]completion1_count - 839416590 813148916 105586179 84988123 [c]completion2_count - 0 0 0 0 PF= 1 vq=0 1 2 3 [a]request_count - 823335887 812516140 104582672 75856549 [b]completion1_count - 823335887 812516140 104582672 75856549 [c]completion2_count - 0 0 0 0 i.e. the issue is after the virtio-blk driver. This change was introduced in kernel 6.3.0. I am seeing this using 6.3.3. If I run with an earlier kernel (5.15), it does not occur. If I make a simple patch to the 6.3.3 virtio-blk driver, to skip the blk_mq_add_to_batch()call, it does not fail. e.g. kernel 5.15 - this is OK virtio_blk.c,virtblk_done() [irq handler] if (likely(!blk_should_fake_timeout(req->q))) { blk_mq_complete_request(req); } kernel 6.3.3 - this fails virtio_blk.c,virtblk_handle_req() [irq handler] if (likely(!blk_should_fake_timeout(req->q))) { if (!blk_mq_complete_request_remote(req)) { if (!blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), virtblk_complete_batch)) { virtblk_request_done(req); //this never gets called... so blk_mq_add_to_batch() must always succeed } } } If I do, kernel 6.3.3 - this is OK virtio_blk.c,virtblk_handle_req() [irq handler] if (likely(!blk_should_fake_timeout(req->q))) { if (!blk_mq_complete_request_remote(req)) { virtblk_request_done(req); //force this here... if (!blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), virtblk_complete_batch)) { virtblk_request_done(req); //this never gets called... so blk_mq_add_to_batch() must always succeed } } } Perhaps you might like to fix/test/revert this change... Martin Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202306090826.C1fZmdMe-lkp@intel.com/ Cc: Suwan Kim Tested-by: edliaw@google.com Reported-by: "Roberts, Martin" Message-Id: <336455b4f630f329380a8f53ee8cad3868764d5c.1686295549.git.mst@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 82 +++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2b918e28acaa..b47358da92a2 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -348,63 +348,33 @@ static inline void virtblk_request_done(struct request *req) blk_mq_end_request(req, status); } -static void virtblk_complete_batch(struct io_comp_batch *iob) -{ - struct request *req; - - rq_list_for_each(&iob->req_list, req) { - virtblk_unmap_data(req, blk_mq_rq_to_pdu(req)); - virtblk_cleanup_cmd(req); - } - blk_mq_end_request_batch(iob); -} - -static int virtblk_handle_req(struct virtio_blk_vq *vq, - struct io_comp_batch *iob) -{ - struct virtblk_req *vbr; - int req_done = 0; - unsigned int len; - - while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { - struct request *req = blk_mq_rq_from_pdu(vbr); - - if (likely(!blk_should_fake_timeout(req->q)) && - !blk_mq_complete_request_remote(req) && - !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), - virtblk_complete_batch)) - virtblk_request_done(req); - req_done++; - } - - return req_done; -} - static void virtblk_done(struct virtqueue *vq) { struct virtio_blk *vblk = vq->vdev->priv; - struct virtio_blk_vq *vblk_vq = &vblk->vqs[vq->index]; - int req_done = 0; + bool req_done = false; + int qid = vq->index; + struct virtblk_req *vbr; unsigned long flags; - DEFINE_IO_COMP_BATCH(iob); + unsigned int len; - spin_lock_irqsave(&vblk_vq->lock, flags); + spin_lock_irqsave(&vblk->vqs[qid].lock, flags); do { virtqueue_disable_cb(vq); - req_done += virtblk_handle_req(vblk_vq, &iob); + while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { + struct request *req = blk_mq_rq_from_pdu(vbr); + if (likely(!blk_should_fake_timeout(req->q))) + blk_mq_complete_request(req); + req_done = true; + } if (unlikely(virtqueue_is_broken(vq))) break; } while (!virtqueue_enable_cb(vq)); - if (req_done) { - if (!rq_list_empty(iob.req_list)) - iob.complete(&iob); - - /* In case queue is stopped waiting for more buffers. */ + /* In case queue is stopped waiting for more buffers. */ + if (req_done) blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); - } - spin_unlock_irqrestore(&vblk_vq->lock, flags); + spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); } static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) @@ -1283,15 +1253,37 @@ static void virtblk_map_queues(struct blk_mq_tag_set *set) } } +static void virtblk_complete_batch(struct io_comp_batch *iob) +{ + struct request *req; + + rq_list_for_each(&iob->req_list, req) { + virtblk_unmap_data(req, blk_mq_rq_to_pdu(req)); + virtblk_cleanup_cmd(req); + } + blk_mq_end_request_batch(iob); +} + static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct virtio_blk *vblk = hctx->queue->queuedata; struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx); + struct virtblk_req *vbr; unsigned long flags; + unsigned int len; int found = 0; spin_lock_irqsave(&vq->lock, flags); - found = virtblk_handle_req(vq, iob); + + while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) { + struct request *req = blk_mq_rq_from_pdu(vbr); + + found++; + if (!blk_mq_complete_request_remote(req) && + !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), + virtblk_complete_batch)) + virtblk_request_done(req); + } if (found) blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); From 9724160b3942b0a967b91a59f81da5593f28b8ba Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Thu, 15 Jun 2023 16:56:07 +0200 Subject: [PATCH 904/934] bpf/btf: Accept function names that contain dots When building a kernel with LLVM=1, LLVM_IAS=0 and CONFIG_KASAN=y, LLVM leaves DWARF tags for the "asan.module_ctor" & co symbols. In turn, pahole creates BTF_KIND_FUNC entries for these and this makes the BTF metadata validation fail because they contain a dot. In a dramatic turn of event, this BTF verification failure can cause the netfilter_bpf initialization to fail, causing netfilter_core to free the netfilter_helper hashmap and netfilter_ftp to trigger a use-after-free. The risk of u-a-f in netfilter will be addressed separately but the existence of "asan.module_ctor" debug info under some build conditions sounds like a good enough reason to accept functions that contain dots in BTF. Although using only LLVM=1 is the recommended way to compile clang-based kernels, users can certainly do LLVM=1, LLVM_IAS=0 as well and we still try to support that combination according to Nick. To clarify: - > v5.10 kernel, LLVM=1 (LLVM_IAS=0 is not the default) is recommended, but user can still have LLVM=1, LLVM_IAS=0 to trigger the issue - <= 5.10 kernel, LLVM=1 (LLVM_IAS=0 is the default) is recommended in which case GNU as will be used Fixes: 1dc92851849c ("bpf: kernel side support for BTF Var and DataSec") Signed-off-by: Florent Revest Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Cc: Yonghong Song Cc: Nick Desaulniers Link: https://lore.kernel.org/bpf/20230615145607.3469985-1-revest@chromium.org --- kernel/bpf/btf.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 6b682b8e4b50..72b32b7cd9cd 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -744,13 +744,12 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset) return offset < btf->hdr.str_len; } -static bool __btf_name_char_ok(char c, bool first, bool dot_ok) +static bool __btf_name_char_ok(char c, bool first) { if ((first ? !isalpha(c) : !isalnum(c)) && c != '_' && - ((c == '.' && !dot_ok) || - c != '.')) + c != '.') return false; return true; } @@ -767,20 +766,20 @@ static const char *btf_str_by_offset(const struct btf *btf, u32 offset) return NULL; } -static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) +static bool __btf_name_valid(const struct btf *btf, u32 offset) { /* offset must be valid */ const char *src = btf_str_by_offset(btf, offset); const char *src_limit; - if (!__btf_name_char_ok(*src, true, dot_ok)) + if (!__btf_name_char_ok(*src, true)) return false; /* set a limit on identifier length */ src_limit = src + KSYM_NAME_LEN; src++; while (*src && src < src_limit) { - if (!__btf_name_char_ok(*src, false, dot_ok)) + if (!__btf_name_char_ok(*src, false)) return false; src++; } @@ -788,17 +787,14 @@ static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) return !*src; } -/* Only C-style identifier is permitted. This can be relaxed if - * necessary. - */ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) { - return __btf_name_valid(btf, offset, false); + return __btf_name_valid(btf, offset); } static bool btf_name_valid_section(const struct btf *btf, u32 offset) { - return __btf_name_valid(btf, offset, true); + return __btf_name_valid(btf, offset); } static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) @@ -4422,7 +4418,7 @@ static s32 btf_var_check_meta(struct btf_verifier_env *env, } if (!t->name_off || - !__btf_name_valid(env->btf, t->name_off, true)) { + !__btf_name_valid(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } From db8eae6bc5c702d8e3ab2d0c6bb5976c131576eb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 18 Jun 2023 15:14:14 +0200 Subject: [PATCH 905/934] bpf: Force kprobe multi expected_attach_type for kprobe_multi link We currently allow to create perf link for program with expected_attach_type == BPF_TRACE_KPROBE_MULTI. This will cause crash when we call helpers like get_attach_cookie or get_func_ip in such program, because it will call the kprobe_multi's version (current->bpf_ctx context setup) of those helpers while it expects perf_link's current->bpf_ctx context setup. Making sure that we use BPF_TRACE_KPROBE_MULTI expected_attach_type only for programs attaching through kprobe_multi link. Fixes: ca74823c6e16 ("bpf: Add cookie support to programs attached with kprobe multi link") Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230618131414.75649-1-jolsa@kernel.org --- kernel/bpf/syscall.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0c21d0d8efe4..f1c8733f76b8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3440,6 +3440,11 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, return prog->enforce_expected_attach_type && prog->expected_attach_type != attach_type ? -EINVAL : 0; + case BPF_PROG_TYPE_KPROBE: + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && + attach_type != BPF_TRACE_KPROBE_MULTI) + return -EINVAL; + return 0; default: return 0; } From 82edd1bd7f98567928871e2a2a317724d35f0085 Mon Sep 17 00:00:00 2001 From: "Luke D. Jones" Date: Wed, 21 Jun 2023 20:57:15 +1200 Subject: [PATCH 906/934] ALSA: hda/realtek: Add quirk for ASUS ROG GV601V Adds the required quirk to enable the Cirrus amp and correct pins on the ASUS ROG GV601V series. While this works if the related _DSD properties are made available, these aren't included in the ACPI of these laptops (yet). Signed-off-by: Luke D. Jones Link: https://lore.kernel.org/r/20230621085715.5382-1-luke@ljones.dev Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 17d5bdd4be6f..dabfdecece26 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9527,6 +9527,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1473, "ASUS GU604V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603V", ALC285_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1493, "ASUS GV601V", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1683, "ASUS UM3402YAR", ALC287_FIXUP_CS35L41_I2C_2), From b1dc492087db0f2e5a45f1072a743d04618dd6be Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 19 Jun 2023 09:35:34 -0600 Subject: [PATCH 907/934] io_uring/net: clear msg_controllen on partial sendmsg retry If we have cmsg attached AND we transferred partial data at least, clear msg_controllen on retry so we don't attempt to send that again. Cc: stable@vger.kernel.org # 5.10+ Fixes: cac9e4418f4c ("io_uring/net: save msghdr->msg_control for retries") Reported-by: Stefan Metzmacher Reviewed-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- io_uring/net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/net.c b/io_uring/net.c index 51b0f7fbb4f5..c0924ab1ea11 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -326,6 +326,8 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) return io_setup_async_msg(req, kmsg, issue_flags); if (ret > 0 && io_net_retry(sock, flags)) { + kmsg->msg.msg_controllen = 0; + kmsg->msg.msg_control = NULL; sr->done_io += ret; req->flags |= REQ_F_PARTIAL_IO; return io_setup_async_msg(req, kmsg, issue_flags); From 78d0d2063bab954d19a1696feae4c7706a626d48 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 19 Jun 2023 09:41:05 -0600 Subject: [PATCH 908/934] io_uring/net: disable partial retries for recvmsg with cmsg We cannot sanely handle partial retries for recvmsg if we have cmsg attached. If we don't, then we'd just be overwriting the initial cmsg header on retries. Alternatively we could increment and handle this appropriately, but it doesn't seem worth the complication. Move the MSG_WAITALL check into the non-multishot case while at it, since MSG_WAITALL is explicitly disabled for multishot anyway. Link: https://lore.kernel.org/io-uring/0b0d4411-c8fd-4272-770b-e030af6919a0@kernel.dk/ Cc: stable@vger.kernel.org # 5.10+ Reported-by: Stefan Metzmacher Reviewed-by: Stefan Metzmacher Signed-off-by: Jens Axboe --- io_uring/net.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index c0924ab1ea11..2bc2cb2f4d6c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -789,16 +789,19 @@ retry_multishot: flags = sr->msg_flags; if (force_nonblock) flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) - min_ret = iov_iter_count(&kmsg->msg.msg_iter); kmsg->msg.msg_get_inq = 1; - if (req->flags & REQ_F_APOLL_MULTISHOT) + if (req->flags & REQ_F_APOLL_MULTISHOT) { ret = io_recvmsg_multishot(sock, sr, kmsg, flags, &mshot_finished); - else + } else { + /* disable partial retry for recvmsg with cmsg attached */ + if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen) + min_ret = iov_iter_count(&kmsg->msg.msg_iter); + ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags); + } if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { From 26fed83653d0154704cadb7afc418f315c7ac1f0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 20 Jun 2023 16:11:51 -0600 Subject: [PATCH 909/934] io_uring/net: use the correct msghdr union member in io_sendmsg_copy_hdr Rather than assign the user pointer to msghdr->msg_control, assign it to msghdr->msg_control_user to make sparse happy. They are in a union so the end result is the same, but let's avoid new sparse warnings and squash this one. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202306210654.mDMcyMuB-lkp@intel.com/ Fixes: cac9e4418f4c ("io_uring/net: save msghdr->msg_control for retries") Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- io_uring/net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 2bc2cb2f4d6c..c8a4b2ac00f7 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -203,7 +203,7 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags, &iomsg->free_iov); /* save msg_control as sys_sendmsg() overwrites it */ - sr->msg_control = iomsg->msg.msg_control; + sr->msg_control = iomsg->msg.msg_control_user; return ret; } @@ -302,7 +302,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (req_has_async_data(req)) { kmsg = req->async_data; - kmsg->msg.msg_control = sr->msg_control; + kmsg->msg.msg_control_user = sr->msg_control; } else { ret = io_sendmsg_copy_hdr(req, &iomsg); if (ret) From 69cbeb61ff9093a9155cb19a36d633033f71093a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 21 Jun 2023 10:58:46 -0700 Subject: [PATCH 910/934] Revert "efi: random: refresh non-volatile random seed when RNG is initialized" This reverts commit e7b813b32a42a3a6281a4fd9ae7700a0257c1d50 (and the subsequent fix for it: 41a15855c1ee "efi: random: fix NULL-deref when refreshing seed"). It turns otu to cause non-deterministic boot stalls on at least a HP 6730b laptop. Reported-and-bisected-by: Sami Korkalainen Link: https://lore.kernel.org/all/GQUnKz2al3yke5mB2i1kp3SzNHjK8vi6KJEh7rnLrOQ24OrlljeCyeWveLW9pICEmB9Qc8PKdNt3w1t_g3-Uvxq1l8Wj67PpoMeWDoH8PKk=@proton.me/ Cc: Jason A. Donenfeld Cc: Bagas Sanjaya Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/firmware/efi/efi.c | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index abeff7dc0b58..34b9e7876538 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -361,24 +361,6 @@ static void __init efi_debugfs_init(void) static inline void efi_debugfs_init(void) {} #endif -static void refresh_nv_rng_seed(struct work_struct *work) -{ - u8 seed[EFI_RANDOM_SEED_SIZE]; - - get_random_bytes(seed, sizeof(seed)); - efi.set_variable(L"RandomSeed", &LINUX_EFI_RANDOM_SEED_TABLE_GUID, - EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, sizeof(seed), seed); - memzero_explicit(seed, sizeof(seed)); -} -static int refresh_nv_rng_seed_notification(struct notifier_block *nb, unsigned long action, void *data) -{ - static DECLARE_WORK(work, refresh_nv_rng_seed); - schedule_work(&work); - return NOTIFY_DONE; -} -static struct notifier_block refresh_nv_rng_seed_nb = { .notifier_call = refresh_nv_rng_seed_notification }; - /* * We register the efi subsystem with the firmware subsystem and the * efivars subsystem with the efi subsystem, if the system was booted with @@ -451,9 +433,6 @@ static int __init efisubsys_init(void) platform_device_register_simple("efi_secret", 0, NULL, 0); #endif - if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE)) - execute_with_initialized_rng(&refresh_nv_rng_seed_nb); - return 0; err_remove_group: From c2b2ae3925b65070adb27d5a31a31c376f26dec7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jun 2023 18:24:18 +0200 Subject: [PATCH 911/934] mptcp: handle correctly disconnect() failures Currently the mptcp code has assumes that disconnect() can fail only at mptcp_sendmsg_fastopen() time - to avoid a deadlock scenario - and don't even bother returning an error code. Soon mptcp_disconnect() will handle more error conditions: let's track them explicitly. As a bonus, explicitly annotate TCP-level disconnect as not failing: the mptcp code never blocks for event on the subflows. Fixes: 7d803344fdc3 ("mptcp: fix deadlock in fastopen error path") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Tested-by: Christoph Paasch Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 67311e7d5b21..86f8a7621aff 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1727,7 +1727,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) *copied_syn = 0; } else if (ret && ret != -EINPROGRESS) { - mptcp_disconnect(sk, 0); + /* The disconnect() op called by tcp_sendmsg_fastopen()/ + * __inet_stream_connect() can fail, due to looking check, + * see mptcp_disconnect(). + * Attempt it again outside the problematic scope. + */ + if (!mptcp_disconnect(sk, 0)) + sk->sk_socket->state = SS_UNCONNECTED; } inet_sk(sk)->defer_connect = 0; @@ -2389,7 +2395,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { - tcp_disconnect(ssk, 0); + /* The MPTCP code never wait on the subflow sockets, TCP-level + * disconnect should never fail + */ + WARN_ON_ONCE(tcp_disconnect(ssk, 0)); msk->subflow->state = SS_UNCONNECTED; mptcp_subflow_ctx_reset(subflow); release_sock(ssk); @@ -2812,7 +2821,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) break; fallthrough; case TCP_SYN_SENT: - tcp_disconnect(ssk, O_NONBLOCK); + WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK)); break; default: if (__mptcp_check_fallback(mptcp_sk(sk))) { @@ -3075,11 +3084,10 @@ static int mptcp_disconnect(struct sock *sk, int flags) /* We are on the fastopen error path. We can't call straight into the * subflows cleanup code due to lock nesting (we are already under - * msk->firstsocket lock). Do nothing and leave the cleanup to the - * caller. + * msk->firstsocket lock). */ if (msk->fastopening) - return 0; + return -EBUSY; mptcp_listen_inuse_dec(sk); inet_sk_state_store(sk, TCP_CLOSE); From 0ad529d9fd2bfa3fc619552a8d2fb2f2ef0bce2e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jun 2023 18:24:19 +0200 Subject: [PATCH 912/934] mptcp: fix possible divide by zero in recvmsg() Christoph reported a divide by zero bug in mptcp_recvmsg(): divide error: 0000 [#1] PREEMPT SMP CPU: 1 PID: 19978 Comm: syz-executor.6 Not tainted 6.4.0-rc2-gffcc7899081b #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 RIP: 0010:__tcp_select_window+0x30e/0x420 net/ipv4/tcp_output.c:3018 Code: 11 ff 0f b7 cd c1 e9 0c b8 ff ff ff ff d3 e0 89 c1 f7 d1 01 cb 21 c3 eb 17 e8 2e 83 11 ff 31 db eb 0e e8 25 83 11 ff 89 d8 99 7c 24 04 29 d3 65 48 8b 04 25 28 00 00 00 48 3b 44 24 10 75 60 RSP: 0018:ffffc90000a07a18 EFLAGS: 00010246 RAX: 000000000000ffd7 RBX: 000000000000ffd7 RCX: 0000000000040000 RDX: 0000000000000000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: 000000000000ffd7 R08: ffffffff820cf297 R09: 0000000000000001 R10: 0000000000000000 R11: ffffffff8103d1a0 R12: 0000000000003f00 R13: 0000000000300000 R14: ffff888101cf3540 R15: 0000000000180000 FS: 00007f9af4c09640(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b33824000 CR3: 000000012f241001 CR4: 0000000000170ee0 Call Trace: __tcp_cleanup_rbuf+0x138/0x1d0 net/ipv4/tcp.c:1611 mptcp_recvmsg+0xcb8/0xdd0 net/mptcp/protocol.c:2034 inet_recvmsg+0x127/0x1f0 net/ipv4/af_inet.c:861 ____sys_recvmsg+0x269/0x2b0 net/socket.c:1019 ___sys_recvmsg+0xe6/0x260 net/socket.c:2764 do_recvmmsg+0x1a5/0x470 net/socket.c:2858 __do_sys_recvmmsg net/socket.c:2937 [inline] __se_sys_recvmmsg net/socket.c:2953 [inline] __x64_sys_recvmmsg+0xa6/0x130 net/socket.c:2953 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x47/0xa0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f9af58fc6a9 Code: 5c c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 4f 37 0d 00 f7 d8 64 89 01 48 RSP: 002b:00007f9af4c08cd8 EFLAGS: 00000246 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 00000000006bc050 RCX: 00007f9af58fc6a9 RDX: 0000000000000001 RSI: 0000000020000140 RDI: 0000000000000004 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000f00 R11: 0000000000000246 R12: 00000000006bc05c R13: fffffffffffffea8 R14: 00000000006bc050 R15: 000000000001fe40 mptcp_recvmsg is allowed to release the msk socket lock when blocking, and before re-acquiring it another thread could have switched the sock to TCP_LISTEN status - with a prior connect(AF_UNSPEC) - also clearing icsk_ack.rcv_mss. Address the issue preventing the disconnect if some other process is concurrently performing a blocking syscall on the same socket, alike commit 4faeee0cf8a5 ("tcp: deny tcp_disconnect() when threads are waiting"). Fixes: a6b118febbab ("mptcp: add receive buffer auto-tuning") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/404 Signed-off-by: Paolo Abeni Tested-by: Christoph Paasch Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 86f8a7621aff..ee357700b27b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3082,6 +3082,12 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); + /* Deny disconnect if other threads are blocked in sk_wait_event() + * or inet_wait_for_connect(). + */ + if (sk->sk_wait_pending) + return -EBUSY; + /* We are on the fastopen error path. We can't call straight into the * subflows cleanup code due to lock nesting (we are already under * msk->firstsocket lock). @@ -3148,6 +3154,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif + nsk->sk_wait_pending = 0; __mptcp_init_sock(nsk); msk = mptcp_sk(nsk); From 56a666c48b038e91b76471289e2cf60c79d326b9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jun 2023 18:24:20 +0200 Subject: [PATCH 913/934] mptcp: fix possible list corruption on passive MPJ At passive MPJ time, if the msk socket lock is held by the user, the new subflow is appended to the msk->join_list under the msk data lock. In mptcp_release_cb()/__mptcp_flush_join_list(), the subflows in that list are moved from the join_list into the conn_list under the msk socket lock. Append and removal could race, possibly corrupting such list. Address the issue splicing the join list into a temporary one while still under the msk data lock. Found by code inspection, the race itself should be almost impossible to trigger in practice. Fixes: 3e5014909b56 ("mptcp: cleanup MPJ subflow list handling") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index ee357700b27b..9a40dae31cec 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -850,12 +850,12 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) return true; } -static void __mptcp_flush_join_list(struct sock *sk) +static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list) { struct mptcp_subflow_context *tmp, *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) { + list_for_each_entry_safe(subflow, tmp, join_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast(ssk); @@ -3342,9 +3342,14 @@ static void mptcp_release_cb(struct sock *sk) for (;;) { unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | msk->push_pending; + struct list_head join_list; + if (!flags) break; + INIT_LIST_HEAD(&join_list); + list_splice_init(&msk->join_list, &join_list); + /* the following actions acquire the subflow socket lock * * 1) can't be invoked in atomic scope @@ -3355,8 +3360,9 @@ static void mptcp_release_cb(struct sock *sk) msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); + if (flags & BIT(MPTCP_FLUSH_JOIN_LIST)) - __mptcp_flush_join_list(sk); + __mptcp_flush_join_list(sk, &join_list); if (flags & BIT(MPTCP_PUSH_PENDING)) __mptcp_push_pending(sk, 0); if (flags & BIT(MPTCP_RETRANSMIT)) From 81c1d029016001f994ce1c46849c5e9900d8eab8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jun 2023 18:24:21 +0200 Subject: [PATCH 914/934] mptcp: consolidate fallback and non fallback state machine An orphaned msk releases the used resources via the worker, when the latter first see the msk in CLOSED status. If the msk status transitions to TCP_CLOSE in the release callback invoked by the worker's final release_sock(), such instance of the workqueue will not take any action. Additionally the MPTCP code prevents scheduling the worker once the socket reaches the CLOSE status: such msk resources will be leaked. The only code path that can trigger the above scenario is the __mptcp_check_send_data_fin() in fallback mode. Address the issue removing the special handling of fallback socket in __mptcp_check_send_data_fin(), consolidating the state machine for fallback and non fallback socket. Since non-fallback sockets do not send and do not receive data_fin, the mptcp code can update the msk internal status to match the next step in the SM every time data fin (ack) should be generated or received. As a consequence we can remove a bunch of checks for fallback from the fastpath. Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 41 +++++++++++++++-------------------------- net/mptcp/subflow.c | 17 ++++++++++------- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9a40dae31cec..27d206f7af62 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -44,7 +44,7 @@ enum { static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp; static void __mptcp_destroy_sock(struct sock *sk); -static void __mptcp_check_send_data_fin(struct sock *sk); +static void mptcp_check_send_data_fin(struct sock *sk); DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); static struct net_device mptcp_napi_dev; @@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - return !__mptcp_check_fallback(msk) && - ((1 << sk->sk_state) & + return ((1 << sk->sk_state) & (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) && msk->write_seq == READ_ONCE(msk->snd_una); } @@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk) u64 rcv_data_fin_seq; bool ret = false; - if (__mptcp_check_fallback(msk)) - return ret; - /* Need to ack a DATA_FIN received from a peer while this side * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2. * msk->rcv_data_fin was set when parsing the incoming options @@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk) } ret = true; - mptcp_send_ack(msk); + if (!__mptcp_check_fallback(msk)) + mptcp_send_ack(msk); mptcp_close_wake_up(sk); } return ret; @@ -1609,7 +1606,7 @@ out: if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); if (do_check_data_fin) - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); } static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first) @@ -2680,8 +2677,6 @@ static void mptcp_worker(struct work_struct *work) if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN))) goto unlock; - mptcp_check_data_fin_ack(sk); - mptcp_check_fastclose(msk); mptcp_pm_nl_work(msk); @@ -2689,7 +2684,8 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) mptcp_check_for_eof(msk); - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); + mptcp_check_data_fin_ack(sk); mptcp_check_data_fin(sk); if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) @@ -2828,6 +2824,12 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) pr_debug("Fallback"); ssk->sk_shutdown |= how; tcp_shutdown(ssk, how); + + /* simulate the data_fin ack reception to let the state + * machine move forward + */ + WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt); + mptcp_schedule_work(sk); } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); @@ -2867,7 +2869,7 @@ static int mptcp_close_state(struct sock *sk) return next & TCP_ACTION_FIN; } -static void __mptcp_check_send_data_fin(struct sock *sk) +static void mptcp_check_send_data_fin(struct sock *sk) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); @@ -2885,19 +2887,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk) WRITE_ONCE(msk->snd_nxt, msk->write_seq); - /* fallback socket will not get data_fin/ack, can move to the next - * state now - */ - if (__mptcp_check_fallback(msk)) { - WRITE_ONCE(msk->snd_una, msk->write_seq); - if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) { - inet_sk_state_store(sk, TCP_CLOSE); - mptcp_close_wake_up(sk); - } else if (sk->sk_state == TCP_FIN_WAIT1) { - inet_sk_state_store(sk, TCP_FIN_WAIT2); - } - } - mptcp_for_each_subflow(msk, subflow) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); @@ -2917,7 +2906,7 @@ static void __mptcp_wr_shutdown(struct sock *sk) WRITE_ONCE(msk->write_seq, msk->write_seq + 1); WRITE_ONCE(msk->snd_data_fin_enable, 1); - __mptcp_check_send_data_fin(sk); + mptcp_check_send_data_fin(sk); } static void __mptcp_destroy_sock(struct sock *sk) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4688daa6b38b..d9c8b21c6076 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1749,14 +1749,16 @@ static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; + struct mptcp_sock *msk; __subflow_state_change(sk); + msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); - mptcp_rcv_space_init(mptcp_sk(parent), sk); - pr_fallback(mptcp_sk(parent)); + mptcp_rcv_space_init(msk, sk); + pr_fallback(msk); subflow->conn_finished = 1; mptcp_set_connected(parent); } @@ -1772,11 +1774,12 @@ static void subflow_state_change(struct sock *sk) subflow_sched_work_if_closed(mptcp_sk(parent), sk); - if (__mptcp_check_fallback(mptcp_sk(parent)) && - !subflow->rx_eof && subflow_is_done(sk)) { - subflow->rx_eof = 1; - mptcp_subflow_eof(parent); - } + /* when the fallback subflow closes the rx side, trigger a 'dummy' + * ingress data fin, so that the msk state will follow along + */ + if (__mptcp_check_fallback(msk) && subflow_is_done(sk) && msk->first == sk && + mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true)) + mptcp_schedule_work(parent); } void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) From b7535cfed223a9f02f9530853616f197b386d775 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jun 2023 18:24:22 +0200 Subject: [PATCH 915/934] mptcp: drop legacy code around RX EOF Thanks to the previous patch -- "mptcp: consolidate fallback and non fallback state machine" -- we can finally drop the "temporary hack" used to detect rx eof. Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 49 -------------------------------------------- net/mptcp/protocol.h | 5 +---- 2 files changed, 1 insertion(+), 53 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 27d206f7af62..a66ec341485e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -894,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk) return false; } -void mptcp_subflow_eof(struct sock *sk) -{ - if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags)) - mptcp_schedule_work(sk); -} - -static void mptcp_check_for_eof(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow; - struct sock *sk = (struct sock *)msk; - int receivers = 0; - - mptcp_for_each_subflow(msk, subflow) - receivers += !subflow->rx_eof; - if (receivers) - return; - - if (!(sk->sk_shutdown & RCV_SHUTDOWN)) { - /* hopefully temporary hack: propagate shutdown status - * to msk, when all subflows agree on it - */ - WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); - - smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ - sk->sk_data_ready(sk); - } - - switch (sk->sk_state) { - case TCP_ESTABLISHED: - inet_sk_state_store(sk, TCP_CLOSE_WAIT); - break; - case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_CLOSING); - break; - case TCP_FIN_WAIT2: - inet_sk_state_store(sk, TCP_CLOSE); - break; - default: - return; - } - mptcp_close_wake_up(sk); -} - static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -2161,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, break; } - if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) - mptcp_check_for_eof(msk); - if (sk->sk_shutdown & RCV_SHUTDOWN) { /* race breaker: the shutdown could be after the * previous receive queue check @@ -2681,9 +2635,6 @@ static void mptcp_worker(struct work_struct *work) mptcp_pm_nl_work(msk); - if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) - mptcp_check_for_eof(msk); - mptcp_check_send_data_fin(sk); mptcp_check_data_fin_ack(sk); mptcp_check_data_fin(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 70c957bc56a8..d3783a7056e1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -113,7 +113,6 @@ /* MPTCP socket atomic flags */ #define MPTCP_NOSPACE 1 #define MPTCP_WORK_RTX 2 -#define MPTCP_WORK_EOF 3 #define MPTCP_FALLBACK_DONE 4 #define MPTCP_WORK_CLOSE_SUBFLOW 5 @@ -476,14 +475,13 @@ struct mptcp_subflow_context { send_mp_fail : 1, send_fastclose : 1, send_infinite_map : 1, - rx_eof : 1, remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 8; + __unused : 9; enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; @@ -720,7 +718,6 @@ static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit) void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void __mptcp_error_report(struct sock *sk); -void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) { From 57fc0f1ceaa4016354cf6f88533e20b56190e41a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Jun 2023 18:24:23 +0200 Subject: [PATCH 916/934] mptcp: ensure listener is unhashed before updating the sk status The MPTCP protocol access the listener subflow in a lockless manner in a couple of places (poll, diag). That works only if the msk itself leaves the listener status only after that the subflow itself has been closed/disconnected. Otherwise we risk deadlock in diag, as reported by Christoph. Address the issue ensuring that the first subflow (the listener one) is always disconnected before updating the msk socket status. Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/407 Fixes: b29fcfb54cd7 ("mptcp: full disconnect implementation") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 1 + net/mptcp/protocol.c | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 59f8f3124855..1224dfca5bf3 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1047,6 +1047,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; + inet_sk_state_store(newsk, TCP_LISTEN); err = kernel_listen(ssock, backlog); if (err) return err; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a66ec341485e..a6c7f2d24909 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2368,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, kfree_rcu(subflow, rcu); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ - if (ssk->sk_state == TCP_LISTEN) { - tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(sk, ssk); - inet_csk_listen_stop(ssk); - mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); - } - __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -2902,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk) return EPOLLIN | EPOLLRDNORM; } -static void mptcp_listen_inuse_dec(struct sock *sk) +static void mptcp_check_listen_stop(struct sock *sk) { - if (inet_sk_state_load(sk) == TCP_LISTEN) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + struct sock *ssk; + + if (inet_sk_state_load(sk) != TCP_LISTEN) + return; + + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + ssk = mptcp_sk(sk)->first; + if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN)) + return; + + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + mptcp_subflow_queue_clean(sk, ssk); + inet_csk_listen_stop(ssk); + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED); + tcp_set_state(ssk, TCP_CLOSE); + release_sock(ssk); } bool __mptcp_close(struct sock *sk, long timeout) @@ -2918,7 +2925,7 @@ bool __mptcp_close(struct sock *sk, long timeout) WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { - mptcp_listen_inuse_dec(sk); + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); goto cleanup; } @@ -3035,7 +3042,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) if (msk->fastopening) return -EBUSY; - mptcp_listen_inuse_dec(sk); + mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_timer(sk); From 7f4e09700bdc13ce9aafa279bc999051e9bcda35 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 21 Jun 2023 14:05:44 +0200 Subject: [PATCH 917/934] wifi: mac80211: report all unusable beacon frames Properly check for RX_DROP_UNUSABLE now that the new drop reason infrastructure is used. Without this change, the comparison will always be false as a more specific reason is given in the lower bits of result. Fixes: baa951a1c177 ("mac80211: use the new drop reasons infrastructure") Signed-off-by: Benjamin Berg Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20230621120543.412920-2-johannes@sipsolutions.net Signed-off-by: Jakub Kicinski --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d996aa2579df..fc6e130364da 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2110,7 +2110,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* either the frame has been decrypted or will be dropped */ status->flag |= RX_FLAG_DECRYPTED; - if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE && + if (unlikely(ieee80211_is_beacon(fc) && (result & RX_DROP_UNUSABLE) && rx->sdata->dev)) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, skb->data, skb->len); From c7c059fba6fb19c3bc924925c984772e733cb594 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 20 Jun 2023 14:45:15 +0200 Subject: [PATCH 918/934] selftests: forwarding: Fix race condition in mirror installation When mirroring to a gretap in hardware the device expects to be programmed with the egress port and all the encapsulating headers. This requires the driver to resolve the path the packet will take in the software data path and program the device accordingly. If the path cannot be resolved (in this case because of an unresolved neighbor), then mirror installation fails until the path is resolved. This results in a race that causes the test to sometimes fail. Fix this by setting the neighbor's state to permanent in a couple of tests, so that it is always valid. Fixes: 35c31d5c323f ("selftests: forwarding: Test mirror-to-gretap w/ UL 802.1d") Fixes: 239e754af854 ("selftests: forwarding: Test mirror-to-gretap w/ UL 802.1q") Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/268816ac729cb6028c7a34d4dda6f4ec7af55333.1687264607.git.petrm@nvidia.com Signed-off-by: Paolo Abeni --- .../testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh | 4 ++++ .../testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh index c5095da7f6bf..aec752a22e9e 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh @@ -93,12 +93,16 @@ cleanup() test_gretap() { + ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" } test_ip6gretap() { + ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap" full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh index 9ff22f28032d..0cf4c47a46f9 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh @@ -90,12 +90,16 @@ cleanup() test_gretap() { + ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \ + nud permanent dev br1 full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" } test_ip6gretap() { + ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \ + nud permanent dev br1 full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap" full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } From 146b6f6855e7656e8329910606595220c761daac Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Thu, 22 Jun 2023 11:33:09 +0530 Subject: [PATCH 919/934] platform/x86/amd/pmf: Register notify handler only if SPS is enabled Power source notify handler is getting registered even when none of the PMF feature in enabled leading to a crash. ... [ 22.592162] Call Trace: [ 22.592164] [ 22.592164] ? rcu_note_context_switch+0x5e0/0x660 [ 22.592166] ? __warn+0x81/0x130 [ 22.592171] ? rcu_note_context_switch+0x5e0/0x660 [ 22.592172] ? report_bug+0x171/0x1a0 [ 22.592175] ? prb_read_valid+0x1b/0x30 [ 22.592177] ? handle_bug+0x3c/0x80 [ 22.592178] ? exc_invalid_op+0x17/0x70 [ 22.592179] ? asm_exc_invalid_op+0x1a/0x20 [ 22.592182] ? rcu_note_context_switch+0x5e0/0x660 [ 22.592183] ? acpi_ut_delete_object_desc+0x86/0xb0 [ 22.592186] ? acpi_ut_update_ref_count.part.0+0x22d/0x930 [ 22.592187] __schedule+0xc0/0x1410 [ 22.592189] ? ktime_get+0x3c/0xa0 [ 22.592191] ? lapic_next_event+0x1d/0x30 [ 22.592193] ? hrtimer_start_range_ns+0x25b/0x350 [ 22.592196] schedule+0x5e/0xd0 [ 22.592197] schedule_hrtimeout_range_clock+0xbe/0x140 [ 22.592199] ? __pfx_hrtimer_wakeup+0x10/0x10 [ 22.592200] usleep_range_state+0x64/0x90 [ 22.592203] amd_pmf_send_cmd+0x106/0x2a0 [amd_pmf bddfe0fe3712aaa99acce3d5487405c5213c6616] [ 22.592207] amd_pmf_update_slider+0x56/0x1b0 [amd_pmf bddfe0fe3712aaa99acce3d5487405c5213c6616] [ 22.592210] amd_pmf_set_sps_power_limits+0x72/0x80 [amd_pmf bddfe0fe3712aaa99acce3d5487405c5213c6616] [ 22.592213] amd_pmf_pwr_src_notify_call+0x49/0x90 [amd_pmf bddfe0fe3712aaa99acce3d5487405c5213c6616] [ 22.592216] notifier_call_chain+0x5a/0xd0 [ 22.592218] atomic_notifier_call_chain+0x32/0x50 ... Fix this by moving the registration of source change notify handler only when SPS(Static Slider) is advertised as supported. Reported-by: Allen Zhong Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217571 Fixes: 4c71ae414474 ("platform/x86/amd/pmf: Add support SPS PMF feature") Tested-by: Patil Rajesh Reddy Reviewed-by: Mario Limonciello Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20230622060309.310001-1-Shyam-sundar.S-k@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmf/core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index ee5f124f78b6..7780705917b7 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -297,6 +297,8 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev) /* Enable Static Slider */ if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) { amd_pmf_init_sps(dev); + dev->pwr_src_notifier.notifier_call = amd_pmf_pwr_src_notify_call; + power_supply_reg_notifier(&dev->pwr_src_notifier); dev_dbg(dev->dev, "SPS enabled and Platform Profiles registered\n"); } @@ -315,8 +317,10 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev) static void amd_pmf_deinit_features(struct amd_pmf_dev *dev) { - if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) + if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) { + power_supply_unreg_notifier(&dev->pwr_src_notifier); amd_pmf_deinit_sps(dev); + } if (is_apmf_func_supported(dev, APMF_FUNC_AUTO_MODE)) { amd_pmf_deinit_auto_mode(dev); @@ -399,9 +403,6 @@ static int amd_pmf_probe(struct platform_device *pdev) apmf_install_handler(dev); amd_pmf_dbgfs_register(dev); - dev->pwr_src_notifier.notifier_call = amd_pmf_pwr_src_notify_call; - power_supply_reg_notifier(&dev->pwr_src_notifier); - dev_info(dev->dev, "registered PMF device successfully\n"); return 0; @@ -411,7 +412,6 @@ static void amd_pmf_remove(struct platform_device *pdev) { struct amd_pmf_dev *dev = platform_get_drvdata(pdev); - power_supply_unreg_notifier(&dev->pwr_src_notifier); amd_pmf_deinit_features(dev); apmf_acpi_deinit(dev); amd_pmf_dbgfs_unregister(dev); From 2174a08db80d1efeea382e25ac41c4e7511eb6d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Jun 2023 18:44:25 +0000 Subject: [PATCH 920/934] sch_netem: acquire qdisc lock in netem_change() syzbot managed to trigger a divide error [1] in netem. It could happen if q->rate changes while netem_enqueue() is running, since q->rate is read twice. It turns out netem_change() always lacked proper synchronization. [1] divide error: 0000 [#1] SMP KASAN CPU: 1 PID: 7867 Comm: syz-executor.1 Not tainted 6.1.30-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/25/2023 RIP: 0010:div64_u64 include/linux/math64.h:69 [inline] RIP: 0010:packet_time_ns net/sched/sch_netem.c:357 [inline] RIP: 0010:netem_enqueue+0x2067/0x36d0 net/sched/sch_netem.c:576 Code: 89 e2 48 69 da 00 ca 9a 3b 42 80 3c 28 00 4c 8b a4 24 88 00 00 00 74 0d 4c 89 e7 e8 c3 4f 3b fd 48 8b 4c 24 18 48 89 d8 31 d2 <49> f7 34 24 49 01 c7 4c 8b 64 24 48 4d 01 f7 4c 89 e3 48 c1 eb 03 RSP: 0018:ffffc9000dccea60 EFLAGS: 00010246 RAX: 000001a442624200 RBX: 000001a442624200 RCX: ffff888108a4f000 RDX: 0000000000000000 RSI: 000000000000070d RDI: 000000000000070d RBP: ffffc9000dcceb90 R08: ffffffff849c5e26 R09: fffffbfff10e1297 R10: 0000000000000000 R11: dffffc0000000001 R12: ffff888108a4f358 R13: dffffc0000000000 R14: 0000001a8cd9a7ec R15: 0000000000000000 FS: 00007fa73fe18700(0000) GS:ffff8881f6b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa73fdf7718 CR3: 000000011d36e000 CR4: 0000000000350ee0 Call Trace: [] __dev_xmit_skb net/core/dev.c:3931 [inline] [] __dev_queue_xmit+0xcf5/0x3370 net/core/dev.c:4290 [] dev_queue_xmit include/linux/netdevice.h:3030 [inline] [] neigh_hh_output include/net/neighbour.h:531 [inline] [] neigh_output include/net/neighbour.h:545 [inline] [] ip_finish_output2+0xb92/0x10d0 net/ipv4/ip_output.c:235 [] __ip_finish_output+0xc3/0x2b0 [] ip_finish_output+0x31/0x2a0 net/ipv4/ip_output.c:323 [] NF_HOOK_COND include/linux/netfilter.h:298 [inline] [] ip_output+0x224/0x2a0 net/ipv4/ip_output.c:437 [] dst_output include/net/dst.h:444 [inline] [] ip_local_out net/ipv4/ip_output.c:127 [inline] [] __ip_queue_xmit+0x1425/0x2000 net/ipv4/ip_output.c:542 [] ip_queue_xmit+0x4c/0x70 net/ipv4/ip_output.c:556 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Stephen Hemminger Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Reviewed-by: Jamal Hadi Salim Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230620184425.1179809-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/sched/sch_netem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6ef3021e1169..e79be1b3e74d 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -966,6 +966,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, if (ret < 0) return ret; + sch_tree_lock(sch); /* backup q->clg and q->loss_model */ old_clg = q->clg; old_loss_model = q->loss_model; @@ -974,7 +975,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]); if (ret) { q->loss_model = old_loss_model; - return ret; + goto unlock; } } else { q->loss_model = CLG_RANDOM; @@ -1041,6 +1042,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, /* capping jitter to the range acceptable by tabledist() */ q->jitter = min_t(s64, abs(q->jitter), INT_MAX); +unlock: + sch_tree_unlock(sch); return ret; get_table_failure: @@ -1050,7 +1053,8 @@ get_table_failure: */ q->clg = old_clg; q->loss_model = old_loss_model; - return ret; + + goto unlock; } static int netem_init(struct Qdisc *sch, struct nlattr *opt, From dec24b3b339487e58ce2da2875e9ee0316cc7e70 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 20 Jun 2023 12:42:38 -0700 Subject: [PATCH 921/934] net: wwan: iosm: Convert single instance struct member to flexible array struct mux_adth actually ends with multiple struct mux_adth_dg members. This is seen both in the comments about the member: /** * struct mux_adth - Structure of the Aggregated Datagram Table Header. ... * @dg: datagramm table with variable length */ and in the preparation for populating it: adth_dg_size = offsetof(struct mux_adth, dg) + ul_adb->dg_count[i] * sizeof(*dg); ... adth_dg_size -= offsetof(struct mux_adth, dg); memcpy(&adth->dg, ul_adb->dg[i], adth_dg_size); This was reported as a run-time false positive warning: memcpy: detected field-spanning write (size 16) of single field "&adth->dg" at drivers/net/wwan/iosm/iosm_ipc_mux_codec.c:852 (size 8) Adjust the struct mux_adth definition and associated sizeof() math; no binary output differences are observed in the resulting object file. Reported-by: Florian Klink Closes: https://lore.kernel.org/lkml/dbfa25f5-64c8-5574-4f5d-0151ba95d232@gmail.com/ Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support") Cc: M Chetan Kumar Cc: Bagas Sanjaya Cc: Intel Corporation Cc: Loic Poulain Cc: Sergey Ryazanov Cc: Johannes Berg Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: "Gustavo A. R. Silva" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230620194234.never.023-kees@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 15 ++++++--------- drivers/net/wwan/iosm/iosm_ipc_mux_codec.h | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index d6b166fc5c0e..bff46f7ca59f 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -626,14 +626,12 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux, if (adth->signature != cpu_to_le32(IOSM_AGGR_MUX_SIG_ADTH)) goto adb_decode_err; - if (le16_to_cpu(adth->table_length) < (sizeof(struct mux_adth) - - sizeof(struct mux_adth_dg))) + if (le16_to_cpu(adth->table_length) < sizeof(struct mux_adth)) goto adb_decode_err; /* Calculate the number of datagrams. */ nr_of_dg = (le16_to_cpu(adth->table_length) - - sizeof(struct mux_adth) + - sizeof(struct mux_adth_dg)) / + sizeof(struct mux_adth)) / sizeof(struct mux_adth_dg); /* Is the datagram table empty ? */ @@ -649,7 +647,7 @@ static void mux_dl_adb_decode(struct iosm_mux *ipc_mux, } /* New aggregated datagram table. */ - dg = &adth->dg; + dg = adth->dg; if (mux_dl_process_dg(ipc_mux, adbh, dg, skb, if_id, nr_of_dg) < 0) goto adb_decode_err; @@ -849,7 +847,7 @@ static void ipc_mux_ul_encode_adth(struct iosm_mux *ipc_mux, adth->if_id = i; adth->table_length = cpu_to_le16(adth_dg_size); adth_dg_size -= offsetof(struct mux_adth, dg); - memcpy(&adth->dg, ul_adb->dg[i], adth_dg_size); + memcpy(adth->dg, ul_adb->dg[i], adth_dg_size); ul_adb->if_cnt++; } @@ -1426,14 +1424,13 @@ static int ipc_mux_get_payload_from_adb(struct iosm_mux *ipc_mux, if (adth->signature == cpu_to_le32(IOSM_AGGR_MUX_SIG_ADTH)) { nr_of_dg = (le16_to_cpu(adth->table_length) - - sizeof(struct mux_adth) + - sizeof(struct mux_adth_dg)) / + sizeof(struct mux_adth)) / sizeof(struct mux_adth_dg); if (nr_of_dg <= 0) return payload_size; - dg = &adth->dg; + dg = adth->dg; for (i = 0; i < nr_of_dg; i++, dg++) { if (le32_to_cpu(dg->datagram_index) < diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h index 5d4e3b89542c..f8df88f816c4 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h @@ -161,7 +161,7 @@ struct mux_adth { u8 opt_ipv4v6; __le32 next_table_index; __le32 reserved2; - struct mux_adth_dg dg; + struct mux_adth_dg dg[]; }; /** From a9628e88776eb7d045cf46467f1afdd0f7fe72ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Sun, 18 Jun 2023 03:31:30 -0700 Subject: [PATCH 922/934] revert "net: align SO_RCVMARK required privileges with SO_MARK" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 1f86123b9749 ("net: align SO_RCVMARK required privileges with SO_MARK") because the reasoning in the commit message is not really correct: SO_RCVMARK is used for 'reading' incoming skb mark (via cmsg), as such it is more equivalent to 'getsockopt(SO_MARK)' which has no priv check and retrieves the socket mark, rather than 'setsockopt(SO_MARK) which sets the socket mark and does require privs. Additionally incoming skb->mark may already be visible if sysctl_fwmark_reflect and/or sysctl_tcp_fwmark_accept are enabled. Furthermore, it is easier to block the getsockopt via bpf (either cgroup setsockopt hook, or via syscall filters) then to unblock it if it requires CAP_NET_RAW/ADMIN. On Android the socket mark is (among other things) used to store the network identifier a socket is bound to. Setting it is privileged, but retrieving it is not. We'd like unprivileged userspace to be able to read the network id of incoming packets (where mark is set via iptables [to be moved to bpf])... An alternative would be to add another sysctl to control whether setting SO_RCVMARK is privilged or not. (or even a MASK of which bits in the mark can be exposed) But this seems like over-engineering... Note: This is a non-trivial revert, due to later merged commit e42c7beee71d ("bpf: net: Consider has_current_bpf_ctx() when testing capable() in sk_setsockopt()") which changed both 'ns_capable' into 'sockopt_ns_capable' calls. Fixes: 1f86123b9749 ("net: align SO_RCVMARK required privileges with SO_MARK") Cc: Larysa Zaremba Cc: Simon Horman Cc: Paolo Abeni Cc: Eyal Birger Cc: Jakub Kicinski Cc: Eric Dumazet Cc: Patrick Rohr Signed-off-by: Maciej Żenczykowski Reviewed-by: Simon Horman Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230618103130.51628-1-maze@google.com Signed-off-by: Paolo Abeni --- net/core/sock.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 24f2761bdb1d..6e5662ca00fe 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1362,12 +1362,6 @@ set_sndbuf: __sock_set_mark(sk, val); break; case SO_RCVMARK: - if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { - ret = -EPERM; - break; - } - sock_valbool_flag(sk, SOCK_RCVMARK, valbool); break; From 9c39b7a905d84b7da5f59d80f2e455853fea7217 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 22 Jun 2023 16:42:49 +0800 Subject: [PATCH 923/934] block: make sure local irq is disabled when calling __blkcg_rstat_flush When __blkcg_rstat_flush() is called from cgroup_rstat_flush*() code path, interrupt is always disabled. When we start to flush blkcg per-cpu stats list in __blkg_release() for avoiding to leak blkcg_gq's reference in commit 20cb1c2fb756 ("blk-cgroup: Flush stats before releasing blkcg_gq"), local irq isn't disabled yet, then lockdep warning may be triggered because the dependent cgroup locks may be acquired from irq(soft irq) handler. Fix the issue by disabling local irq always. Fixes: 20cb1c2fb756 ("blk-cgroup: Flush stats before releasing blkcg_gq") Reported-by: Shinichiro Kawasaki Closes: https://lore.kernel.org/linux-block/pz2wzwnmn5tk3pwpskmjhli6g3qly7eoknilb26of376c7kwxy@qydzpvt6zpis/T/#u Cc: stable@vger.kernel.org Cc: Jay Shin Cc: Tejun Heo Cc: Waiman Long Signed-off-by: Ming Lei Reviewed-by: Waiman Long Link: https://lore.kernel.org/r/20230622084249.1208005-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f0b5c9c41cde..dce1548a7a0c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -970,6 +970,7 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu); struct llist_node *lnode; struct blkg_iostat_set *bisc, *next_bisc; + unsigned long flags; rcu_read_lock(); @@ -983,7 +984,7 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) * When flushing from cgroup, cgroup_rstat_lock is always held, so * this lock won't cause contention most of time. */ - raw_spin_lock(&blkg_stat_lock); + raw_spin_lock_irqsave(&blkg_stat_lock, flags); /* * Iterate only the iostat_cpu's queued in the lockless list. @@ -1009,7 +1010,7 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) blkcg_iostat_update(parent, &blkg->iostat.cur, &blkg->iostat.last); } - raw_spin_unlock(&blkg_stat_lock); + raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); out: rcu_read_unlock(); } From cb091225a538005965b7c59c7c33ebe5358a5815 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 22 Jun 2023 14:42:40 +0800 Subject: [PATCH 924/934] btrfs: fix remaining u32 overflows when left shifting stripe_nr There was regression caused by a97699d1d610 ("btrfs: replace map_lookup->stripe_len by BTRFS_STRIPE_LEN") and supposedly fixed by a7299a18a179 ("btrfs: fix u32 overflows when left shifting stripe_nr"). To avoid code churn the fix was open coding the type casts but unfortunately missed one which was still possible to hit [1]. The missing place was assignment of bioc->full_stripe_logical inside btrfs_map_block(). Fix it by adding a helper that does the safe calculation of the offset and use it everywhere even though it may not be strictly necessary due to already using u64 types. This replaces all remaining "<< BTRFS_STRIPE_LEN_SHIFT" calls. [1] https://lore.kernel.org/linux-btrfs/20230622065438.86402-1-wqu@suse.com/ Fixes: a7299a18a179 ("btrfs: fix u32 overflows when left shifting stripe_nr") Signed-off-by: Qu Wenruo Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 2 +- fs/btrfs/scrub.c | 22 +++++++++++----------- fs/btrfs/tree-checker.c | 4 ++-- fs/btrfs/volumes.c | 29 +++++++++++++++-------------- fs/btrfs/volumes.h | 11 +++++++++++ 5 files changed, 40 insertions(+), 28 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 590b03560265..e97af2e510c3 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1973,7 +1973,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, /* For RAID5/6 adjust to a full IO stripe length */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) - io_stripe_size = nr_data_stripes(map) << BTRFS_STRIPE_LEN_SHIFT; + io_stripe_size = btrfs_stripe_nr_to_offset(nr_data_stripes(map)); buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS); if (!buf) { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index bceaa8c2007e..16c228344cbb 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1304,7 +1304,7 @@ static int get_raid56_logic_offset(u64 physical, int num, u32 stripe_index; u32 rot; - *offset = last_offset + (i << BTRFS_STRIPE_LEN_SHIFT); + *offset = last_offset + btrfs_stripe_nr_to_offset(i); stripe_nr = (u32)(*offset >> BTRFS_STRIPE_LEN_SHIFT) / data_stripes; @@ -1319,7 +1319,7 @@ static int get_raid56_logic_offset(u64 physical, int num, if (stripe_index < num) j++; } - *offset = last_offset + (j << BTRFS_STRIPE_LEN_SHIFT); + *offset = last_offset + btrfs_stripe_nr_to_offset(j); return 1; } @@ -1715,7 +1715,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx) ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state)); scrub_throttle_dev_io(sctx, sctx->stripes[0].dev, - nr_stripes << BTRFS_STRIPE_LEN_SHIFT); + btrfs_stripe_nr_to_offset(nr_stripes)); for (int i = 0; i < nr_stripes; i++) { stripe = &sctx->stripes[i]; scrub_submit_initial_read(sctx, stripe); @@ -1838,7 +1838,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, bool all_empty = true; const int data_stripes = nr_data_stripes(map); unsigned long extent_bitmap = 0; - u64 length = data_stripes << BTRFS_STRIPE_LEN_SHIFT; + u64 length = btrfs_stripe_nr_to_offset(data_stripes); int ret; ASSERT(sctx->raid56_data_stripes); @@ -1853,13 +1853,13 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, data_stripes) >> BTRFS_STRIPE_LEN_SHIFT; stripe_index = (i + rot) % map->num_stripes; physical = map->stripes[stripe_index].physical + - (rot << BTRFS_STRIPE_LEN_SHIFT); + btrfs_stripe_nr_to_offset(rot); scrub_reset_stripe(stripe); set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state); ret = scrub_find_fill_first_stripe(bg, map->stripes[stripe_index].dev, physical, 1, - full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT), + full_stripe_start + btrfs_stripe_nr_to_offset(i), BTRFS_STRIPE_LEN, stripe); if (ret < 0) goto out; @@ -1869,7 +1869,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, */ if (ret > 0) { stripe->logical = full_stripe_start + - (i << BTRFS_STRIPE_LEN_SHIFT); + btrfs_stripe_nr_to_offset(i); stripe->dev = map->stripes[stripe_index].dev; stripe->mirror_num = 1; set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state); @@ -2062,7 +2062,7 @@ static u64 simple_stripe_full_stripe_len(const struct map_lookup *map) ASSERT(map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)); - return (map->num_stripes / map->sub_stripes) << BTRFS_STRIPE_LEN_SHIFT; + return btrfs_stripe_nr_to_offset(map->num_stripes / map->sub_stripes); } /* Get the logical bytenr for the stripe */ @@ -2078,7 +2078,7 @@ static u64 simple_stripe_get_logical(struct map_lookup *map, * (stripe_index / sub_stripes) gives how many data stripes we need to * skip. */ - return ((stripe_index / map->sub_stripes) << BTRFS_STRIPE_LEN_SHIFT) + + return btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes) + bg->start; } @@ -2204,7 +2204,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, } if (profile & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { ret = scrub_simple_stripe(sctx, bg, map, scrub_dev, stripe_index); - offset = (stripe_index / map->sub_stripes) << BTRFS_STRIPE_LEN_SHIFT; + offset = btrfs_stripe_nr_to_offset(stripe_index / map->sub_stripes); goto out; } @@ -2219,7 +2219,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, /* Initialize @offset in case we need to go to out: label */ get_raid56_logic_offset(physical, stripe_index, map, &offset, NULL); - increment = nr_data_stripes(map) << BTRFS_STRIPE_LEN_SHIFT; + increment = btrfs_stripe_nr_to_offset(nr_data_stripes(map)); /* * Due to the rotation, for RAID56 it's better to iterate each stripe diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index e2b54793bf0c..2138e9fc0564 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -857,10 +857,10 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, * * Thus it should be a good way to catch obvious bitflips. */ - if (unlikely(length >= ((u64)U32_MAX << BTRFS_STRIPE_LEN_SHIFT))) { + if (unlikely(length >= btrfs_stripe_nr_to_offset(U32_MAX))) { chunk_err(leaf, chunk, logical, "chunk length too large: have %llu limit %llu", - length, (u64)U32_MAX << BTRFS_STRIPE_LEN_SHIFT); + length, btrfs_stripe_nr_to_offset(U32_MAX)); return -EUCLEAN; } if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK | diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e60beb14852a..72a838c97534 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5125,7 +5125,7 @@ static void init_alloc_chunk_ctl_policy_regular( /* We don't want a chunk larger than 10% of writable space */ ctl->max_chunk_size = min(mult_perc(fs_devices->total_rw_bytes, 10), ctl->max_chunk_size); - ctl->dev_extent_min = ctl->dev_stripes << BTRFS_STRIPE_LEN_SHIFT; + ctl->dev_extent_min = btrfs_stripe_nr_to_offset(ctl->dev_stripes); } static void init_alloc_chunk_ctl_policy_zoned( @@ -5801,7 +5801,7 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, if (!WARN_ON(IS_ERR(em))) { map = em->map_lookup; if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) - len = nr_data_stripes(map) << BTRFS_STRIPE_LEN_SHIFT; + len = btrfs_stripe_nr_to_offset(nr_data_stripes(map)); free_extent_map(em); } return len; @@ -5975,12 +5975,12 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT; /* stripe_offset is the offset of this block in its stripe */ - stripe_offset = offset - ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT); + stripe_offset = offset - btrfs_stripe_nr_to_offset(stripe_nr); stripe_nr_end = round_up(offset + length, BTRFS_STRIPE_LEN) >> BTRFS_STRIPE_LEN_SHIFT; stripe_cnt = stripe_nr_end - stripe_nr; - stripe_end_offset = ((u64)stripe_nr_end << BTRFS_STRIPE_LEN_SHIFT) - + stripe_end_offset = btrfs_stripe_nr_to_offset(stripe_nr_end) - (offset + length); /* * after this, stripe_nr is the number of stripes on this @@ -6023,12 +6023,12 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, for (i = 0; i < *num_stripes; i++) { stripes[i].physical = map->stripes[stripe_index].physical + - stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT); + stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr); stripes[i].dev = map->stripes[stripe_index].dev; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { - stripes[i].length = stripes_per_dev << BTRFS_STRIPE_LEN_SHIFT; + stripes[i].length = btrfs_stripe_nr_to_offset(stripes_per_dev); if (i / sub_stripes < remaining_stripes) stripes[i].length += BTRFS_STRIPE_LEN; @@ -6183,8 +6183,8 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op, ASSERT(*stripe_offset < U32_MAX); if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - unsigned long full_stripe_len = nr_data_stripes(map) << - BTRFS_STRIPE_LEN_SHIFT; + unsigned long full_stripe_len = + btrfs_stripe_nr_to_offset(nr_data_stripes(map)); /* * For full stripe start, we use previously calculated @@ -6196,8 +6196,8 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op, * not ensured to be power of 2. */ *full_stripe_start = - (u64)rounddown(*stripe_nr, nr_data_stripes(map)) << - BTRFS_STRIPE_LEN_SHIFT; + btrfs_stripe_nr_to_offset( + rounddown(*stripe_nr, nr_data_stripes(map))); ASSERT(*full_stripe_start + full_stripe_len > offset); ASSERT(*full_stripe_start <= offset); @@ -6223,7 +6223,7 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup * { dst->dev = map->stripes[stripe_index].dev; dst->physical = map->stripes[stripe_index].physical + - stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT); + stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr); } int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, @@ -6345,7 +6345,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, /* Return the length to the full stripe end */ *length = min(logical + *length, raid56_full_stripe_start + em->start + - (data_stripes << BTRFS_STRIPE_LEN_SHIFT)) - logical; + btrfs_stripe_nr_to_offset(data_stripes)) - + logical; stripe_index = 0; stripe_offset = 0; } else { @@ -6435,7 +6436,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, * modulo, to reduce one modulo call. */ bioc->full_stripe_logical = em->start + - ((stripe_nr * data_stripes) << BTRFS_STRIPE_LEN_SHIFT); + btrfs_stripe_nr_to_offset(stripe_nr * data_stripes); for (i = 0; i < num_stripes; i++) set_io_stripe(&bioc->stripes[i], map, (i + stripe_nr) % num_stripes, @@ -8032,7 +8033,7 @@ static void map_raid56_repair_block(struct btrfs_io_context *bioc, for (i = 0; i < data_stripes; i++) { u64 stripe_start = bioc->full_stripe_logical + - (i << BTRFS_STRIPE_LEN_SHIFT); + btrfs_stripe_nr_to_offset(i); if (logical >= stripe_start && logical < stripe_start + BTRFS_STRIPE_LEN) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index bf47a1a70813..64066d48dce1 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -574,6 +574,17 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) sizeof(struct btrfs_stripe) * (num_stripes - 1); } +/* + * Do the type safe converstion from stripe_nr to offset inside the chunk. + * + * @stripe_nr is u32, with left shift it can overflow u32 for chunks larger + * than 4G. This does the proper type cast to avoid overflow. + */ +static inline u64 btrfs_stripe_nr_to_offset(u32 stripe_nr) +{ + return (u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT; +} + void btrfs_get_bioc(struct btrfs_io_context *bioc); void btrfs_put_bioc(struct btrfs_io_context *bioc); int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, From 2230f9e1171a2e9731422a14d1bbc313c0b719d1 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 15 Jun 2023 15:42:59 +1000 Subject: [PATCH 925/934] KVM: Avoid illegal stage2 mapping on invalid memory slot We run into guest hang in edk2 firmware when KSM is kept as running on the host. The edk2 firmware is waiting for status 0x80 from QEMU's pflash device (TYPE_PFLASH_CFI01) during the operation of sector erasing or buffered write. The status is returned by reading the memory region of the pflash device and the read request should have been forwarded to QEMU and emulated by it. Unfortunately, the read request is covered by an illegal stage2 mapping when the guest hang issue occurs. The read request is completed with QEMU bypassed and wrong status is fetched. The edk2 firmware runs into an infinite loop with the wrong status. The illegal stage2 mapping is populated due to same page sharing by KSM at (C) even the associated memory slot has been marked as invalid at (B) when the memory slot is requested to be deleted. It's notable that the active and inactive memory slots can't be swapped when we're in the middle of kvm_mmu_notifier_change_pte() because kvm->mn_active_invalidate_count is elevated, and kvm_swap_active_memslots() will busy loop until it reaches to zero again. Besides, the swapping from the active to the inactive memory slots is also avoided by holding &kvm->srcu in __kvm_handle_hva_range(), corresponding to synchronize_srcu_expedited() in kvm_swap_active_memslots(). CPU-A CPU-B ----- ----- ioctl(kvm_fd, KVM_SET_USER_MEMORY_REGION) kvm_vm_ioctl_set_memory_region kvm_set_memory_region __kvm_set_memory_region kvm_set_memslot(kvm, old, NULL, KVM_MR_DELETE) kvm_invalidate_memslot kvm_copy_memslot kvm_replace_memslot kvm_swap_active_memslots (A) kvm_arch_flush_shadow_memslot (B) same page sharing by KSM kvm_mmu_notifier_invalidate_range_start : kvm_mmu_notifier_change_pte kvm_handle_hva_range __kvm_handle_hva_range kvm_set_spte_gfn (C) : kvm_mmu_notifier_invalidate_range_end Fix the issue by skipping the invalid memory slot at (C) to avoid the illegal stage2 mapping so that the read request for the pflash's status is forwarded to QEMU and emulated by it. In this way, the correct pflash's status can be returned from QEMU to break the infinite loop in the edk2 firmware. We tried a git-bisect and the first problematic commit is cd4c71835228 (" KVM: arm64: Convert to the gfn-based MMU notifier callbacks"). With this, clean_dcache_guest_page() is called after the memory slots are iterated in kvm_mmu_notifier_change_pte(). clean_dcache_guest_page() is called before the iteration on the memory slots before this commit. This change literally enlarges the racy window between kvm_mmu_notifier_change_pte() and memory slot removal so that we're able to reproduce the issue in a practical test case. However, the issue exists since commit d5d8184d35c9 ("KVM: ARM: Memory virtualization setup"). Cc: stable@vger.kernel.org # v3.9+ Fixes: d5d8184d35c9 ("KVM: ARM: Memory virtualization setup") Reported-by: Shuai Hu Reported-by: Zhenyu Zhang Signed-off-by: Gavin Shan Reviewed-by: David Hildenbrand Reviewed-by: Oliver Upton Reviewed-by: Peter Xu Reviewed-by: Sean Christopherson Reviewed-by: Shaoqin Huang Message-Id: <20230615054259.14911-1-gshan@redhat.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 479802a892d4..65f94f592ff8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -686,6 +686,24 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn return __kvm_handle_hva_range(kvm, &range); } + +static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) +{ + /* + * Skipping invalid memslots is correct if and only change_pte() is + * surrounded by invalidate_range_{start,end}(), which is currently + * guaranteed by the primary MMU. If that ever changes, KVM needs to + * unmap the memslot instead of skipping the memslot to ensure that KVM + * doesn't hold references to the old PFN. + */ + WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); + + if (range->slot->flags & KVM_MEMSLOT_INVALID) + return false; + + return kvm_set_spte_gfn(kvm, range); +} + static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address, @@ -707,7 +725,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) return; - kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn); + kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn); } void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, From c0877829ada0406233aee5bd54f6813db79d5f1f Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 16 Jun 2023 08:14:38 -0700 Subject: [PATCH 926/934] dt-bindings: firmware: qcom,scm: Document that SCM can be dma-coherent Trogdor devices use firmware backed by TF-A instead of Qualcomm's normal TZ. On TF-A we end up mapping memory as cacheable. Specifically, you can see in Trogdor's TF-A code [1] in qti_sip_mem_assign() that we call qti_mmap_add_dynamic_region() with MT_RO_DATA. This translates down to MT_MEMORY instead of MT_NON_CACHEABLE or MT_DEVICE. Let's allow devices like trogdor to be described properly by allowing "dma-coherent" in the SCM node. Signed-off-by: Douglas Anderson Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230616081440.v2.1.Ie79b5f0ed45739695c9970df121e11d724909157@changeid Signed-off-by: Bjorn Andersson --- Documentation/devicetree/bindings/firmware/qcom,scm.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/firmware/qcom,scm.yaml b/Documentation/devicetree/bindings/firmware/qcom,scm.yaml index 367d04ad1923..83381f3a1341 100644 --- a/Documentation/devicetree/bindings/firmware/qcom,scm.yaml +++ b/Documentation/devicetree/bindings/firmware/qcom,scm.yaml @@ -71,6 +71,8 @@ properties: minItems: 1 maxItems: 3 + dma-coherent: true + interconnects: maxItems: 1 From 9a5f0b11e49e27f0a01a73c31d05df4a95bea3fa Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 16 Jun 2023 08:14:39 -0700 Subject: [PATCH 927/934] arm64: dts: qcom: sc7180: Mark SCM as dma-coherent for IDP sc7180-idp is, for most intents and purposes, a trogdor device. Specifically, sc7180-idp is designed to run the same style of firmware as trogdor devices. This can be seen from the fact that IDP has the same "Reserved memory changes" in its device tree that trogdor has. Recently it was realized that we need to mark SCM as dma-coherent to match what trogdor's style of firmware (based on TF-A) does [1]. That means we need this dma-coherent tag on IDP as well. Without this, on newer versions of Linux, specifically those with commit 7bd6680b47fa ("Revert "Revert "arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()"""), WiFi will fail to work. At bootup you'll see: qcom_scm firmware:scm: Assign memory protection call failed -22 qcom_rmtfs_mem 94600000.memory: assign memory failed qcom_rmtfs_mem: probe of 94600000.memory failed with error -22 [1] https://lore.kernel.org/r/20230615145253.1.Ic62daa649b47b656b313551d646c4de9a7da4bd4@changeid Fixes: 7bd6680b47fa ("Revert "Revert "arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()""") Fixes: f5ab220d162c ("arm64: dts: qcom: sc7180: Add remoteproc enablers") Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20230616081440.v2.2.I3c17d546d553378aa8a0c68c3fe04bccea7cba17@changeid Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc7180-idp.dts | 5 +++++ arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc7180-idp.dts b/arch/arm64/boot/dts/qcom/sc7180-idp.dts index 9f052270e090..299ef5dc225a 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-idp.dts +++ b/arch/arm64/boot/dts/qcom/sc7180-idp.dts @@ -393,6 +393,11 @@ qcom,spare-regs = <&tcsr_regs_2 0xb3e4>; }; +&scm { + /* TF-A firmware maps memory cached so mark dma-coherent to match. */ + dma-coherent; +}; + &sdhc_1 { status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index f479cab8ab45..a65be760d1a7 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -369,7 +369,7 @@ }; firmware { - scm { + scm: scm { compatible = "qcom,scm-sc7180", "qcom,scm"; }; }; From a54b7fa6b9ab6b4ecb7d9aba6b1a0ce1bcc961e3 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 16 Jun 2023 08:14:40 -0700 Subject: [PATCH 928/934] arm64: dts: qcom: sc7180: Mark SCM as dma-coherent for trogdor Trogdor devices use firmware backed by TF-A instead of Qualcomm's normal TZ. On TF-A we end up mapping memory as cacheable. Specifically, you can see in Trogdor's TF-A code [1] in qti_sip_mem_assign() that we call qti_mmap_add_dynamic_region() with MT_RO_DATA. This translates down to MT_MEMORY instead of MT_NON_CACHEABLE or MT_DEVICE. Apparently Qualcomm's normal TZ implementation maps the memory as non-cacheable. Let's add the "dma-coherent" attribute to the SCM for trogdor. Adding "dma-coherent" like this fixes WiFi on sc7180-trogdor devices. WiFi was broken as of commit 7bd6680b47fa ("Revert "Revert "arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()"""). Specifically at bootup we'd get: qcom_scm firmware:scm: Assign memory protection call failed -22 qcom_rmtfs_mem 94600000.memory: assign memory failed qcom_rmtfs_mem: probe of 94600000.memory failed with error -22 From discussion on the mailing lists [2] and over IRC [3], it was determined that we should always have been tagging the SCM as dma-coherent on trogdor but that the old "invalidate" happened to make things work most of the time. Tagging it properly like this is a much more robust solution. [1] https://chromium.googlesource.com/chromiumos/third_party/arm-trusted-firmware/+/refs/heads/firmware-trogdor-13577.B/plat/qti/common/src/qti_syscall.c [2] https://lore.kernel.org/r/20230614165904.1.I279773c37e2c1ed8fbb622ca6d1397aea0023526@changeid [3] https://oftc.irclog.whitequark.org/linux-msm/2023-06-15 Fixes: 7bd6680b47fa ("Revert "Revert "arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()""") Fixes: 7ec3e67307f8 ("arm64: dts: qcom: sc7180-trogdor: add initial trogdor and lazor dt") Reviewed-by: Konrad Dybcio Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20230616081440.v2.3.Ic62daa649b47b656b313551d646c4de9a7da4bd4@changeid Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index ca6920de7ea8..1472e7f10831 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -892,6 +892,11 @@ hp_i2c: &i2c9 { qcom,spare-regs = <&tcsr_regs_2 0xb3e4>; }; +&scm { + /* TF-A firmware maps memory cached so mark dma-coherent to match. */ + dma-coherent; +}; + &sdhc_1 { status = "okay"; From 7b59e8ae92fe089fed8ff1b23e53442ae5b204c9 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 16 Jun 2023 08:14:41 -0700 Subject: [PATCH 929/934] arm64: dts: qcom: sc7280: Mark SCM as dma-coherent for chrome devices Just like for sc7180 devices using the Chrome bootflow (AKA trogdor and IDP), sc7280 devices using the Chrome bootflow also need their firmware marked dma-coherent. On sc7280 this wasn't causing WiFi to fail to startup, since WiFi works differently there. However, on sc7280 devices we were still getting the message at bootup after commit 7bd6680b47fa ("Revert "Revert "arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()"""): qcom_scm firmware:scm: Assign memory protection call failed -22 qcom_rmtfs_mem 9c900000.memory: assign memory failed qcom_rmtfs_mem: probe of 9c900000.memory failed with error -22 We should mark SCM properly just like we did for trogdor. Fixes: 7bd6680b47fa ("Revert "Revert "arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()""") Fixes: 7a1f4e7f740d ("arm64: dts: qcom: sc7280: Add basic dts/dtsi files for sc7280 soc") Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20230616081440.v2.4.I21dc14a63327bf81c6bb58fe8ed91dbdc9849ee2@changeid Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi | 5 +++++ arch/arm64/boot/dts/qcom/sc7280.dtsi | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi index f562e4d2b655..2e1cd219fc18 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-chrome-common.dtsi @@ -79,6 +79,11 @@ firmware-name = "ath11k/WCN6750/hw1.0/wpss.mdt"; }; +&scm { + /* TF-A firmware maps memory cached so mark dma-coherent to match. */ + dma-coherent; +}; + &wifi { status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 2fd1d3c0eb34..36f0bb9b3cbb 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -656,7 +656,7 @@ }; firmware { - scm { + scm: scm { compatible = "qcom,scm-sc7280", "qcom,scm"; }; }; From bd5c7104d41b62a2ae5d7f11d07e7c5f232eee42 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 Jun 2023 14:11:04 -0600 Subject: [PATCH 930/934] dt-bindings: i2c: opencores: Add missing type for "regstep" "regstep" may be deprecated, but it still needs a type. Fixes: 8ad69f490516 ("dt-bindings: i2c: convert ocores binding to yaml") Signed-off-by: Rob Herring Reviewed-by: Andrew Lunn Reviewed-by: Peter Korsgaard Reviewed-by: Conor Dooley Acked-by: Andi Shyti Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml b/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml index 85d9efb743ee..d9ef86729011 100644 --- a/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml +++ b/Documentation/devicetree/bindings/i2c/opencores,i2c-ocores.yaml @@ -60,6 +60,7 @@ properties: default: 0 regstep: + $ref: /schemas/types.yaml#/definitions/uint32 description: | deprecated, use reg-shift above deprecated: true From cd9489623c29aa2f8cc07088168afb6e0d5ef06d Mon Sep 17 00:00:00 2001 From: Shuai Jiang Date: Tue, 18 Apr 2023 21:56:12 +0800 Subject: [PATCH 931/934] i2c: qup: Add missing unwind goto in qup_i2c_probe() Smatch Warns: drivers/i2c/busses/i2c-qup.c:1784 qup_i2c_probe() warn: missing unwind goto? The goto label "fail_runtime" and "fail" will disable qup->pclk, but here qup->pclk failed to obtain, in order to be consistent, change the direct return to goto label "fail_dma". Fixes: 9cedf3b2f099 ("i2c: qup: Add bam dma capabilities") Signed-off-by: Shuai Jiang Reviewed-by: Dongliang Mu Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang Cc: # v4.6+ --- drivers/i2c/busses/i2c-qup.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index 2e153f2f71b6..78682388e02e 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1752,16 +1752,21 @@ nodma: if (!clk_freq || clk_freq > I2C_MAX_FAST_MODE_PLUS_FREQ) { dev_err(qup->dev, "clock frequency not supported %d\n", clk_freq); - return -EINVAL; + ret = -EINVAL; + goto fail_dma; } qup->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qup->base)) - return PTR_ERR(qup->base); + if (IS_ERR(qup->base)) { + ret = PTR_ERR(qup->base); + goto fail_dma; + } qup->irq = platform_get_irq(pdev, 0); - if (qup->irq < 0) - return qup->irq; + if (qup->irq < 0) { + ret = qup->irq; + goto fail_dma; + } if (has_acpi_companion(qup->dev)) { ret = device_property_read_u32(qup->dev, @@ -1775,13 +1780,15 @@ nodma: qup->clk = devm_clk_get(qup->dev, "core"); if (IS_ERR(qup->clk)) { dev_err(qup->dev, "Could not get core clock\n"); - return PTR_ERR(qup->clk); + ret = PTR_ERR(qup->clk); + goto fail_dma; } qup->pclk = devm_clk_get(qup->dev, "iface"); if (IS_ERR(qup->pclk)) { dev_err(qup->dev, "Could not get iface clock\n"); - return PTR_ERR(qup->pclk); + ret = PTR_ERR(qup->pclk); + goto fail_dma; } qup_i2c_enable_clocks(qup); src_clk_freq = clk_get_rate(qup->clk); From e69b9bc170c6d93ee375a5cbfd15f74c0fb59bdd Mon Sep 17 00:00:00 2001 From: Clark Wang Date: Mon, 29 May 2023 16:02:51 +0800 Subject: [PATCH 932/934] i2c: imx-lpi2c: fix type char overflow issue when calculating the clock cycle Claim clkhi and clklo as integer type to avoid possible calculation errors caused by data overflow. Fixes: a55fa9d0e42e ("i2c: imx-lpi2c: add low power i2c bus driver") Signed-off-by: Clark Wang Signed-off-by: Carlos Song Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx-lpi2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 1af0a637d7f1..4d24ceb57ee7 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -201,8 +201,8 @@ static void lpi2c_imx_stop(struct lpi2c_imx_struct *lpi2c_imx) /* CLKLO = I2C_CLK_RATIO * CLKHI, SETHOLD = CLKHI, DATAVD = CLKHI/2 */ static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx) { - u8 prescale, filt, sethold, clkhi, clklo, datavd; - unsigned int clk_rate, clk_cycle; + u8 prescale, filt, sethold, datavd; + unsigned int clk_rate, clk_cycle, clkhi, clklo; enum lpi2c_imx_pincfg pincfg; unsigned int temp; From afa4bb778e48d79e4a642ed41e3b4e0de7489a6c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Jun 2023 12:08:14 -0700 Subject: [PATCH 933/934] workqueue: clean up WORK_* constant types, clarify masking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dave Airlie reports that gcc-13.1.1 has started complaining about some of the workqueue code in 32-bit arm builds: kernel/workqueue.c: In function ‘get_work_pwq’: kernel/workqueue.c:713:24: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] 713 | return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); | ^ [ ... a couple of other cases ... ] and while it's not immediately clear exactly why gcc started complaining about it now, I suspect it's some C23-induced enum type handlign fixup in gcc-13 is the cause. Whatever the reason for starting to complain, the code and data types are indeed disgusting enough that the complaint is warranted. The wq code ends up creating various "helper constants" (like that WORK_STRUCT_WQ_DATA_MASK) using an enum type, which is all kinds of confused. The mask needs to be 'unsigned long', not some unspecified enum type. To make matters worse, the actual "mask and cast to a pointer" is repeated a couple of times, and the cast isn't even always done to the right pointer, but - as the error case above - to a 'void *' with then the compiler finishing the job. That's now how we roll in the kernel. So create the masks using the proper types rather than some ambiguous enumeration, and use a nice helper that actually does the type conversion in one well-defined place. Incidentally, this magically makes clang generate better code. That, admittedly, is really just a sign of clang having been seriously confused before, and cleaning up the typing unconfuses the compiler too. Reported-by: Dave Airlie Link: https://lore.kernel.org/lkml/CAPM=9twNnV4zMCvrPkw3H-ajZOH-01JVh_kDrxdPYQErz8ZTdA@mail.gmail.com/ Cc: Arnd Bergmann Cc: Tejun Heo Cc: Nick Desaulniers Cc: Nathan Chancellor Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 15 ++++++++------- kernel/workqueue.c | 13 ++++++++----- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 3992c994787f..683efe29fa69 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -68,7 +68,6 @@ enum { WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, - WORK_OFFQ_CANCELING = (1 << __WORK_OFFQ_CANCELING), /* * When a work item is off queue, its high bits point to the last @@ -79,12 +78,6 @@ enum { WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, - WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1, - - /* convenience constants */ - WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, - WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, - WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, @@ -94,6 +87,14 @@ enum { WORKER_DESC_LEN = 24, }; +/* Convenience constants - of type 'unsigned long', not 'enum'! */ +#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING) +#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) +#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) + +#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1) +#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) + struct work_struct { atomic_long_t data; struct list_head entry; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4666a1a92a31..c913e333cce8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -705,12 +705,17 @@ static void clear_work_data(struct work_struct *work) set_work_data(work, WORK_STRUCT_NO_POOL, 0); } +static inline struct pool_workqueue *work_struct_pwq(unsigned long data) +{ + return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK); +} + static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) - return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); + return work_struct_pwq(data); else return NULL; } @@ -738,8 +743,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) assert_rcu_or_pool_mutex(); if (data & WORK_STRUCT_PWQ) - return ((struct pool_workqueue *) - (data & WORK_STRUCT_WQ_DATA_MASK))->pool; + return work_struct_pwq(data)->pool; pool_id = data >> WORK_OFFQ_POOL_SHIFT; if (pool_id == WORK_OFFQ_POOL_NONE) @@ -760,8 +764,7 @@ static int get_work_pool_id(struct work_struct *work) unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) - return ((struct pool_workqueue *) - (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id; + return work_struct_pwq(data)->pool->id; return data >> WORK_OFFQ_POOL_SHIFT; } From 6995e2de6891c724bfeb2db33d7b87775f913ad1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 25 Jun 2023 16:29:58 -0700 Subject: [PATCH 934/934] Linux 6.4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b68b43c19072..e51e4d9174ab 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION*